diff --git a/.github/workflows/deploy-site.yml b/.github/workflows/deploy-site.yml index 3e78bc61b..67f557bad 100644 --- a/.github/workflows/deploy-site.yml +++ b/.github/workflows/deploy-site.yml @@ -53,6 +53,9 @@ jobs: - name: Extract skill metadata for dashboard run: python3 website/scripts/extract-skills.py + - name: Regenerate per-skill docs pages + catalogs + run: python3 website/scripts/generate-skill-docs.py + - name: Build skills index (if not already present) env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/docs-site-checks.yml b/.github/workflows/docs-site-checks.yml index 2f985122c..80fe9ea9d 100644 --- a/.github/workflows/docs-site-checks.yml +++ b/.github/workflows/docs-site-checks.yml @@ -36,6 +36,9 @@ jobs: - name: Extract skill metadata for dashboard run: python3 website/scripts/extract-skills.py + - name: Regenerate per-skill docs pages + catalogs + run: python3 website/scripts/generate-skill-docs.py + - name: Lint docs diagrams run: npm run lint:diagrams working-directory: website diff --git a/AGENTS.md b/AGENTS.md index 0f5ce15f2..05a6742d4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,78 +5,61 @@ Instructions for AI coding assistants and developers working on the hermes-agent ## Development Environment ```bash -source venv/bin/activate # ALWAYS activate before running Python +# Prefer .venv; fall back to venv if that's what your checkout has. +source .venv/bin/activate # or: source venv/bin/activate ``` +`scripts/run_tests.sh` probes `.venv` first, then `venv`, then +`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the +main checkout). + ## Project Structure +File counts shift constantly — don't treat the tree below as exhaustive. +The canonical source is the filesystem. The notes call out the load-bearing +entry points you'll actually edit. 
+ ``` hermes-agent/ -├── run_agent.py # AIAgent class — core conversation loop +├── run_agent.py # AIAgent class — core conversation loop (~12k LOC) ├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call() ├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list -├── cli.py # HermesCLI class — interactive CLI orchestrator +├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC) ├── hermes_state.py # SessionDB — SQLite session store (FTS5 search) -├── agent/ # Agent internals -│ ├── prompt_builder.py # System prompt assembly -│ ├── context_compressor.py # Auto context compression -│ ├── prompt_caching.py # Anthropic prompt caching -│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization) -│ ├── model_metadata.py # Model context lengths, token estimation -│ ├── models_dev.py # models.dev registry integration (provider-aware context) -│ ├── display.py # KawaiiSpinner, tool preview formatting -│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway) -│ └── trajectory.py # Trajectory saving helpers -├── hermes_cli/ # CLI subcommands and setup -│ ├── main.py # Entry point — all `hermes` subcommands -│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration -│ ├── commands.py # Slash command definitions + SlashCommandCompleter -│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval) -│ ├── setup.py # Interactive setup wizard -│ ├── skin_engine.py # Skin/theme engine — CLI visual customization -│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform -│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform -│ ├── skills_hub.py # `/skills` slash command (search, browse, install) -│ ├── models.py # Model catalog, provider model lists -│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway) -│ └── auth.py # Provider credential resolution -├── tools/ # Tool implementations (one file per tool) -│ ├── registry.py # Central tool 
registry (schemas, handlers, dispatch) -│ ├── approval.py # Dangerous command detection -│ ├── terminal_tool.py # Terminal orchestration -│ ├── process_registry.py # Background process management -│ ├── file_tools.py # File read/write/search/patch -│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl) -│ ├── browser_tool.py # Browserbase browser automation -│ ├── code_execution_tool.py # execute_code sandbox -│ ├── delegate_tool.py # Subagent delegation -│ ├── mcp_tool.py # MCP client (~1050 lines) +├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths +├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware) +├── batch_runner.py # Parallel batch processing +├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.) +├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine +├── tools/ # Tool implementations — auto-discovered via tools/registry.py │ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity) -├── gateway/ # Messaging platform gateway -│ ├── run.py # Main loop, slash commands, message dispatch -│ ├── session.py # SessionStore — conversation persistence -│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot +├── gateway/ # Messaging gateway — run.py + session.py + platforms/ +│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp, +│ │ # homeassistant, signal, matrix, mattermost, email, sms, +│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles, +│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md. +│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...) +├── plugins/ # Plugin system (see "Plugins" section below) +│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...) +│ ├── context_engine/ # Context-engine plugins +│ └── <plugin-name>/ # Dashboard, image-gen, disk-cleanup, examples, ... 
+├── optional-skills/ # Heavier/niche skills shipped but NOT active by default +├── skills/ # Built-in skills bundled with the repo ├── ui-tui/ # Ink (React) terminal UI — `hermes --tui` -│ ├── src/entry.tsx # TTY gate + render() -│ ├── src/app.tsx # Main state machine and UI -│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge -│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks) -│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.) -│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory -│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages) +│ └── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib ├── tui_gateway/ # Python JSON-RPC backend for the TUI -│ ├── entry.py # stdio entrypoint -│ ├── server.py # RPC handlers and session logic -│ ├── render.py # Optional rich/ANSI bridge -│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands ├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration) -├── cron/ # Scheduler (jobs.py, scheduler.py) +├── cron/ # Scheduler — jobs.py, scheduler.py ├── environments/ # RL training environments (Atropos) -├── tests/ # Pytest suite (~3000 tests) -└── batch_runner.py # Parallel batch processing +├── scripts/ # run_tests.sh, release.py, auxiliary scripts +├── website/ # Docusaurus docs site +└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026) ``` -**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys) +**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only). +**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+), +`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`. +Browse with `hermes logs [--follow] [--level ...] [--session ...]`. 
## File Dependency Chain @@ -94,20 +77,30 @@ run_agent.py, cli.py, batch_runner.py, environments/ ## AIAgent Class (run_agent.py) +The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks, +session context, budget, credential pool, etc.). The signature below is the +minimum subset you'll usually touch — read `run_agent.py` for the full list. + ```python class AIAgent: def __init__(self, - model: str = "anthropic/claude-opus-4.6", - max_iterations: int = 90, + base_url: str = None, + api_key: str = None, + provider: str = None, + api_mode: str = None, # "chat_completions" | "codex_responses" | ... + model: str = "", # empty → resolved from config/provider later + max_iterations: int = 90, # tool-calling iterations (shared with subagents) enabled_toolsets: list = None, disabled_toolsets: list = None, quiet_mode: bool = False, save_trajectories: bool = False, - platform: str = None, # "cli", "telegram", etc. + platform: str = None, # "cli", "telegram", etc. session_id: str = None, skip_context_files: bool = False, skip_memory: bool = False, - # ... plus provider, api_mode, callbacks, routing params + credential_pool=None, + # ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model, + # checkpoints config, prefill_messages, service_tier, reasoning_config, etc. ): ... 
def chat(self, message: str) -> str: @@ -120,10 +113,13 @@ class AIAgent: ### Agent Loop -The core loop is inside `run_conversation()` — entirely synchronous: +The core loop is inside `run_conversation()` — entirely synchronous, with +interrupt checks, budget tracking, and a one-turn grace call: ```python -while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0: +while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \ + or self._budget_grace_call: + if self._interrupt_requested: break response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas) if response.tool_calls: for tool_call in response.tool_calls: @@ -134,7 +130,8 @@ while api_call_count < self.max_iterations and self.iteration_budget.remaining > return response.content ``` -Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`. +Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. +Reasoning content is stored in `assistant_msg["reasoning"]`. --- @@ -243,6 +240,19 @@ npm run fmt # prettier npm test # vitest ``` +### TUI in the Dashboard (`hermes dashboard` → `/chat`) + +The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`. + +- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths. +- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade). +- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not). 
+- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`. + +**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead. + +**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired. + --- ## Adding New Tools @@ -280,7 +290,7 @@ The registry handles schema collection, dispatch, availability checking, and err **State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state. -**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern. +**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern. --- @@ -288,9 +298,13 @@ The registry handles schema collection, dispatch, availability checking, and err ### config.yaml options: 1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py` -2. Bump `_config_version` (currently 5) to trigger migration for existing users +2. 
Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`) + ONLY if you need to actively migrate/transform existing user config + (renaming keys, changing structure). Adding a new key to an existing + section is handled automatically by the deep-merge and does NOT require + a version bump. -### .env variables: +### .env variables (SECRETS ONLY — API keys, tokens, passwords): 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata: ```python "NEW_API_KEY": { @@ -302,13 +316,29 @@ The registry handles schema collection, dispatch, availability checking, and err }, ``` -### Config loaders (two separate systems): +Non-secret settings (timeouts, thresholds, feature flags, paths, display +preferences) belong in `config.yaml`, not `.env`. If internal code needs an +env var mirror for backward compatibility, bridge it from `config.yaml` to +the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`). + +### Config loaders (three paths — know which one you're in): | Loader | Used by | Location | |--------|---------|----------| -| `load_cli_config()` | CLI mode | `cli.py` | -| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` | -| Direct YAML load | Gateway | `gateway/run.py` | +| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML | +| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML | +| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw | + +If you add a new key and the CLI sees it but the gateway doesn't (or vice +versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage. + +### Working directory: +- **CLI** — uses the process's current directory (`os.getcwd()`). +- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this + to the `TERMINAL_CWD` env var for child tools. 
**`MESSAGING_CWD` has been + removed** — the config loader prints a deprecation warning if it's set in + `.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is + `terminal.cwd` in `config.yaml`. --- @@ -401,7 +431,95 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml. --- +## Plugins + +Hermes has two plugin surfaces. Both live under `plugins/` in the repo so +repo-shipped plugins can be discovered alongside user-installed ones in +`~/.hermes/plugins/` and pip-installed entry points. + +### General plugins (`hermes_cli/plugins.py` + `plugins/<plugin-name>/`) + +`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`, +and pip entry points. Each plugin exposes a `register(ctx)` function that +can: + +- Register Python-callback lifecycle hooks: + `pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`, + `on_session_start`, `on_session_end` +- Register new tools via `ctx.register_tool(...)` +- Register CLI subcommands via `ctx.register_cli_command(...)` — the + plugin's argparse tree is wired into `hermes` at startup so + `hermes <subcommand>` works with no change to `main.py` + +Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py` +(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs +as a side effect of importing `model_tools.py`. Code paths that read plugin +state without importing `model_tools.py` first must call `discover_plugins()` +explicitly (it's idempotent). + +### Memory-provider plugins (`plugins/memory/<provider>/`) + +Separate discovery system for pluggable memory backends. Current built-in +providers include **honcho, mem0, supermemory, byterover, hindsight, +holographic, openviking, retaindb**. + +Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`) +and is orchestrated by `agent/memory_manager.py`. 
Lifecycle hooks include +`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional +`post_setup(hermes_home, config)` for setup-wizard integration. + +**CLI commands via `plugins/memory/<provider>/cli.py`:** if a memory plugin +defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds +it at argparse setup time and wires it into `hermes <subcommand>`. The +framework only exposes CLI commands for the **currently active** memory +provider (read from `memory.provider` in config.yaml), so disabled +providers don't clutter `hermes --help`. + +**Rule (Teknium, May 2026):** plugins MUST NOT modify core files +(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.). +If a plugin needs a capability the framework doesn't expose, expand the +generic plugin surface (new hook, new ctx method) — never hardcode +plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded +honcho argparse from `main.py` for exactly this reason. + +### Dashboard / context-engine / image-gen plugin directories + +`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`, +etc. follow the same pattern (ABC + orchestrator + per-plugin directory). +Context engines plug into `agent/context_engine.py`; image-gen providers +into `agent/image_gen_provider.py`. + +--- + +## Skills + +Two parallel surfaces: + +- **`skills/`** — built-in skills shipped and loadable by default. + Organized by category directories (e.g. `skills/github/`, `skills/mlops/`). +- **`optional-skills/`** — heavier or niche skills shipped with the repo but + NOT active by default. Installed explicitly via + `hermes skills install official/<category>/<skill-name>`. Adapter lives in + `tools/skills_hub.py` (`OptionalSkillSource`). Categories include + `autonomous-ai-agents`, `blockchain`, `communication`, `creative`, + `devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`, + `research`, `security`, `web-development`. 
+ +When reviewing skill PRs, check which directory they target — heavy-dep or +niche skills belong in `optional-skills/`. + +### SKILL.md frontmatter + +Standard fields: `name`, `description`, `version`, `platforms` +(OS-gating list: `[macos]`, `[linux, macos]`, ...), +`metadata.hermes.tags`, `metadata.hermes.category`, +`metadata.hermes.config` (config.yaml settings the skill needs — stored +under `skills.config.<skill-name>`, prompted during setup, injected at load time). + +--- + ## Important Policies + ### Prompt Caching Must Not Break Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:** @@ -411,9 +529,10 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression. -### Working Directory Behavior -- **CLI**: Uses current directory (`.` → `os.getcwd()`) -- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory) +Slash commands that mutate system-prompt state (skills, tools, memory, etc.) +must be **cache-aware**: default to deferred invalidation (change takes +effect next session), with an opt-in `--now` flag for immediate +invalidation. See `/skills install --now` for the canonical pattern. ### Background Process Notifications (Gateway) @@ -435,7 +554,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it `HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.). The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets -`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()` +`HERMES_HOME` before any module imports. All `get_hermes_home()` references automatically scope to the active profile. ### Rules for profile-safe code @@ -492,8 +611,12 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
Hardcoding `~/.hermes` breaks profiles — each profile has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575. -### DO NOT use `simple_term_menu` for interactive menus -Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern. +### DO NOT introduce new `simple_term_menu` usage +Existing call sites in `hermes_cli/main.py` remain for legacy fallback only; +the preferred UI is curses (stdlib) because `simple_term_menu` has +ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New +interactive menus must use `hermes_cli/curses_ui.py` — see +`hermes_cli/tools_config.py` for the canonical pattern. ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`. @@ -504,6 +627,30 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p ### DO NOT hardcode cross-tool references in schema descriptions Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern. +### The gateway has TWO message guards — both must bypass approval/control commands +When an agent is running, messages pass through two sequential guards: +(1) **base adapter** (`gateway/platforms/base.py`) queues messages in +`_pending_messages` when `session_key in self._active_sessions`, and +(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`, +`/queue`, `/status`, `/approve`, `/deny` before they reach +`running_agent.interrupt()`. 
Any new command that must reach the runner +while the agent is blocked (e.g. approval prompts) MUST bypass BOTH +guards and be dispatched inline, not via `_process_message_background()` +(which races session lifecycle). + +### Squash merges from stale branches silently revert recent fixes +Before squash-merging a PR, ensure the branch is up to date with `main` +(`git fetch origin main && git reset --hard origin/main` in the worktree, +then re-apply the PR's commits). A stale branch's version of an unrelated +file will silently overwrite recent fixes on main when squashed. Verify +with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a +red flag. + +### Don't wire in dead code without E2E validation +Unused code that was never shipped was dead for a reason. Before wiring an +unused module into a live code path, E2E test the real resolution chain +with actual imports (not mocks) against a temp `HERMES_HOME`. + ### Tests must not write to `~/.hermes/` The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests. @@ -559,7 +706,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells pytest directly), at minimum activate the venv and pass `-n 4`: ```bash -source venv/bin/activate +source .venv/bin/activate # or: source venv/bin/activate python -m pytest tests/ -q -n 4 ``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0e00c3f2c..146cb1161 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee We value contributions in this order: 1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority. -2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere. +2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. 
We want Hermes to work everywhere. 3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations). 4. **Performance and robustness** — retry logic, error handling, graceful degradation. 5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool) @@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl ## Cross-Platform Compatibility -Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS: +Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS: ### Critical rules @@ -597,7 +597,7 @@ refactor/description # Code restructuring 1. **Run tests**: `pytest tests/ -v` 2. **Test manually**: Run `hermes` and exercise the code path you changed -3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS +3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature. ### PR description diff --git a/Dockerfile b/Dockerfile index 8904c4c74..4ab1d3804 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,9 +10,11 @@ ENV PYTHONUNBUFFERED=1 ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright # Install system dependencies in one layer, clear APT cache +# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.) +# that would otherwise accumulate when hermes runs as PID 1. See #15012. 
RUN apt-get update && \ apt-get install -y --no-install-recommends \ - build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \ + build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \ rm -rf /var/lib/apt/lists/* # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime @@ -41,9 +43,15 @@ COPY --chown=hermes:hermes . . # Build web dashboard (Vite outputs to hermes_cli/web_dist/) RUN cd web && npm run build +# ---------- Permissions ---------- +# Make install dir world-readable so any HERMES_UID can read it at runtime. +# The venv needs to be traversable too. +USER root +RUN chmod -R a+rX /opt/hermes +# Start as root so the entrypoint can usermod/groupmod + gosu. +# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000). + # ---------- Python virtualenv ---------- -RUN chown hermes:hermes /opt/hermes -USER hermes RUN uv venv && \ uv pip install --no-cache-dir -e ".[all]" @@ -52,4 +60,4 @@ ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist ENV HERMES_HOME=/opt/data ENV PATH="/opt/data/.local/bin:${PATH}" VOLUME [ "/opt/data" ] -ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ] +ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ] diff --git a/README.md b/README.md index 70b65debd..11390fb2b 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat | Set a personality | `/personality [name]` | `/personality [name]` | | Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` | | Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` | -| Browse skills | `/skills` or `/` | `/skills` or `/` | +| Browse skills | `/skills` or `/` | `/` | | Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new 
message | | Platform-specific status | `/platforms` | `/status`, `/sethome` | @@ -157,14 +157,10 @@ curl -LsSf https://astral.sh/uv/install.sh | sh uv venv venv --python 3.11 source venv/bin/activate uv pip install -e ".[all,dev]" -python -m pytest tests/ -q +scripts/run_tests.sh ``` -> **RL Training (optional):** To work on the RL/Tinker-Atropos integration: -> ```bash -> git submodule update --init tinker-atropos -> uv pip install -e "./tinker-atropos" -> ``` +> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required. --- diff --git a/RELEASE_v0.11.0.md b/RELEASE_v0.11.0.md new file mode 100644 index 000000000..ed25f5a14 --- /dev/null +++ b/RELEASE_v0.11.0.md @@ -0,0 +1,453 @@ +# Hermes Agent v0.11.0 (v2026.4.23) + +**Release Date:** April 23, 2026 +**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors) + +> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth. + +This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack. + +--- + +## ✨ Highlights + +- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. 
(@OutThisLife + Teknium) + +- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium) + +- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)). + +- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. 
([#14720](https://github.com/NousResearch/hermes-agent/pull/14720)) + +- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831)) + +- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175)) + +- **`/steer` — mid-run agent nudges** — `/steer ` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116)) + +- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. 
([#13296](https://github.com/NousResearch/hermes-agent/pull/13296)) + +- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473)) + +- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718)) + +- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900)) + +- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. 
([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725)) + +- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium) + +--- + +## 🏗️ Core Agent & Architecture + +### Transport Layer (NEW) +- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347)) +- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor) +- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805)) +- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor) +- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814)) + +### Provider & Model Support +- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549)) +- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)) +- **Arcee AI direct provider** 
([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)) +- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor) +- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)) +- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng) +- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720)) +- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)) +- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783)) +- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782)) +- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169)) +- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor) +- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor) +- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907)) +- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398)) +- **OpenRouter elephant-alpha** in curated lists 
([#9378](https://github.com/NousResearch/hermes-agent/pull/9378)) +- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429)) +- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836)) +- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221)) +- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652)) +- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730)) +- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215)) +- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420)) +- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929)) +- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309)) +- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429)) +- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217)) +- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463)) +- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs) +- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826)) +- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot +- Fix: stream reasoning 
content through OpenAI-compatible providers that emit it + +### Agent Loop & Conversation +- **`/steer <message>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116)) +- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691)) +- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718)) +- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088)) +- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556)) +- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093)) +- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934)) +- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501)) +- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891)) +- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900)) +- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch) +- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055)) +- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063)) +- Fix: stale agent timeout, uv venv detection, empty response after tools 
([#10065](https://github.com/NousResearch/hermes-agent/pull/10065)) +- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472)) +- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504)) +- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935)) +- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940)) +- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224)) +- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u) +- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941)) +- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021)) +- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs) +- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238)) + +### Session & Memory +- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861)) +- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619)) +- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987)) +- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511)) +- Fix: discover user-installed memory providers 
from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529)) +- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507)) +- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059)) + +--- + +## 🖥️ New Ink-based TUI + +A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`. + +### TUI Foundations +- New TUI based on Ink + Python JSON-RPC backend +- Prettier + ESLint + vitest tooling for `ui-tui/` +- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine) +- Persistent `_SlashWorker` subprocess for slash command dispatch + +### UX & Features +- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife) +- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife) +- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife) +- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife) +- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948)) +- Sticky composer that freezes during scroll +- OSC-52 clipboard support for copy across SSH sessions +- Virtualized history rendering for performance +- Slash command autocomplete via `complete.slash` RPC +- Path autocomplete via `complete.path` RPC +- Dozens of resize/ghosting/sticky-prompt fixes landed through the week + +### Structural Refactors +- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` 
([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding) +- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx` +- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory` + +--- + +## 📱 Messaging Platforms (Gateway) + +### New Platforms +- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831)) + +### Telegram +- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681)) +- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530)) +- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610)) +- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794)) +- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319)) +- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538)) +- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947)) +- Fix: Markdown special char escaping in `send_exec_approval` +- Fix: parentheses in URLs during MarkdownV2 link conversion +- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue) +- Many platform hint / streaming / session-key fixes + +### Discord +- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920)) +- **`DISCORD_ALLOWED_ROLES`** 
for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608)) +- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315)) +- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283)) +- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246)) +- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909)) +- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781)) + +### Feishu +- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898)) +- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927)) +- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167)) + +### DingTalk +- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564)) +- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574)) +- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910)) + +### WhatsApp +- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002)) +- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151)) + +### WeCom / Weixin +- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) 
([#13961](https://github.com/NousResearch/hermes-agent/pull/13961)) + +### Signal +- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178)) + +### Slack +- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987)) + +### BlueBubbles (iMessage) +- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806)) + +### Gateway Core +- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787)) +- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564)) +- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175)) +- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307)) +- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683)) +- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043)) +- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850)) +- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895)) +- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235)) +- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor) +- Fix: busy-session ack when user messages during active agent run 
([#10068](https://github.com/NousResearch/hermes-agent/pull/10068)) +- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460)) +- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor) +- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018)) +- Fix: unlink stale PID + lock files on cleanup +- Fix: force-unlink stale PID file after `--replace` takeover + +--- + +## 🔧 Tool System + +### Plugin Surface (major expansion) +- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626)) +- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763)) +- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377)) +- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972)) +- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929)) +- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786)) +- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944)) +- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799)) +- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317)) +- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296)) + +### 
Browser +- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369)) +- Camofox hardening + connection stability across the window + +### Execute Code +- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971)) + +### Image Generation +- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265)) +- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406)) +- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677)) +- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765)) + +### TTS / STT / Voice +- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229)) +- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473)) +- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783)) +- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395)) +- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u) + +### Webhook / Cron +- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473)) +- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373)) +- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767)) + +### Delegate +- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691)) 
+- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718)) + +### File / Patch +- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435)) + +### API Server +- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049)) +- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969)) + +### Docker / Podman +- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066)) +- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232)) +- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291)) + +### MCP +- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.) 
+ +--- + +## 🧩 Skills Ecosystem + +### Skill System +- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786)) +- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468)) +- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557)) +- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384)) +- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303)) +- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493)) +- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467)) + +### New Skills +- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363)) +- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906)) +- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725)) +- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu) +- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254)) +- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976)) +- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355)) +- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET 
([#9443](https://github.com/NousResearch/hermes-agent/pull/9443)) +- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298)) +- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425)) +- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398)) +- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700)) + +--- + +## 📊 Web Dashboard + +- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453)) +- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687)) +- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951)) +- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett) +- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith) +- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett) +- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357)) +- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894)) +- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett) +- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004)) + +--- + +## 🖱️ CLI & User Experience + +- **Dynamic shell completion for 
bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785)) +- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461)) +- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416)) +- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934)) +- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277)) +- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428)) +- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112)) +- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902)) +- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285)) +- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029)) + +--- + +## 🔒 Security & Reliability + +- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166)) +- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895)) +- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536)) +- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner) +- **Warn about legacy hermes.service units** during `hermes update` 
([#11918](https://github.com/NousResearch/hermes-agent/pull/11918)) + +- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537)) + +- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527)) + +- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584)) + +- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window + +--- + +## 🐛 Notable Bug Fixes + +The `fix:` category in this window covers 482 PRs. Highlights: + +- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs) +- `<think>` and `<tool_call>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408)) +- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799)) +- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522)) +- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783)) +- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e) +- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor) +- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546)) +- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612)) +- Fixed Doctor checks for Kimi China credentials +- Streaming: don't suppress final 
response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540)) +- Rapid Telegram follow-ups no longer get cut off + +--- + +## 🧪 Testing & CI + +- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376)) +- Hermetic test parity (`scripts/run_tests.sh`) held across this window +- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout + +--- + +## 📚 Documentation + +- Atropos + wandb links in user guide +- ACP / VS Code / Zed / JetBrains integration docs refresh +- Webhook subscription docs updated for direct-delivery mode +- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`) +- Transport layer developer guide added +- Website removed Discussions link from README + +--- + +## 👥 Contributors + +### Core +- **@teknium1** (Teknium) + +### Top Community Contributors (by merged PR count) +- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix +- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay +- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes +- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons +- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes +- **@ethernet8023** — 3 PRs +- **@benbarclay** — 3 PRs +- **@Aslaaen** — 2 PRs + +### Also contributing +@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 
(concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work). + +### All Contributors (alphabetical) + +@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0, +@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia, +@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9, +@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier, +@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw, +@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione, +@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith, +@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky, +@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle, +@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10, +@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall, +@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy, +@honghua, @houko, @houziershi, @hsy5571616, @huangke19, 
@hxp-plus, @Hypn0sis, @I3eg1nner, @iacker, +@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk, +@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu, +@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu, +@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii, +@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany, +@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav, +@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr, +@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod, +@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120, +@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh, +@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes, +@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555, +@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl, +@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24, +@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen, +@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY, +@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu, +@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58, +@youngDoo, @yudaiyan, @Yukipukii1, 
@yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry, +@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy + +Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001. + + +--- + +**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23) diff --git a/acp_adapter/server.py b/acp_adapter/server.py index d73c71157..612748d56 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -60,7 +60,7 @@ from acp_adapter.events import ( make_tool_progress_cb, ) from acp_adapter.permissions import make_approval_callback -from acp_adapter.session import SessionManager, SessionState +from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets logger = logging.getLogger(__name__) @@ -287,7 +287,11 @@ class HermesACPAgent(acp.Agent): try: from model_tools import get_tool_definitions - enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"] + enabled_toolsets = _expand_acp_enabled_toolsets( + getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"], + mcp_server_names=[server.name for server in mcp_servers], + ) + state.agent.enabled_toolsets = enabled_toolsets disabled_toolsets = getattr(state.agent, "disabled_toolsets", None) state.agent.tools = get_tool_definitions( enabled_toolsets=enabled_toolsets, @@ -754,7 +758,9 @@ class HermesACPAgent(acp.Agent): def _cmd_tools(self, args: str, state: SessionState) -> str: try: from model_tools import get_tool_definitions - toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"] + toolsets = _expand_acp_enabled_toolsets( + getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"] + ) tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True) if not tools: return "No tools available." 
diff --git a/acp_adapter/session.py b/acp_adapter/session.py index 3f5f78f9a..724573002 100644 --- a/acp_adapter/session.py +++ b/acp_adapter/session.py @@ -106,6 +106,24 @@ def _register_task_cwd(task_id: str, cwd: str) -> None: logger.debug("Failed to register ACP task cwd override", exc_info=True) +def _expand_acp_enabled_toolsets( + toolsets: List[str] | None = None, + mcp_server_names: List[str] | None = None, +) -> List[str]: + """Return ACP toolsets plus explicit MCP server toolsets for this session.""" + expanded: List[str] = [] + for name in list(toolsets or ["hermes-acp"]): + if name and name not in expanded: + expanded.append(name) + + for server_name in list(mcp_server_names or []): + toolset_name = f"mcp-{server_name}" + if server_name and toolset_name not in expanded: + expanded.append(toolset_name) + + return expanded + + def _clear_task_cwd(task_id: str) -> None: """Remove task-specific cwd overrides for an ACP session.""" if not task_id: @@ -537,9 +555,18 @@ class SessionManager: elif isinstance(model_cfg, str) and model_cfg.strip(): default_model = model_cfg.strip() + configured_mcp_servers = [ + name + for name, cfg in (config.get("mcp_servers") or {}).items() + if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False + ] + kwargs = { "platform": "acp", - "enabled_toolsets": ["hermes-acp"], + "enabled_toolsets": _expand_acp_enabled_toolsets( + ["hermes-acp"], + mcp_server_names=configured_mcp_servers, + ), "quiet_mode": True, "session_id": session_id, "model": model or default_model, diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index ea09c11ea..01fb8e48b 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -14,6 +14,8 @@ import copy import json import logging import os +import platform +import subprocess from pathlib import Path from hermes_constants import get_hermes_home @@ -277,8 +279,9 @@ def _is_oauth_token(key: str) -> bool: Positively identifies Anthropic OAuth tokens by their key 
format: - ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys - ``eyJ`` prefix → JWTs from the Anthropic OAuth flow + - ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN) - Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern + Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern and correctly return False. """ if not key: @@ -292,6 +295,9 @@ def _is_oauth_token(key: str) -> bool: # JWTs from Anthropic OAuth flow if key.startswith("eyJ"): return True + # Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN) + if key.startswith("cc-"): + return True return False @@ -461,8 +467,72 @@ def build_anthropic_bedrock_client(region: str): ) +def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]: + """Read Claude Code OAuth credentials from the macOS Keychain. + + Claude Code >=2.1.114 stores credentials in the macOS Keychain under the + service name "Claude Code-credentials" rather than (or in addition to) + the JSON file at ~/.claude/.credentials.json. + + The password field contains a JSON string with the same claudeAiOauth + structure as the JSON file. + + Returns dict with {accessToken, refreshToken?, expiresAt?} or None. 
+ """ + import platform + import subprocess + + if platform.system() != "Darwin": + return None + + try: + # Read the "Claude Code-credentials" generic password entry + result = subprocess.run( + ["security", "find-generic-password", + "-s", "Claude Code-credentials", + "-w"], + capture_output=True, + text=True, + timeout=5, + ) + except (OSError, subprocess.TimeoutExpired): + logger.debug("Keychain: security command not available or timed out") + return None + + if result.returncode != 0: + logger.debug("Keychain: no entry found for 'Claude Code-credentials'") + return None + + raw = result.stdout.strip() + if not raw: + return None + + try: + data = json.loads(raw) + except json.JSONDecodeError: + logger.debug("Keychain: credentials payload is not valid JSON") + return None + + oauth_data = data.get("claudeAiOauth") + if oauth_data and isinstance(oauth_data, dict): + access_token = oauth_data.get("accessToken", "") + if access_token: + return { + "accessToken": access_token, + "refreshToken": oauth_data.get("refreshToken", ""), + "expiresAt": oauth_data.get("expiresAt", 0), + "source": "macos_keychain", + } + + return None + + def read_claude_code_credentials() -> Optional[Dict[str, Any]]: - """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json. + """Read refreshable Claude Code OAuth credentials. + + Checks two sources in order: + 1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry + 2. ~/.claude/.credentials.json file This intentionally excludes ~/.claude.json primaryApiKey. Opencode's subscription flow is OAuth/setup-token based with refreshable credentials, @@ -471,6 +541,12 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]: Returns dict with {accessToken, refreshToken?, expiresAt?} or None. 
""" + # Try macOS Keychain first (covers Claude Code >=2.1.114) + kc_creds = _read_claude_code_credentials_from_keychain() + if kc_creds: + return kc_creds + + # Fall back to JSON file cred_path = Path.home() / ".claude" / ".credentials.json" if cred_path.exists(): try: @@ -641,7 +717,9 @@ def _write_claude_code_credentials( existing["claudeAiOauth"] = oauth_data cred_path.parent.mkdir(parents=True, exist_ok=True) - cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8") + _tmp_cred = cred_path.with_suffix(".tmp") + _tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8") + _tmp_cred.replace(cred_path) # Restrict permissions (credentials file) cred_path.chmod(0o600) except (OSError, IOError) as e: @@ -908,6 +986,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]: # --------------------------------------------------------------------------- +def _is_bedrock_model_id(model: str) -> bool: + """Detect AWS Bedrock model IDs that use dots as namespace separators. + + Bedrock model IDs come in two forms: + - Bare: ``anthropic.claude-opus-4-7`` + - Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0`` + + In both cases the dots separate namespace components, not version + numbers, and must be preserved verbatim for the Bedrock API. + """ + lower = model.lower() + # Regional inference-profile prefixes + if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")): + return True + # Bare Bedrock model IDs: provider.model-family + if lower.startswith("anthropic."): + return True + return False + + def normalize_model_name(model: str, preserve_dots: bool = False) -> str: """Normalize a model name for the Anthropic API. @@ -915,11 +1013,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str: - Converts dots to hyphens in version numbers (OpenRouter uses dots, Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless preserve_dots is True (e.g. 
for Alibaba/DashScope: qwen3.5-plus). + - Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and + regional inference profiles (``us.anthropic.claude-*``) whose dots + are namespace separators, not version separators. """ lower = model.lower() if lower.startswith("anthropic/"): model = model[len("anthropic/"):] if not preserve_dots: + # Bedrock model IDs use dots as namespace separators + # (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*"). + # These must not be converted to hyphens. See issue #12295. + if _is_bedrock_model_id(model): + return model # OpenRouter uses dots for version separators (claude-opus-4.6), # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens. model = model.replace(".", "-") @@ -1598,4 +1704,3 @@ def build_anthropic_kwargs( return kwargs - diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 1563b866c..5e8a60e76 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -74,6 +74,12 @@ _PROVIDER_ALIASES = { "minimax_cn": "minimax-cn", "claude": "anthropic", "claude-code": "anthropic", + "github": "copilot", + "github-copilot": "copilot", + "github-model": "copilot", + "github-models": "copilot", + "github-copilot-acp": "copilot-acp", + "copilot-acp-agent": "copilot-acp", } @@ -89,10 +95,11 @@ def _normalize_aux_provider(provider: Optional[str]) -> str: if normalized == "main": # Resolve to the user's actual main provider so named custom providers # and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly. - main_prov = _read_main_provider() + main_prov = (_read_main_provider() or "").strip().lower() if main_prov and main_prov not in ("auto", "main", ""): - return main_prov - return "custom" + normalized = main_prov + else: + return "custom" return _PROVIDER_ALIASES.get(normalized, normalized) @@ -151,7 +158,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { # differs from their main chat model, map it here. 
The vision auto-detect # "exotic provider" branch checks this before falling back to the main model. _PROVIDER_VISION_MODELS: Dict[str, str] = { - "xiaomi": "mimo-v2-omni", + "xiaomi": "mimo-v2.5", "zai": "glm-5v-turbo", } @@ -916,6 +923,19 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: default_headers=_OR_HEADERS), _OPENROUTER_MODEL +def _describe_openrouter_unavailable() -> str: + """Return a more precise OpenRouter auth failure reason for logs.""" + pool_present, entry = _select_pool_entry("openrouter") + if pool_present: + if entry is None: + return "OpenRouter credential pool has no usable entries (credentials may be exhausted)" + if not _pool_runtime_api_key(entry): + return "OpenRouter credential pool entry is missing a runtime API key" + if not str(os.getenv("OPENROUTER_API_KEY") or "").strip(): + return "OPENROUTER_API_KEY not set" + return "no usable OpenRouter credentials found" + + def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: # Check cross-session rate limit guard before attempting Nous — # if another session already recorded a 429, skip Nous entirely @@ -1329,6 +1349,68 @@ def _is_auth_error(exc: Exception) -> bool: return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower() +def _evict_cached_clients(provider: str) -> None: + """Drop cached auxiliary clients for a provider so fresh creds are used.""" + normalized = _normalize_aux_provider(provider) + with _client_cache_lock: + stale_keys = [ + key for key in _client_cache + if _normalize_aux_provider(str(key[0])) == normalized + ] + for key in stale_keys: + client = _client_cache.get(key, (None, None, None))[0] + if client is not None: + _force_close_async_httpx(client) + try: + close_fn = getattr(client, "close", None) + if callable(close_fn): + close_fn() + except Exception: + pass + _client_cache.pop(key, None) + + +def _refresh_provider_credentials(provider: str) -> bool: + """Refresh short-lived credentials 
for OAuth-backed auxiliary providers.""" + normalized = _normalize_aux_provider(provider) + try: + if normalized == "openai-codex": + from hermes_cli.auth import resolve_codex_runtime_credentials + + creds = resolve_codex_runtime_credentials(force_refresh=True) + if not str(creds.get("api_key", "") or "").strip(): + return False + _evict_cached_clients(normalized) + return True + if normalized == "nous": + from hermes_cli.auth import resolve_nous_runtime_credentials + + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + force_mint=True, + ) + if not str(creds.get("api_key", "") or "").strip(): + return False + _evict_cached_clients(normalized) + return True + if normalized == "anthropic": + from agent.anthropic_adapter import read_claude_code_credentials, _refresh_oauth_token, resolve_anthropic_token + + creds = read_claude_code_credentials() + token = _refresh_oauth_token(creds) if isinstance(creds, dict) and creds.get("refreshToken") else None + if not str(token or "").strip(): + token = resolve_anthropic_token() + if not str(token or "").strip(): + return False + _evict_cached_clients(normalized) + return True + except Exception as exc: + logger.debug("Auxiliary provider credential refresh failed for %s: %s", normalized, exc) + return False + return False + + def _try_payment_fallback( failed_provider: str, task: str = None, @@ -1627,8 +1709,10 @@ def resolve_provider_client( if provider == "openrouter": client, default = _try_openrouter() if client is None: - logger.warning("resolve_provider_client: openrouter requested " - "but OPENROUTER_API_KEY not set") + logger.warning( + "resolve_provider_client: openrouter requested but %s", + _describe_openrouter_unavailable(), + ) return None, None final_model = _normalize_resolved_model(model or default, provider) return (_to_async_client(client, final_model) if 
async_mode @@ -1721,7 +1805,7 @@ def resolve_provider_client( "but no endpoint credentials found") return None, None - # ── Named custom providers (config.yaml custom_providers list) ─── + # ── Named custom providers (config.yaml providers dict / custom_providers list) ─── try: from hermes_cli.runtime_provider import _get_named_custom_provider custom_entry = _get_named_custom_provider(provider) @@ -1732,16 +1816,51 @@ def resolve_provider_client( if not custom_key and custom_key_env: custom_key = os.getenv(custom_key_env, "").strip() custom_key = custom_key or "no-key-required" + # An explicit per-task api_mode override (from _resolve_task_provider_model) + # wins; otherwise fall back to what the provider entry declared. + entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip() if custom_base: final_model = _normalize_resolved_model( model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini", provider, ) - client = OpenAI(api_key=custom_key, base_url=custom_base) - client = _wrap_if_needed(client, final_model, custom_base) logger.debug( - "resolve_provider_client: named custom provider %r (%s)", - provider, final_model) + "resolve_provider_client: named custom provider %r (%s, api_mode=%s)", + provider, final_model, entry_api_mode or "chat_completions") + # anthropic_messages: route through the Anthropic Messages API + # via AnthropicAuxiliaryClient. Mirrors the anonymous-custom + # branch in _try_custom_endpoint(). See #15033. 
+ if entry_api_mode == "anthropic_messages": + try: + from agent.anthropic_adapter import build_anthropic_client + real_client = build_anthropic_client(custom_key, custom_base) + except ImportError: + logger.warning( + "Named custom provider %r declares api_mode=" + "anthropic_messages but the anthropic SDK is not " + "installed — falling back to OpenAI-wire.", + provider, + ) + client = OpenAI(api_key=custom_key, base_url=custom_base) + return (_to_async_client(client, final_model) if async_mode + else (client, final_model)) + sync_anthropic = AnthropicAuxiliaryClient( + real_client, final_model, custom_key, custom_base, is_oauth=False, + ) + if async_mode: + return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model + return sync_anthropic, final_model + client = OpenAI(api_key=custom_key, base_url=custom_base) + # codex_responses or inherited auto-detect (via _wrap_if_needed). + # _wrap_if_needed reads the closed-over `api_mode` (the task-level + # override). Named-provider entry api_mode=codex_responses also + # flows through here. + if entry_api_mode == "codex_responses" and not isinstance( + client, CodexAuxiliaryClient + ): + client = CodexAuxiliaryClient(client, final_model) + else: + client = _wrap_if_needed(client, final_model, custom_base) return (_to_async_client(client, final_model) if async_mode else (client, final_model)) logger.warning( @@ -1874,6 +1993,39 @@ def resolve_provider_client( "directly supported", provider) return None, None + elif pconfig.auth_type == "aws_sdk": + # AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via + # boto3's credential chain (IAM roles, SSO, env vars, instance metadata). 
+ try: + from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region + from agent.anthropic_adapter import build_anthropic_bedrock_client + except ImportError: + logger.warning("resolve_provider_client: bedrock requested but " + "boto3 or anthropic SDK not installed") + return None, None + + if not has_aws_credentials(): + logger.debug("resolve_provider_client: bedrock requested but " + "no AWS credentials found") + return None, None + + region = resolve_bedrock_region() + default_model = "anthropic.claude-haiku-4-5-20251001-v1:0" + final_model = _normalize_resolved_model(model or default_model, provider) + try: + real_client = build_anthropic_bedrock_client(region) + except ImportError as exc: + logger.warning("resolve_provider_client: cannot create Bedrock " + "client: %s", exc) + return None, None + client = AnthropicAuxiliaryClient( + real_client, final_model, api_key="aws-sdk", + base_url=f"https://bedrock-runtime.{region}.amazonaws.com", + ) + logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region) + return (_to_async_client(client, final_model) if async_mode + else (client, final_model)) + elif pconfig.auth_type in ("oauth_device_code", "oauth_external"): # OAuth providers — route through their specific try functions if provider == "nous": @@ -2842,6 +2994,49 @@ def call_llm( return _validate_llm_response( refreshed_client.chat.completions.create(**kwargs), task) + # ── Auth refresh retry ─────────────────────────────────────── + if (_is_auth_error(first_err) + and resolved_provider not in ("auto", "", None) + and not client_is_nous): + if _refresh_provider_credentials(resolved_provider): + logger.info( + "Auxiliary %s: refreshed %s credentials after auth error, retrying", + task or "call", resolved_provider, + ) + retry_client, retry_model = ( + resolve_vision_provider_client( + provider=resolved_provider, + model=final_model, + async_mode=False, + )[1:] + if task == "vision" + else _get_cached_client( + 
resolved_provider, + resolved_model, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + main_runtime=main_runtime, + ) + ) + if retry_client is not None: + retry_kwargs = _build_call_kwargs( + resolved_provider, + retry_model or final_model, + messages, + temperature=temperature, + max_tokens=max_tokens, + tools=tools, + timeout=effective_timeout, + extra_body=effective_extra_body, + base_url=resolved_base_url, + ) + _retry_base = str(getattr(retry_client, "base_url", "") or "") + if _is_anthropic_compat_endpoint(resolved_provider, _retry_base): + retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"]) + return _validate_llm_response( + retry_client.chat.completions.create(**retry_kwargs), task) + # ── Payment / credit exhaustion fallback ────────────────────── # When the resolved provider returns 402 or a credit-related error, # try alternative providers instead of giving up. This handles the @@ -3062,6 +3257,48 @@ async def async_call_llm( return _validate_llm_response( await refreshed_client.chat.completions.create(**kwargs), task) + # ── Auth refresh retry (mirrors sync call_llm) ─────────────── + if (_is_auth_error(first_err) + and resolved_provider not in ("auto", "", None) + and not client_is_nous): + if _refresh_provider_credentials(resolved_provider): + logger.info( + "Auxiliary %s (async): refreshed %s credentials after auth error, retrying", + task or "call", resolved_provider, + ) + if task == "vision": + _, retry_client, retry_model = resolve_vision_provider_client( + provider=resolved_provider, + model=final_model, + async_mode=True, + ) + else: + retry_client, retry_model = _get_cached_client( + resolved_provider, + resolved_model, + async_mode=True, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + ) + if retry_client is not None: + retry_kwargs = _build_call_kwargs( + resolved_provider, + retry_model or final_model, + messages, + 
temperature=temperature, + max_tokens=max_tokens, + tools=tools, + timeout=effective_timeout, + extra_body=effective_extra_body, + base_url=resolved_base_url, + ) + _retry_base = str(getattr(retry_client, "base_url", "") or "") + if _is_anthropic_compat_endpoint(resolved_provider, _retry_base): + retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"]) + return _validate_llm_response( + await retry_client.chat.completions.create(**retry_kwargs), task) + # ── Payment / connection fallback (mirrors sync call_llm) ───── should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err) is_auto = resolved_provider in ("auto", "", None) diff --git a/agent/bedrock_adapter.py b/agent/bedrock_adapter.py index 9e4297581..48674a562 100644 --- a/agent/bedrock_adapter.py +++ b/agent/bedrock_adapter.py @@ -87,6 +87,114 @@ def reset_client_cache(): _bedrock_control_client_cache.clear() +def invalidate_runtime_client(region: str) -> bool: + """Evict the cached ``bedrock-runtime`` client for a single region. + + Per-region counterpart to :func:`reset_client_cache`. Used by the converse + call wrappers to discard clients whose underlying HTTP connection has + gone stale, so the next call allocates a fresh client (with a fresh + connection pool) instead of reusing a dead socket. + + Returns True if a cached entry was evicted, False if the region was not + cached. + """ + existed = region in _bedrock_runtime_client_cache + _bedrock_runtime_client_cache.pop(region, None) + return existed + + +# --------------------------------------------------------------------------- +# Stale-connection detection +# --------------------------------------------------------------------------- +# +# boto3 caches its HTTPS connection pool inside the client object. 
When a +# pooled connection is killed out from under us (NAT timeout, VPN flap, +# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as +# one of a handful of low-level exceptions — most commonly +# ``botocore.exceptions.ConnectionClosedError`` or +# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal +# ``assert`` in a couple of paths (connection pool state checks, chunked +# response readers) which bubbles up as a bare ``AssertionError`` with an +# empty ``str(exc)``. +# +# In all of these cases the client is the problem, not the request: retrying +# with the same cached client reproduces the failure until the process +# restarts. The fix is to evict the region's cached client so the next +# attempt builds a new one. + +_STALE_LIB_MODULE_PREFIXES = ( + "urllib3.", + "botocore.", + "boto3.", +) + + +def _traceback_frames_modules(exc: BaseException): + """Yield ``__name__``-style module strings for each frame in exc's traceback.""" + tb = getattr(exc, "__traceback__", None) + while tb is not None: + frame = tb.tb_frame + module = frame.f_globals.get("__name__", "") + yield module or "" + tb = tb.tb_next + + +def is_stale_connection_error(exc: BaseException) -> bool: + """Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection. + + Matches: + * ``botocore.exceptions.ConnectionError`` and subclasses + (``ConnectionClosedError``, ``EndpointConnectionError``, + ``ReadTimeoutError``, ``ConnectTimeoutError``). + * ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` / + ``ConnectionError`` (best-effort import — urllib3 is a transitive + dependency of botocore so it is always available in practice). + * Bare ``AssertionError`` raised from a frame inside urllib3, botocore, + or boto3. These are internal-invariant failures (typically triggered + by corrupted connection-pool state after a dropped socket) and are + recoverable by swapping the client. 
+ + Non-library ``AssertionError``s (from application code or tests) are + intentionally not matched — only library-internal asserts signal stale + connection state. + """ + # botocore: the canonical signal — HTTPClientError is the umbrella for + # ConnectionClosedError, ReadTimeoutError, EndpointConnectionError, + # ConnectTimeoutError, and ProxyConnectionError. ConnectionError covers + # the same family via a different branch of the hierarchy. + try: + from botocore.exceptions import ( + ConnectionError as BotoConnectionError, + HTTPClientError, + ) + botocore_errors: tuple = (BotoConnectionError, HTTPClientError) + except ImportError: # pragma: no cover — botocore always present with boto3 + botocore_errors = () + if botocore_errors and isinstance(exc, botocore_errors): + return True + + # urllib3: low-level transport failures + try: + from urllib3.exceptions import ( + ProtocolError, + NewConnectionError, + ConnectionError as Urllib3ConnectionError, + ) + urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError) + except ImportError: # pragma: no cover + urllib3_errors = () + if urllib3_errors and isinstance(exc, urllib3_errors): + return True + + # Library-internal AssertionError (urllib3 / botocore / boto3) + if isinstance(exc, AssertionError): + for module in _traceback_frames_modules(exc): + if any(module.startswith(prefix) for prefix in _STALE_LIB_MODULE_PREFIXES): + return True + + return False + + # --------------------------------------------------------------------------- # AWS credential detection # --------------------------------------------------------------------------- @@ -787,7 +895,17 @@ def call_converse( guardrail_config=guardrail_config, ) - response = client.converse(**kwargs) + try: + response = client.converse(**kwargs) + except Exception as exc: + if is_stale_connection_error(exc): + logger.warning( + "bedrock: stale-connection error on converse(region=%s, model=%s): " + "%s — evicting cached client so the next call 
reconnects.", + region, model, type(exc).__name__, + ) + invalidate_runtime_client(region) + raise return normalize_converse_response(response) @@ -819,7 +937,17 @@ def call_converse_stream( guardrail_config=guardrail_config, ) - response = client.converse_stream(**kwargs) + try: + response = client.converse_stream(**kwargs) + except Exception as exc: + if is_stale_connection_error(exc): + logger.warning( + "bedrock: stale-connection error on converse_stream(region=%s, " + "model=%s): %s — evicting cached client so the next call reconnects.", + region, model, type(exc).__name__, + ) + invalidate_runtime_client(region) + raise return normalize_converse_stream_events(response) diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py index 4d3e5590b..3b007a762 100644 --- a/agent/codex_responses_adapter.py +++ b/agent/codex_responses_adapter.py @@ -23,6 +23,23 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY logger = logging.getLogger(__name__) +# Matches Codex/Harmony tool-call serialization that occasionally leaks into +# assistant-message content when the model fails to emit a structured +# ``function_call`` item. Accepts the common forms: +# +# to=functions.exec_command +# assistant to=functions.exec_command +# <|channel|>commentary to=functions.exec_command +# +# ``to=functions.`` is the stable marker — the optional ``assistant`` or +# Harmony channel prefix varies by degeneration mode. Case-insensitive to +# cover lowercase/uppercase ``assistant`` variants. 
+_TOOL_CALL_LEAK_PATTERN = re.compile( + r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*", + re.IGNORECASE, +) + + # --------------------------------------------------------------------------- # Multimodal content helpers # --------------------------------------------------------------------------- @@ -787,6 +804,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]: if isinstance(out_text, str): final_text = out_text.strip() + # ── Tool-call leak recovery ────────────────────────────────── + # gpt-5.x on the Codex Responses API sometimes degenerates and emits + # what should be a structured `function_call` item as plain assistant + # text using the Harmony/Codex serialization (``to=functions.foo + # {json}`` or ``assistant to=functions.foo {json}``). The model + # intended to call a tool, but the intent never made it into + # ``response.output`` as a ``function_call`` item, so ``tool_calls`` + # is empty here. If we pass this through, the parent sees a + # confident-looking summary with no audit trail (empty ``tool_trace``) + # and no tools actually ran — the Taiwan-embassy-email incident. + # + # Detection: leaked tokens always contain ``to=functions.`` and + # the assistant message has no real tool calls. Treat it as incomplete + # so the existing Codex-incomplete continuation path (3 retries, + # handled in run_agent.py) gets a chance to re-elicit a proper + # ``function_call`` item. The existing loop already handles message + # append, dedup, and retry budget. + leaked_tool_call_text = False + if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text): + leaked_tool_call_text = True + logger.warning( + "Codex response contains leaked tool-call text in assistant content " + "(no structured function_call items). Treating as incomplete so the " + "continuation path can re-elicit a proper tool call. Leaked snippet: %r", + final_text[:300], + ) + # Clear the text so downstream code doesn't surface the garbage as + # a summary. 
The encrypted reasoning items (if any) are preserved + # so the model keeps its chain-of-thought on the retry. + final_text = "" + assistant_message = SimpleNamespace( content=final_text, tool_calls=tool_calls, @@ -798,6 +846,8 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]: if tool_calls: finish_reason = "tool_calls" + elif leaked_tool_call_text: + finish_reason = "incomplete" elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): finish_reason = "incomplete" elif reasoning_items_raw and not final_text: diff --git a/agent/context_compressor.py b/agent/context_compressor.py index f8036851f..ef40cbfaf 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine): self._context_probed = False self._context_probe_persistable = False self._previous_summary = None + self._last_summary_error = None self._last_compression_savings_pct = 100.0 self._ineffective_compression_count = 0 @@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine): self._last_compression_savings_pct: float = 100.0 self._ineffective_compression_count: int = 0 self._summary_failure_cooldown_until: float = 0.0 + self._last_summary_error: Optional[str] = None def update_from_response(self, usage: Dict[str, Any]): """Update tracked token usage from API response.""" @@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio self._previous_summary = summary self._summary_failure_cooldown_until = 0.0 self._summary_model_fallen_back = False + self._last_summary_error = None return self._with_summary_prefix(summary) except RuntimeError: # No provider configured — long cooldown, unlikely to self-resolve self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS + self._last_summary_error = "no auxiliary LLM provider configured" logging.warning("Context compression: no provider available for " "summary. 
Middle turns will be dropped without summary " "for %d seconds.", @@ -853,6 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Transient errors (timeout, rate limit, network) — shorter cooldown _transient_cooldown = 60 self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown + err_text = str(e).strip() or e.__class__.__name__ + if len(err_text) > 220: + err_text = err_text[:217].rstrip() + "..." + self._last_summary_error = err_text logging.warning( "Failed to generate context summary: %s. " "Further summary attempts paused for %d seconds.", @@ -1099,6 +1107,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio return max(cut_idx, head_end + 1) + # ------------------------------------------------------------------ + # ContextEngine: manual /compress preflight + # ------------------------------------------------------------------ + + def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool: + """Return True if there is a non-empty middle region to compact. + + Overrides the ABC default so the gateway ``/compress`` guard can + skip the LLM call when the transcript is still entirely inside + the protected head/tail. + """ + compress_start = self._align_boundary_forward(messages, self.protect_first_n) + compress_end = self._find_tail_cut_by_tokens(messages, compress_start) + return compress_start < compress_end + # ------------------------------------------------------------------ # Main compression entry point # ------------------------------------------------------------------ diff --git a/agent/context_engine.py b/agent/context_engine.py index 6ae90b6cd..bbafcd29c 100644 --- a/agent/context_engine.py +++ b/agent/context_engine.py @@ -78,6 +78,7 @@ class ContextEngine(ABC): self, messages: List[Dict[str, Any]], current_tokens: int = None, + focus_topic: str = None, ) -> List[Dict[str, Any]]: """Compact the message list and return the new message list. 
@@ -86,6 +87,12 @@ class ContextEngine(ABC): context budget. The implementation is free to summarize, build a DAG, or do anything else — as long as the returned list is a valid OpenAI-format message sequence. + + Args: + focus_topic: Optional topic string from manual ``/compress ``. + Engines that support guided compression should prioritise + preserving information related to this topic. Engines that + don't support it may simply ignore this argument. """ # -- Optional: pre-flight check ---------------------------------------- @@ -98,6 +105,21 @@ class ContextEngine(ABC): """ return False + # -- Optional: manual /compress preflight ------------------------------ + + def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool: + """Quick check: is there anything in ``messages`` that can be compacted? + + Used by the gateway ``/compress`` command as a preflight guard — + returning False lets the gateway report "nothing to compress yet" + without making an LLM call. + + Default returns True (always attempt). Engines with a cheap way + to introspect their own head/tail boundaries should override this + to return False when the transcript is still entirely protected. 
+ """ + return True + # -- Optional: session lifecycle --------------------------------------- def on_session_start(self, session_id: str, **kwargs) -> None: diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 783f94956..94d40d2d9 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -46,6 +46,47 @@ def _resolve_args() -> list[str]: return shlex.split(raw) +def _resolve_home_dir() -> str: + """Return a stable HOME for child ACP processes.""" + + try: + from hermes_constants import get_subprocess_home + + profile_home = get_subprocess_home() + if profile_home: + return profile_home + except Exception: + pass + + home = os.environ.get("HOME", "").strip() + if home: + return home + + expanded = os.path.expanduser("~") + if expanded and expanded != "~": + return expanded + + try: + import pwd + + resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() + if resolved: + return resolved + except Exception: + pass + + # Last resort: /tmp (writable on any POSIX system). Avoids crashing the + # subprocess with no HOME; callers can set HERMES_HOME explicitly if they + # need a different writable dir. 
+ return "/tmp" + + +def _build_subprocess_env() -> dict[str, str]: + env = os.environ.copy() + env["HOME"] = _resolve_home_dir() + return env + + def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]: return { "jsonrpc": "2.0", @@ -382,6 +423,7 @@ class CopilotACPClient: text=True, bufsize=1, cwd=self._acp_cwd, + env=_build_subprocess_env(), ) except FileNotFoundError as exc: raise RuntimeError( diff --git a/agent/credential_pool.py b/agent/credential_pool.py index de8d03185..f6cb24dd6 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -455,6 +455,61 @@ class CredentialPool: logger.debug("Failed to sync from credentials file: %s", exc) return entry + def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential: + """Sync a Nous pool entry from auth.json if tokens differ. + + Nous OAuth refresh tokens are single-use. When another process + (e.g. a concurrent cron) refreshes the token via + ``resolve_nous_runtime_credentials``, it writes fresh tokens to + auth.json under ``_auth_store_lock``. The pool entry's tokens + become stale. This method detects that and adopts the newer pair, + avoiding a "refresh token reuse" revocation on the Nous Portal. 
+ """ + if self.provider != "nous" or entry.source != "device_code": + return entry + try: + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "nous") + if not state: + return entry + store_refresh = state.get("refresh_token", "") + store_access = state.get("access_token", "") + if store_refresh and store_refresh != entry.refresh_token: + logger.debug( + "Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)", + entry.id, + ) + field_updates: Dict[str, Any] = { + "access_token": store_access, + "refresh_token": store_refresh, + "last_status": None, + "last_status_at": None, + "last_error_code": None, + } + if state.get("expires_at"): + field_updates["expires_at"] = state["expires_at"] + if state.get("agent_key"): + field_updates["agent_key"] = state["agent_key"] + if state.get("agent_key_expires_at"): + field_updates["agent_key_expires_at"] = state["agent_key_expires_at"] + if state.get("inference_base_url"): + field_updates["inference_base_url"] = state["inference_base_url"] + extra_updates = dict(entry.extra) + for extra_key in ("obtained_at", "expires_in", "agent_key_id", + "agent_key_expires_in", "agent_key_reused", + "agent_key_obtained_at"): + val = state.get(extra_key) + if val is not None: + extra_updates[extra_key] = val + updated = replace(entry, extra=extra_updates, **field_updates) + self._replace_entry(entry, updated) + self._persist() + return updated + except Exception as exc: + logger.debug("Failed to sync Nous entry from auth.json: %s", exc) + return entry + def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None: """Write refreshed pool entry tokens back to auth.json providers. 
@@ -561,6 +616,9 @@ class CredentialPool: last_refresh=refreshed.get("last_refresh"), ) elif self.provider == "nous": + synced = self._sync_nous_entry_from_auth_store(entry) + if synced is not entry: + entry = synced nous_state = { "access_token": entry.access_token, "refresh_token": entry.refresh_token, @@ -635,6 +693,26 @@ class CredentialPool: # Credentials file had a valid (non-expired) token — use it directly logger.debug("Credentials file has valid token, using without refresh") return synced + # For nous: another process may have consumed the refresh token + # between our proactive sync and the HTTP call. Re-sync from + # auth.json and adopt the fresh tokens if available. + if self.provider == "nous": + synced = self._sync_nous_entry_from_auth_store(entry) + if synced.refresh_token != entry.refresh_token: + logger.debug("Nous refresh failed but auth.json has newer tokens — adopting") + updated = replace( + synced, + last_status=STATUS_OK, + last_status_at=None, + last_error_code=None, + last_error_reason=None, + last_error_message=None, + last_error_reset_at=None, + ) + self._replace_entry(synced, updated) + self._persist() + self._sync_device_code_entry_to_auth_store(updated) + return updated self._mark_exhausted(entry, None) return None @@ -698,6 +776,17 @@ class CredentialPool: if synced is not entry: entry = synced cleared_any = True + # For nous entries, sync from auth.json before status checks. + # Another process may have successfully refreshed via + # resolve_nous_runtime_credentials(), making this entry's + # exhausted status stale. 
+ if (self.provider == "nous" + and entry.source == "device_code" + and entry.last_status == STATUS_EXHAUSTED): + synced = self._sync_nous_entry_from_auth_store(entry) + if synced is not entry: + entry = synced + cleared_any = True if entry.last_status == STATUS_EXHAUSTED: exhausted_until = _exhausted_until(entry) if exhausted_until is not None and now < exhausted_until: @@ -739,8 +828,11 @@ class CredentialPool: if self._strategy == STRATEGY_LEAST_USED and len(available) > 1: entry = min(available, key=lambda e: e.request_count) + # Increment usage counter so subsequent selections distribute load + updated = replace(entry, request_count=entry.request_count + 1) + self._replace_entry(entry, updated) self._current_id = entry.id - return entry + return updated if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1: entry = available[0] @@ -1056,6 +1148,18 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup "inference_base_url": state.get("inference_base_url"), "agent_key": state.get("agent_key"), "agent_key_expires_at": state.get("agent_key_expires_at"), + # Carry the mint/refresh timestamps into the pool so + # freshness-sensitive consumers (self-heal hooks, pool + # pruning by age) can distinguish just-minted credentials + # from stale ones. Without these, fresh device_code + # entries get obtained_at=None and look older than they + # are (#15099). + "obtained_at": state.get("obtained_at"), + "expires_in": state.get("expires_in"), + "agent_key_id": state.get("agent_key_id"), + "agent_key_expires_in": state.get("agent_key_expires_in"), + "agent_key_reused": state.get("agent_key_reused"), + "agent_key_obtained_at": state.get("agent_key_obtained_at"), "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None, "label": seeded_label, }, @@ -1066,9 +1170,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup # env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN). 
They don't live in # the auth store or credential pool, so we resolve them here. try: - from hermes_cli.copilot_auth import resolve_copilot_token + from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token token, source = resolve_copilot_token() if token: + api_token = get_copilot_api_token(token) source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}" if not _is_suppressed(provider, source_name): active_sources.add(source_name) @@ -1080,7 +1185,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup { "source": source_name, "auth_type": AUTH_TYPE_API_KEY, - "access_token": token, + "access_token": api_token, "base_url": pconfig.inference_base_url if pconfig else "", "label": source, }, diff --git a/agent/error_classifier.py b/agent/error_classifier.py index 04875b6a5..87324d676 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -45,6 +45,7 @@ class FailoverReason(enum.Enum): # Model model_not_found = "model_not_found" # 404 or invalid model — fallback to different model + provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy # Request format format_error = "format_error" # 400 bad request — abort or strip + retry @@ -194,6 +195,29 @@ _MODEL_NOT_FOUND_PATTERNS = [ "unsupported model", ] +# OpenRouter aggregator policy-block patterns. +# +# When a user's OpenRouter account privacy setting (or a per-request +# `provider.data_collection: deny` preference) excludes the only endpoint +# serving a model, OpenRouter returns 404 with a *specific* message that is +# distinct from "model not found": +# +# "No endpoints available matching your guardrail restrictions and +# data policy. 
Configure: https://openrouter.ai/settings/privacy" +# +# We classify this as `provider_policy_blocked` rather than +# `model_not_found` because: +# - The model *exists* — model_not_found is misleading in logs +# - Provider fallback won't help: the account-level setting applies to +# every call on the same OpenRouter account +# - The error body already contains the fix URL, so the user gets +# actionable guidance without us rewriting the message +_PROVIDER_POLICY_BLOCKED_PATTERNS = [ + "no endpoints available matching your guardrail", + "no endpoints available matching your data policy", + "no endpoints found matching your data policy", +] + # Auth patterns (non-status-code signals) _AUTH_PATTERNS = [ "invalid api key", @@ -319,6 +343,11 @@ def classify_api_error( """ status_code = _extract_status_code(error) error_type = type(error).__name__ + # Copilot/GitHub Models RateLimitError may not set .status_code; force 429 + # so downstream rate-limit handling (classifier reason, pool rotation, + # fallback gating) fires correctly instead of misclassifying as generic. + if status_code is None and error_type == "RateLimitError": + status_code = 429 body = _extract_error_body(error) error_code = _extract_error_code(body) @@ -523,6 +552,17 @@ def _classify_by_status( return _classify_402(error_msg, result_fn) if status_code == 404: + # OpenRouter policy-block 404 — distinct from "model not found". + # The model exists; the user's account privacy setting excludes the + # only endpoint serving it. Falling back to another provider won't + # help (same account setting applies). The error body already + # contains the fix URL, so just surface it. 
+ if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS): + return result_fn( + FailoverReason.provider_policy_blocked, + retryable=False, + should_fallback=False, + ) if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS): return result_fn( FailoverReason.model_not_found, @@ -640,6 +680,12 @@ def _classify_400( ) # Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter). + if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS): + return result_fn( + FailoverReason.provider_policy_blocked, + retryable=False, + should_fallback=False, + ) if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS): return result_fn( FailoverReason.model_not_found, @@ -812,6 +858,15 @@ def _classify_by_message( should_fallback=True, ) + # Provider policy-block (aggregator-side guardrail) — check before + # model_not_found so we don't mis-label as a missing model. + if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS): + return result_fn( + FailoverReason.provider_policy_blocked, + retryable=False, + should_fallback=False, + ) + # Model not found patterns if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS): return result_fn( diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py index 406e4a19b..5f64636f2 100644 --- a/agent/gemini_native_adapter.py +++ b/agent/gemini_native_adapter.py @@ -44,6 +44,97 @@ def is_native_gemini_base_url(base_url: str) -> bool: return not normalized.endswith("/openai") +def probe_gemini_tier( + api_key: str, + base_url: str = DEFAULT_GEMINI_BASE_URL, + *, + model: str = "gemini-2.5-flash", + timeout: float = 10.0, +) -> str: + """Probe a Google AI Studio API key and return its tier. + + Returns one of: + + - ``"free"`` -- key is on the free tier (unusable with Hermes) + - ``"paid"`` -- key is on a paid tier + - ``"unknown"`` -- probe failed; callers should proceed without blocking. 
+ """ + key = (api_key or "").strip() + if not key: + return "unknown" + + normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/") + if not normalized_base: + normalized_base = DEFAULT_GEMINI_BASE_URL + if normalized_base.lower().endswith("/openai"): + normalized_base = normalized_base[: -len("/openai")] + + url = f"{normalized_base}/models/{model}:generateContent" + payload = { + "contents": [{"role": "user", "parts": [{"text": "hi"}]}], + "generationConfig": {"maxOutputTokens": 1}, + } + + try: + with httpx.Client(timeout=timeout) as client: + resp = client.post( + url, + params={"key": key}, + json=payload, + headers={"Content-Type": "application/json"}, + ) + except Exception as exc: + logger.debug("probe_gemini_tier: network error: %s", exc) + return "unknown" + + headers_lower = {k.lower(): v for k, v in resp.headers.items()} + rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day") + if rpd_header: + try: + rpd_val = int(rpd_header) + except (TypeError, ValueError): + rpd_val = None + # Published free-tier daily caps (Dec 2025): + # gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000 + # Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free. + if rpd_val is not None and rpd_val <= 1000: + return "free" + if rpd_val is not None and rpd_val > 1000: + return "paid" + + if resp.status_code == 429: + body_text = "" + try: + body_text = resp.text or "" + except Exception: + body_text = "" + if "free_tier" in body_text.lower(): + return "free" + return "paid" + + if 200 <= resp.status_code < 300: + return "paid" + + return "unknown" + + +def is_free_tier_quota_error(error_message: str) -> bool: + """Return True when a Gemini 429 message indicates free-tier exhaustion.""" + if not error_message: + return False + return "free_tier" in error_message.lower() + + +_FREE_TIER_GUIDANCE = ( + "\n\nYour Google API key is on the free tier (<= 250 requests/day for " + "gemini-2.5-flash). 
Hermes typically makes 3-10 API calls per user turn, " + "so the free tier is exhausted in a handful of messages and cannot sustain " + "an agent session. Enable billing on your Google Cloud project and " + "regenerate the key in a billing-enabled project: " + "https://aistudio.google.com/apikey" +) + + class GeminiAPIError(Exception): """Error shape compatible with Hermes retry/error classification.""" @@ -650,6 +741,12 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError: else: message = f"Gemini returned HTTP {status}: {body_text[:500]}" + # Free-tier quota exhaustion -> append actionable guidance so users who + # bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn + # that the free tier cannot sustain an agent session. + if status == 429 and is_free_tier_quota_error(err_message or body_text): + message = message + _FREE_TIER_GUIDANCE + return GeminiAPIError( message, code=code, @@ -704,6 +801,13 @@ class GeminiNativeClient: http_client: Optional[httpx.Client] = None, **_: Any, ) -> None: + if not (api_key or "").strip(): + raise RuntimeError( + "Gemini native client requires an API key, but none was provided. " + "Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env " + "(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` " + "to configure the Google provider." + ) self.api_key = api_key normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/") if normalized_base.endswith("/openai"): diff --git a/agent/gemini_schema.py b/agent/gemini_schema.py index 904c99d31..3608837a1 100644 --- a/agent/gemini_schema.py +++ b/agent/gemini_schema.py @@ -73,6 +73,20 @@ def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]: ] continue cleaned[key] = value + + # Gemini's Schema validator requires every ``enum`` entry to be a string, + # even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``. + # OpenAI / OpenRouter / Anthropic accept typed enums (e.g. 
Discord's + # ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``), + # so we only drop the ``enum`` when it would collide with Gemini's rule. + # Keeping ``type: integer`` plus the human-readable description gives the + # model enough guidance; the tool handler still validates the value. + enum_val = cleaned.get("enum") + type_val = cleaned.get("type") + if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}: + if any(not isinstance(item, str) for item in enum_val): + cleaned.pop("enum", None) + return cleaned diff --git a/agent/memory_manager.py b/agent/memory_manager.py index 2435c3f24..62cbd6ae1 100644 --- a/agent/memory_manager.py +++ b/agent/memory_manager.py @@ -31,6 +31,7 @@ from __future__ import annotations import json import logging import re +import inspect from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider @@ -312,7 +313,39 @@ class MemoryManager: ) return "\n\n".join(parts) - def on_memory_write(self, action: str, target: str, content: str) -> None: + @staticmethod + def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str: + """Return how to pass metadata to a provider's memory-write hook.""" + try: + signature = inspect.signature(provider.on_memory_write) + except (TypeError, ValueError): + return "keyword" + + params = list(signature.parameters.values()) + if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params): + return "keyword" + if "metadata" in signature.parameters: + return "keyword" + + accepted = [ + p for p in params + if p.kind in ( + inspect.Parameter.POSITIONAL_ONLY, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + inspect.Parameter.KEYWORD_ONLY, + ) + ] + if len(accepted) >= 4: + return "positional" + return "legacy" + + def on_memory_write( + self, + action: str, + target: str, + content: str, + metadata: Optional[Dict[str, Any]] = None, + ) -> None: """Notify external providers when the built-in memory tool writes. 
Skips the builtin provider itself (it's the source of the write). @@ -321,7 +354,15 @@ class MemoryManager: if provider.name == "builtin": continue try: - provider.on_memory_write(action, target, content) + metadata_mode = self._provider_memory_write_metadata_mode(provider) + if metadata_mode == "keyword": + provider.on_memory_write( + action, target, content, metadata=dict(metadata or {}) + ) + elif metadata_mode == "positional": + provider.on_memory_write(action, target, content, dict(metadata or {})) + else: + provider.on_memory_write(action, target, content) except Exception as e: logger.debug( "Memory provider '%s' on_memory_write failed: %s", diff --git a/agent/memory_provider.py b/agent/memory_provider.py index 24593e334..535338f4e 100644 --- a/agent/memory_provider.py +++ b/agent/memory_provider.py @@ -26,7 +26,7 @@ Optional hooks (override to opt in): on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context on_session_end(messages) — end-of-session extraction on_pre_compress(messages) -> str — extract before context compression - on_memory_write(action, target, content) — mirror built-in memory writes + on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes on_delegation(task, result, **kwargs) — parent-side observation of subagent work """ @@ -34,7 +34,7 @@ from __future__ import annotations import logging from abc import ABC, abstractmethod -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) @@ -220,12 +220,21 @@ class MemoryProvider(ABC): should all have ``env_var`` set and this method stays no-op). """ - def on_memory_write(self, action: str, target: str, content: str) -> None: + def on_memory_write( + self, + action: str, + target: str, + content: str, + metadata: Optional[Dict[str, Any]] = None, + ) -> None: """Called when the built-in memory tool writes an entry. 
action: 'add', 'replace', or 'remove' target: 'memory' or 'user' content: the entry content + metadata: structured provenance for the write, when available. Common + keys include ``write_origin``, ``execution_context``, ``session_id``, + ``parent_session_id``, ``platform``, and ``tool_name``. Use to mirror built-in memory writes to your backend. """ diff --git a/agent/model_metadata.py b/agent/model_metadata.py index e3c07684c..850e16662 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -6,6 +6,7 @@ and run_agent.py for pre-flight context checks. import ipaddress import logging +import os import re import time from pathlib import Path @@ -21,6 +22,25 @@ from hermes_constants import OPENROUTER_MODELS_URL logger = logging.getLogger(__name__) + +def _resolve_requests_verify() -> bool | str: + """Resolve SSL verify setting for `requests` calls from env vars. + + The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE + by default. Hermes also honours HERMES_CA_BUNDLE (its own convention) + and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so + that a single env var can cover both `requests` and `httpx` callsites + inside the same process. + + Returns either a filesystem path to a CA bundle, or True to defer to + the requests default (certifi). + """ + for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"): + val = os.getenv(env_var) + if val and os.path.isfile(val): + return val + return True + # Provider names that can appear as a "provider:" prefix before a model ID. # Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b") # are preserved so the full model name reaches cache lookups and server queries. @@ -123,6 +143,10 @@ DEFAULT_CONTEXT_LENGTHS = { "claude": 200000, # OpenAI — GPT-5 family (most have 400k; specific overrides first) # Source: https://developers.openai.com/api/docs/models + # GPT-5.5 (launched Apr 23 2026). 
400k is the fallback for providers we + # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of + # Apr 2026) and is resolved via _resolve_codex_oauth_context_length(). + "gpt-5.5": 400000, "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context) @@ -183,12 +207,12 @@ DEFAULT_CONTEXT_LENGTHS = { "moonshotai/Kimi-K2.6": 262144, "moonshotai/Kimi-K2-Thinking": 262144, "MiniMaxAI/MiniMax-M2.5": 204800, - "XiaomiMiMo/MiMo-V2-Flash": 256000, - "mimo-v2-pro": 1000000, - "mimo-v2-omni": 256000, - "mimo-v2-flash": 256000, - "mimo-v2.5-pro": 1000000, - "mimo-v2.5": 1000000, + "XiaomiMiMo/MiMo-V2-Flash": 262144, + "mimo-v2-pro": 1048576, + "mimo-v2.5-pro": 1048576, + "mimo-v2.5": 1048576, + "mimo-v2-omni": 262144, + "mimo-v2-flash": 262144, "zai-org/GLM-5": 202752, } @@ -491,7 +515,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any return _model_metadata_cache try: - response = requests.get(OPENROUTER_MODELS_URL, timeout=10) + response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify()) response.raise_for_status() data = response.json() @@ -558,6 +582,7 @@ def fetch_endpoint_model_metadata( server_url.rstrip("/") + "/api/v1/models", headers=headers, timeout=10, + verify=_resolve_requests_verify(), ) response.raise_for_status() payload = response.json() @@ -606,7 +631,7 @@ def fetch_endpoint_model_metadata( for candidate in candidates: url = candidate.rstrip("/") + "/models" try: - response = requests.get(url, headers=headers, timeout=10) + response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify()) response.raise_for_status() payload = response.json() cache: Dict[str, Dict[str, Any]] = {} @@ -637,9 +662,10 @@ def fetch_endpoint_model_metadata( try: # Try /v1/props first (current llama.cpp); fall back to /props for older builds base = 
candidate.rstrip("/").replace("/v1", "") - props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5) + _verify = _resolve_requests_verify() + props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify) if not props_resp.ok: - props_resp = requests.get(base + "/props", headers=headers, timeout=5) + props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify) if props_resp.ok: props = props_resp.json() gen_settings = props.get("default_generation_settings", {}) @@ -711,6 +737,22 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]: return cache.get(key) +def _invalidate_cached_context_length(model: str, base_url: str) -> None: + """Drop a stale cache entry so it gets re-resolved on the next lookup.""" + key = f"{model}@{base_url}" + cache = _load_context_cache() + if key not in cache: + return + del cache[key] + path = _get_context_cache_path() + try: + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + yaml.dump({"context_lengths": cache}, f, default_flow_style=False) + except Exception as e: + logger.debug("Failed to invalidate context length cache entry %s: %s", key, e) + + def get_next_probe_tier(current_length: int) -> Optional[int]: """Return the next lower probe tier, or None if already at minimum.""" for tier in CONTEXT_PROBE_TIERS: @@ -988,7 +1030,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) -> "x-api-key": api_key, "anthropic-version": "2023-06-01", } - resp = requests.get(url, headers=headers, timeout=10) + resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify()) if resp.status_code != 200: return None data = resp.json() @@ -1002,6 +1044,116 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) -> return None +# Known ChatGPT Codex OAuth context windows (observed via live +# chatgpt.com/backend-api/codex/models probe, Apr 2026). 
These are the +# `context_window` values, which are what Codex actually enforces — the +# direct OpenAI API has larger limits for the same slugs, but Codex OAuth +# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). +# +# Used as a fallback when the live probe fails (no token, network error). +# Longest keys first so substring match picks the most specific entry. +_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = { + "gpt-5.1-codex-max": 272_000, + "gpt-5.1-codex-mini": 272_000, + "gpt-5.3-codex": 272_000, + "gpt-5.2-codex": 272_000, + "gpt-5.4-mini": 272_000, + "gpt-5.5": 272_000, + "gpt-5.4": 272_000, + "gpt-5.2": 272_000, + "gpt-5": 272_000, +} + + +_codex_oauth_context_cache: Dict[str, int] = {} +_codex_oauth_context_cache_time: float = 0.0 +_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600 # 1 hour + + +def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]: + """Probe the ChatGPT Codex /models endpoint for per-slug context windows. + + Codex OAuth imposes its own context limits that differ from the direct + OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The + `context_window` field in each model entry is the authoritative source. + + Returns a ``{slug: context_window}`` dict. Empty on failure. 
+ """ + global _codex_oauth_context_cache, _codex_oauth_context_cache_time + now = time.time() + if ( + _codex_oauth_context_cache + and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL + ): + return _codex_oauth_context_cache + + try: + resp = requests.get( + "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0", + headers={"Authorization": f"Bearer {access_token}"}, + timeout=10, + verify=_resolve_requests_verify(), + ) + if resp.status_code != 200: + logger.debug( + "Codex /models probe returned HTTP %s; falling back to hardcoded defaults", + resp.status_code, + ) + return {} + data = resp.json() + except Exception as exc: + logger.debug("Codex /models probe failed: %s", exc) + return {} + + entries = data.get("models", []) if isinstance(data, dict) else [] + result: Dict[str, int] = {} + for item in entries: + if not isinstance(item, dict): + continue + slug = item.get("slug") + ctx = item.get("context_window") + if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0: + result[slug.strip()] = ctx + + if result: + _codex_oauth_context_cache = result + _codex_oauth_context_cache_time = now + return result + + +def _resolve_codex_oauth_context_length( + model: str, access_token: str = "" +) -> Optional[int]: + """Resolve a Codex OAuth model's real context window. + + Prefers a live probe of chatgpt.com/backend-api/codex/models (when we + have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``. + """ + model_bare = _strip_provider_prefix(model).strip() + if not model_bare: + return None + + if access_token: + live = _fetch_codex_oauth_context_lengths(access_token) + if model_bare in live: + return live[model_bare] + # Case-insensitive match in case casing drifts + model_lower = model_bare.lower() + for slug, ctx in live.items(): + if slug.lower() == model_lower: + return ctx + + # Fallback: longest-key-first substring match over hardcoded defaults. 
+ model_lower = model_bare.lower() + for slug, ctx in sorted( + _CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True + ): + if slug in model_lower: + return ctx + + return None + + def _resolve_nous_context_length(model: str) -> Optional[int]: """Resolve Nous Portal model context length via OpenRouter metadata. @@ -1047,6 +1199,7 @@ def get_model_context_length( Resolution order: 0. Explicit config override (model.context_length or custom_providers per-model) 1. Persistent cache (previously discovered via probing) + 1b. AWS Bedrock static table (must precede custom-endpoint probe) 2. Active endpoint metadata (/models for explicit custom endpoints) 3. Local server query (for local endpoints) 4. Anthropic /v1/models API (API-key users only, not OAuth) @@ -1069,7 +1222,41 @@ def get_model_context_length( if base_url: cached = get_cached_context_length(model, base_url) if cached is not None: - return cached + # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds + # resolved gpt-5.x to the direct-API value (e.g. 1.05M) via + # models.dev and persisted it. Codex OAuth caps at 272K for every + # slug, so any cached Codex entry at or above 400K is a leftover + # from the old resolution path. Drop it and fall through to the + # live /models probe in step 5 below. + if provider == "openai-codex" and cached >= 400_000: + logger.info( + "Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); " + "re-resolving via live /models probe", + model, base_url, f"{cached:,}", + ) + _invalidate_cached_context_length(model, base_url) + else: + return cached + + # 1b. AWS Bedrock — use static context length table. + # Bedrock's ListFoundationModels API doesn't expose context window sizes, + # so we maintain a curated table in bedrock_adapter.py that reflects + # AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native + # Anthropic API). 
This must run BEFORE the custom-endpoint probe at + # step 2 — bedrock-runtime..amazonaws.com is not in + # _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint, + # fail the /models probe (Bedrock doesn't expose that shape), and fall + # back to the 128K default before reaching the original step 4b branch. + if provider == "bedrock" or ( + base_url + and base_url_hostname(base_url).startswith("bedrock-runtime.") + and base_url_host_matches(base_url, "amazonaws.com") + ): + try: + from agent.bedrock_adapter import get_bedrock_context_length + return get_bedrock_context_length(model) + except ImportError: + pass # boto3 not installed — fall through to generic resolution # 2. Active endpoint metadata for truly custom/unknown endpoints. # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their @@ -1116,19 +1303,7 @@ def get_model_context_length( if ctx: return ctx - # 4b. AWS Bedrock — use static context length table. - # Bedrock's ListFoundationModels doesn't expose context window sizes, - # so we maintain a curated table in bedrock_adapter.py. - if provider == "bedrock" or ( - base_url - and base_url_hostname(base_url).startswith("bedrock-runtime.") - and base_url_host_matches(base_url, "amazonaws.com") - ): - try: - from agent.bedrock_adapter import get_bedrock_context_length - return get_bedrock_context_length(model) - except ImportError: - pass # boto3 not installed — fall through to generic resolution + # 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.) # 5. Provider-aware lookups (before generic OpenRouter cache) # These are provider-specific and take priority over the generic OR cache, @@ -1142,10 +1317,32 @@ def get_model_context_length( if inferred: effective_provider = inferred + # 5a. Copilot live /models API — max_prompt_tokens from the user's account. + # This catches account-specific models (e.g. claude-opus-4.6-1m) that + # don't exist in models.dev. 
For models that ARE in models.dev, this + # returns the provider-enforced limit which is what users can actually use. + if effective_provider in ("copilot", "copilot-acp", "github-copilot"): + try: + from hermes_cli.models import get_copilot_model_context + ctx = get_copilot_model_context(model, api_key=api_key) + if ctx: + return ctx + except Exception: + pass # Fall through to models.dev + if effective_provider == "nous": ctx = _resolve_nous_context_length(model) if ctx: return ctx + if effective_provider == "openai-codex": + # Codex OAuth enforces lower context limits than the direct OpenAI + # API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K + # on Codex). Authoritative source is Codex's own /models endpoint. + codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "") + if codex_ctx: + if base_url: + save_context_length(model, base_url, codex_ctx) + return codex_ctx if effective_provider: from agent.models_dev import lookup_models_dev_context ctx = lookup_models_dev_context(effective_provider, model) diff --git a/agent/moonshot_schema.py b/agent/moonshot_schema.py new file mode 100644 index 000000000..08585bab4 --- /dev/null +++ b/agent/moonshot_schema.py @@ -0,0 +1,190 @@ +"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset. + +Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI +tool calling. Requests that violate it fail with HTTP 400: + + tools.function.parameters is not a valid moonshot flavored json schema, + details: <...> + +Known rejection modes documented at +https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102 +and MoonshotAI/kimi-cli#1595: + +1. Every property schema must carry a ``type``. Standard JSON Schema allows + type to be omitted (the value is then unconstrained); Moonshot refuses. +2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not + the parent. 
Presence of both causes "type should be defined in anyOf + items instead of the parent schema". + +The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is +handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it +applies at MCP registration time for all providers. +""" + +from __future__ import annotations + +import copy +from typing import Any, Dict, List + +# Keys whose values are maps of name → schema (not schemas themselves). +# When we recurse, we walk the values of these maps as schemas, but we do +# NOT apply the missing-type repair to the map itself. +_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"}) + +# Keys whose values are lists of schemas. +_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"}) + +# Keys whose values are a single nested schema. +_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"}) + + +def _repair_schema(node: Any, is_schema: bool = True) -> Any: + """Recursively apply Moonshot repairs to a schema node. + + ``is_schema=True`` means this dict is a JSON Schema node and gets the + missing-type + anyOf-parent repairs applied. ``is_schema=False`` means + it's a container map (e.g. the value of ``properties``) and we only + recurse into its values. + """ + if isinstance(node, list): + # Lists only show up under schema-list keys (anyOf/oneOf/allOf), so + # every element is itself a schema. + return [_repair_schema(item, is_schema=True) for item in node] + if not isinstance(node, dict): + return node + + # Walk the dict, deciding per-key whether recursion is into a schema + # node, a container map, or a scalar. + repaired: Dict[str, Any] = {} + for key, value in node.items(): + if key in _SCHEMA_MAP_KEYS and isinstance(value, dict): + # Map of name → schema. Don't treat the map itself as a schema + # (it has no type / properties of its own), but each value is. 
+ repaired[key] = { + sub_key: _repair_schema(sub_val, is_schema=True) + for sub_key, sub_val in value.items() + } + elif key in _SCHEMA_LIST_KEYS and isinstance(value, list): + repaired[key] = [_repair_schema(v, is_schema=True) for v in value] + elif key in _SCHEMA_NODE_KEYS: + # items / not / additionalProperties: single nested schema. + # additionalProperties can also be a bool — leave those alone. + if isinstance(value, dict): + repaired[key] = _repair_schema(value, is_schema=True) + else: + repaired[key] = value + else: + # Scalars (description, title, format, enum values, etc.) pass through. + repaired[key] = value + + if not is_schema: + return repaired + + # Rule 2: when anyOf is present, type belongs only on the children. + if "anyOf" in repaired and isinstance(repaired["anyOf"], list): + repaired.pop("type", None) + return repaired + + # Rule 1: property schemas without type need one. $ref nodes are exempt + # — their type comes from the referenced definition. + if "$ref" in repaired: + return repaired + return _fill_missing_type(repaired) + + +def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]: + """Infer a reasonable ``type`` if this schema node has none.""" + if "type" in node and node["type"] not in (None, ""): + return node + + # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum`` + # → type of first enum value, else fall back to ``string`` (safest scalar). 
+ if "properties" in node or "required" in node or "additionalProperties" in node: + inferred = "object" + elif "items" in node or "prefixItems" in node: + inferred = "array" + elif "enum" in node and isinstance(node["enum"], list) and node["enum"]: + sample = node["enum"][0] + if isinstance(sample, bool): + inferred = "boolean" + elif isinstance(sample, int): + inferred = "integer" + elif isinstance(sample, float): + inferred = "number" + else: + inferred = "string" + else: + inferred = "string" + + return {**node, "type": inferred} + + +def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]: + """Normalize tool parameters to a Moonshot-compatible object schema. + + Returns a deep-copied schema with the two flavored-JSON-Schema repairs + applied. Input is not mutated. + """ + if not isinstance(parameters, dict): + return {"type": "object", "properties": {}} + + repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True) + if not isinstance(repaired, dict): + return {"type": "object", "properties": {}} + + # Top-level must be an object schema + if repaired.get("type") != "object": + repaired["type"] = "object" + if "properties" not in repaired: + repaired["properties"] = {} + + return repaired + + +def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters.""" + if not tools: + return tools + + sanitized: List[Dict[str, Any]] = [] + any_change = False + for tool in tools: + if not isinstance(tool, dict): + sanitized.append(tool) + continue + fn = tool.get("function") + if not isinstance(fn, dict): + sanitized.append(tool) + continue + params = fn.get("parameters") + repaired = sanitize_moonshot_tool_parameters(params) + if repaired is not params: + any_change = True + new_fn = {**fn, "parameters": repaired} + sanitized.append({**tool, "function": new_fn}) + else: + sanitized.append(tool) + + return sanitized if any_change else tools + + +def 
is_moonshot_model(model: str | None) -> bool: + """True for any Kimi / Moonshot model slug, regardless of aggregator prefix. + + Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator- + prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``). + Detection by model name covers Nous / OpenRouter / other aggregators that + route to Moonshot's inference, where the base URL is the aggregator's, not + ``api.moonshot.ai``. + """ + if not model: + return False + bare = model.strip().lower() + # Last path segment (covers aggregator-prefixed slugs) + tail = bare.rsplit("/", 1)[-1] + if tail.startswith("kimi-") or tail == "kimi": + return True + # Vendor-prefixed forms commonly used on aggregators + if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"): + return True + return False diff --git a/agent/skill_commands.py b/agent/skill_commands.py index a4345ca8c..6b73e83b3 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -1,154 +1,29 @@ -"""Shared slash command helpers for skills and built-in prompt-style modes. +"""Shared slash command helpers for skills. Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces -can invoke skills via /skill-name commands and prompt-only built-ins like -/plan. +can invoke skills via /skill-name commands. """ import json import logging import re -import subprocess -from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional from hermes_constants import display_hermes_home +from agent.skill_preprocessing import ( + expand_inline_shell as _expand_inline_shell, + load_skills_config as _load_skills_config, + substitute_template_vars as _substitute_template_vars, +) logger = logging.getLogger(__name__) _skill_commands: Dict[str, Dict[str, Any]] = {} -_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+") # Patterns for sanitizing skill names into clean hyphen-separated slugs. 
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]") _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}") -# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md. -# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are -# left as-is so the user can debug them. -_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}") - -# Matches inline shell snippets like: !`date +%Y-%m-%d` -# Non-greedy, single-line only — no newlines inside the backticks. -_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`") - -# Cap inline-shell output so a runaway command can't blow out the context. -_INLINE_SHELL_MAX_OUTPUT = 4000 - - -def _load_skills_config() -> dict: - """Load the ``skills`` section of config.yaml (best-effort).""" - try: - from hermes_cli.config import load_config - - cfg = load_config() or {} - skills_cfg = cfg.get("skills") - if isinstance(skills_cfg, dict): - return skills_cfg - except Exception: - logger.debug("Could not read skills config", exc_info=True) - return {} - - -def _substitute_template_vars( - content: str, - skill_dir: Path | None, - session_id: str | None, -) -> str: - """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content. - - Only substitutes tokens for which a concrete value is available — - unresolved tokens are left in place so the author can spot them. - """ - if not content: - return content - - skill_dir_str = str(skill_dir) if skill_dir else None - - def _replace(match: re.Match) -> str: - token = match.group(1) - if token == "HERMES_SKILL_DIR" and skill_dir_str: - return skill_dir_str - if token == "HERMES_SESSION_ID" and session_id: - return str(session_id) - return match.group(0) - - return _SKILL_TEMPLATE_RE.sub(_replace, content) - - -def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str: - """Execute a single inline-shell snippet and return its stdout (trimmed). 
- - Failures return a short ``[inline-shell error: ...]`` marker instead of - raising, so one bad snippet can't wreck the whole skill message. - """ - try: - completed = subprocess.run( - ["bash", "-c", command], - cwd=str(cwd) if cwd else None, - capture_output=True, - text=True, - timeout=max(1, int(timeout)), - check=False, - ) - except subprocess.TimeoutExpired: - return f"[inline-shell timeout after {timeout}s: {command}]" - except FileNotFoundError: - return f"[inline-shell error: bash not found]" - except Exception as exc: - return f"[inline-shell error: {exc}]" - - output = (completed.stdout or "").rstrip("\n") - if not output and completed.stderr: - output = completed.stderr.rstrip("\n") - if len(output) > _INLINE_SHELL_MAX_OUTPUT: - output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]" - return output - - -def _expand_inline_shell( - content: str, - skill_dir: Path | None, - timeout: int, -) -> str: - """Replace every !`cmd` snippet in ``content`` with its stdout. - - Runs each snippet with the skill directory as CWD so relative paths in - the snippet work the way the author expects. - """ - if "!`" not in content: - return content - - def _replace(match: re.Match) -> str: - cmd = match.group(1).strip() - if not cmd: - return "" - return _run_inline_shell(cmd, skill_dir, timeout) - - return _INLINE_SHELL_RE.sub(_replace, content) - - -def build_plan_path( - user_instruction: str = "", - *, - now: datetime | None = None, -) -> Path: - """Return the default workspace-relative markdown path for a /plan invocation. - - Relative paths are intentional: file tools are task/backend-aware and resolve - them against the active working directory for local, docker, ssh, modal, - daytona, and similar terminal backends. That keeps the plan with the active - workspace instead of the Hermes host's global home directory. 
- """ - slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else "" - slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-") - if slug: - slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-") - slug = slug or "conversation-plan" - timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S") - return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md" - - def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None: """Load a skill by name/path and return (loaded_payload, skill_dir, display_name).""" raw_identifier = (skill_identifier or "").strip() @@ -167,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu else: normalized = raw_identifier.lstrip("/") - loaded_skill = json.loads(skill_view(normalized, task_id=task_id)) + loaded_skill = json.loads( + skill_view(normalized, task_id=task_id, preprocess=False) + ) except Exception: return None @@ -345,7 +222,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]: _skill_commands = {} try: from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names - from agent.skill_utils import get_external_skills_dirs + from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files disabled = _get_disabled_skill_names() seen_names: set = set() @@ -356,7 +233,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]: dirs_to_scan.extend(get_external_skills_dirs()) for scan_dir in dirs_to_scan: - for skill_md in scan_dir.rglob("SKILL.md"): + for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"): if any(part in ('.git', '.github', '.hub') for part in skill_md.parts): continue try: diff --git a/agent/skill_preprocessing.py b/agent/skill_preprocessing.py new file mode 100644 index 000000000..b95d1ddda --- /dev/null +++ b/agent/skill_preprocessing.py @@ -0,0 +1,131 @@ +"""Shared SKILL.md 
preprocessing helpers.""" + +import logging +import re +import subprocess +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md. +# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are +# left as-is so the user can debug them. +_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}") + +# Matches inline shell snippets like: !`date +%Y-%m-%d` +# Non-greedy, single-line only -- no newlines inside the backticks. +_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`") + +# Cap inline-shell output so a runaway command can't blow out the context. +_INLINE_SHELL_MAX_OUTPUT = 4000 + + +def load_skills_config() -> dict: + """Load the ``skills`` section of config.yaml (best-effort).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + skills_cfg = cfg.get("skills") + if isinstance(skills_cfg, dict): + return skills_cfg + except Exception: + logger.debug("Could not read skills config", exc_info=True) + return {} + + +def substitute_template_vars( + content: str, + skill_dir: Path | None, + session_id: str | None, +) -> str: + """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content. + + Only substitutes tokens for which a concrete value is available -- + unresolved tokens are left in place so the author can spot them. + """ + if not content: + return content + + skill_dir_str = str(skill_dir) if skill_dir else None + + def _replace(match: re.Match) -> str: + token = match.group(1) + if token == "HERMES_SKILL_DIR" and skill_dir_str: + return skill_dir_str + if token == "HERMES_SESSION_ID" and session_id: + return str(session_id) + return match.group(0) + + return _SKILL_TEMPLATE_RE.sub(_replace, content) + + +def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str: + """Execute a single inline-shell snippet and return its stdout (trimmed). 
+ + Failures return a short ``[inline-shell error: ...]`` marker instead of + raising, so one bad snippet can't wreck the whole skill message. + """ + try: + completed = subprocess.run( + ["bash", "-c", command], + cwd=str(cwd) if cwd else None, + capture_output=True, + text=True, + timeout=max(1, int(timeout)), + check=False, + ) + except subprocess.TimeoutExpired: + return f"[inline-shell timeout after {timeout}s: {command}]" + except FileNotFoundError: + return "[inline-shell error: bash not found]" + except Exception as exc: + return f"[inline-shell error: {exc}]" + + output = (completed.stdout or "").rstrip("\n") + if not output and completed.stderr: + output = completed.stderr.rstrip("\n") + if len(output) > _INLINE_SHELL_MAX_OUTPUT: + output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]" + return output + + +def expand_inline_shell( + content: str, + skill_dir: Path | None, + timeout: int, +) -> str: + """Replace every !`cmd` snippet in ``content`` with its stdout. + + Runs each snippet with the skill directory as CWD so relative paths in + the snippet work the way the author expects. 
+ """ + if "!`" not in content: + return content + + def _replace(match: re.Match) -> str: + cmd = match.group(1).strip() + if not cmd: + return "" + return run_inline_shell(cmd, skill_dir, timeout) + + return _INLINE_SHELL_RE.sub(_replace, content) + + +def preprocess_skill_content( + content: str, + skill_dir: Path | None, + session_id: str | None = None, + skills_cfg: dict | None = None, +) -> str: + """Apply configured SKILL.md template and inline-shell preprocessing.""" + if not content: + return content + + cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config() + if cfg.get("template_vars", True): + content = substitute_template_vars(content, skill_dir, session_id) + if cfg.get("inline_shell", False): + timeout = int(cfg.get("inline_shell_timeout", 10) or 10) + content = expand_inline_shell(content, skill_dir, timeout) + return content diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index 900f59dcf..1cccf7e92 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly. import copy from typing import Any, Dict, List, Optional +from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools from agent.prompt_builder import DEVELOPER_ROLE_MODELS from agent.transports.base import ProviderTransport from agent.transports.types import NormalizedResponse, ToolCall, Usage @@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport): # Tools if tools: + # Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting + # tool parameters here keeps aggregator routes (Nous, OpenRouter, + # etc.) compatible, in addition to direct moonshot.ai endpoints. 
+ if is_moonshot_model(model): + tools = sanitize_moonshot_tools(tools) api_kwargs["tools"] = tools # max_tokens resolution — priority: ephemeral > user > provider default diff --git a/agent/transports/types.py b/agent/transports/types.py index 5199a5db1..74481f85c 100644 --- a/agent/transports/types.py +++ b/agent/transports/types.py @@ -61,6 +61,20 @@ class ToolCall: """Codex response_item_id from provider_data.""" return (self.provider_data or {}).get("response_item_id") + @property + def extra_content(self) -> Optional[Dict[str, Any]]: + """Gemini extra_content (thought_signature) from provider_data. + + Gemini 3 thinking models attach ``extra_content`` with a + ``thought_signature`` to each tool call. This signature must be + replayed on subsequent API calls — without it the API rejects the + request with HTTP 400. The chat_completions transport stores this + in ``provider_data["extra_content"]``; this property exposes it so + ``_build_assistant_message`` can ``getattr(tc, "extra_content")`` + uniformly. 
+ """ + return (self.provider_data or {}).get("extra_content") + @dataclass class Usage: diff --git a/batch_runner.py b/batch_runner.py index 7413ad59f..f3aaefa3d 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -951,13 +951,9 @@ class BatchRunner: root_logger.setLevel(original_level) # Aggregate all batch statistics and update checkpoint - all_completed_prompts = list(completed_prompts_set) total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0} - + for batch_result in results: - # Add newly completed prompts - all_completed_prompts.extend(batch_result.get("completed_prompts", [])) - # Aggregate tool stats for tool_name, stats in batch_result.get("tool_stats", {}).items(): if tool_name not in total_tool_stats: @@ -977,7 +973,7 @@ class BatchRunner: # Save final checkpoint (best-effort; incremental writes already happened) try: - checkpoint_data["completed_prompts"] = all_completed_prompts + checkpoint_data["completed_prompts"] = sorted(completed_prompts_set) self._save_checkpoint(checkpoint_data, lock=checkpoint_lock) except Exception as ckpt_err: print(f"⚠️ Warning: Failed to save final checkpoint: {ckpt_err}") diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 64e73b1ec..7808632cd 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -326,6 +326,16 @@ compression: # To pin a specific model/provider for compression summaries, use the # auxiliary section below (auxiliary.compression.provider / model). +# ============================================================================= +# Anthropic prompt caching TTL +# ============================================================================= +# When prompt caching is active (Claude via OpenRouter or native Anthropic), +# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and +# "1h". Other values are ignored and "5m" is used. 
+# +prompt_caching: + cache_ttl: "5m" # use "1h" for long sessions with pauses between turns + # ============================================================================= # Auxiliary Models (Advanced — Experimental) # ============================================================================= @@ -507,6 +517,13 @@ agent: # finish, then interrupts anything still running after this timeout. # 0 = no drain, interrupt immediately. # restart_drain_timeout: 60 + + # Max app-level retry attempts for API errors (connection drops, provider + # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this + # to 1 if you use fallback providers and want fast failover on flaky + # primaries (default 3). The OpenAI SDK does its own low-level retries + # underneath this wrapper — this is the Hermes-level loop. + # api_max_retries: 3 # Enable verbose logging verbose: false diff --git a/cli.py b/cli.py index a289e3ab2..00937e9f9 100644 --- a/cli.py +++ b/cli.py @@ -1688,7 +1688,6 @@ def _looks_like_slash_command(text: str) -> bool: from agent.skill_commands import ( scan_skill_commands, build_skill_invocation_message, - build_plan_path, build_preloaded_skills_prompt, ) @@ -3084,6 +3083,8 @@ class HermesCLI: format_runtime_provider_error, ) + _primary_exc = None + runtime = None try: runtime = resolve_runtime_provider( requested=self.requested_provider, @@ -3091,7 +3092,34 @@ class HermesCLI: explicit_base_url=self._explicit_base_url, ) except Exception as exc: - message = format_runtime_provider_error(exc) + _primary_exc = exc + + # Primary provider auth failed — try fallback providers before giving up. 
+ if runtime is None and _primary_exc is not None: + from hermes_cli.auth import AuthError + if isinstance(_primary_exc, AuthError): + _fb_chain = self._fallback_model if isinstance(self._fallback_model, list) else [] + for _fb in _fb_chain: + _fb_provider = (_fb.get("provider") or "").strip().lower() + _fb_model = (_fb.get("model") or "").strip() + if not _fb_provider or not _fb_model: + continue + try: + runtime = resolve_runtime_provider(requested=_fb_provider) + logger.warning( + "Primary provider auth failed (%s). Falling through to fallback: %s/%s", + _primary_exc, _fb_provider, _fb_model, + ) + _cprint(f"⚠️ Primary auth failed — switching to fallback: {_fb_provider} / {_fb_model}") + self.requested_provider = _fb_provider + self.model = _fb_model + _primary_exc = None + break + except Exception: + continue + + if runtime is None: + message = format_runtime_provider_error(_primary_exc) if _primary_exc else "Provider resolution failed." ChatConsole().print(f"[bold red]{message}[/]") return False @@ -3254,6 +3282,23 @@ class HermesCLI: _cprint(f"\033[1;31mSession not found: {self.session_id}{_RST}") _cprint(f"{_DIM}Use a session ID from a previous CLI run (hermes sessions list).{_RST}") return False + # If the requested session is the (empty) head of a compression + # chain, walk to the descendant that actually holds the messages. + # See #15000 and SessionDB.resolve_resume_session_id. 
+ try: + resolved_id = self._session_db.resolve_resume_session_id(self.session_id) + except Exception: + resolved_id = self.session_id + if resolved_id and resolved_id != self.session_id: + ChatConsole().print( + f"[{_DIM}]Session {_escape(self.session_id)} was compressed into " + f"{_escape(resolved_id)}; resuming the descendant with your " + f"transcript.[/]" + ) + self.session_id = resolved_id + resolved_meta = self._session_db.get_session(self.session_id) + if resolved_meta: + session_meta = resolved_meta restored = self._session_db.get_messages_as_conversation(self.session_id) if restored: restored = [m for m in restored if m.get("role") != "session_meta"] @@ -3472,6 +3517,22 @@ class HermesCLI: ) return False + # If the requested session is the (empty) head of a compression chain, + # walk to the descendant that actually holds the messages. See #15000. + try: + resolved_id = self._session_db.resolve_resume_session_id(self.session_id) + except Exception: + resolved_id = self.session_id + if resolved_id and resolved_id != self.session_id: + self._console_print( + f"[dim]Session {self.session_id} was compressed into " + f"{resolved_id}; resuming the descendant with your transcript.[/]" + ) + self.session_id = resolved_id + resolved_meta = self._session_db.get_session(self.session_id) + if resolved_meta: + session_meta = resolved_meta + restored = self._session_db.get_messages_as_conversation(self.session_id) if restored: restored = [m for m in restored if m.get("role") != "session_meta"] @@ -4686,6 +4747,22 @@ class HermesCLI: _cprint(" Use /history or `hermes sessions list` to see available sessions.") return + # If the target is the empty head of a compression chain, redirect to + # the descendant that actually holds the transcript. See #15000. 
+ try: + resolved_id = self._session_db.resolve_resume_session_id(target_id) + except Exception: + resolved_id = target_id + if resolved_id and resolved_id != target_id: + _cprint( + f" Session {target_id} was compressed into {resolved_id}; " + f"resuming the descendant with your transcript." + ) + target_id = resolved_id + resolved_meta = self._session_db.get_session(target_id) + if resolved_meta: + session_meta = resolved_meta + if target_id == self.session_id: _cprint(" Already on that session.") return @@ -5378,79 +5455,6 @@ class HermesCLI: except Exception: return False - def _show_model_and_providers(self): - """Show current model + provider and list all authenticated providers. - - Shows current model + provider, then lists all authenticated - providers with their available models. - """ - from hermes_cli.models import ( - curated_models_for_provider, list_available_providers, - normalize_provider, _PROVIDER_LABELS, - get_pricing_for_provider, format_model_pricing_table, - ) - from hermes_cli.auth import resolve_provider as _resolve_provider - - # Resolve current provider - raw_provider = normalize_provider(self.provider) - if raw_provider == "auto": - try: - current = _resolve_provider( - self.requested_provider, - explicit_api_key=self._explicit_api_key, - explicit_base_url=self._explicit_base_url, - ) - except Exception: - current = "openrouter" - else: - current = raw_provider - current_label = _PROVIDER_LABELS.get(current, current) - - print(f"\n Current: {self.model} via {current_label}") - print() - - # Show all authenticated providers with their models - providers = list_available_providers() - authed = [p for p in providers if p["authenticated"]] - unauthed = [p for p in providers if not p["authenticated"]] - - if authed: - print(" Authenticated providers & models:") - for p in authed: - is_active = p["id"] == current - marker = " ← active" if is_active else "" - print(f" [{p['id']}]{marker}") - curated = curated_models_for_provider(p["id"]) - # 
Fetch pricing for providers that support it (openrouter, nous) - pricing_map = get_pricing_for_provider(p["id"]) if p["id"] in ("openrouter", "nous") else {} - if curated and pricing_map: - cur_model = self.model if is_active else "" - for line in format_model_pricing_table(curated, pricing_map, current_model=cur_model): - print(line) - elif curated: - for mid, desc in curated: - current_marker = " ← current" if (is_active and mid == self.model) else "" - print(f" {mid}{current_marker}") - elif p["id"] == "custom": - from hermes_cli.models import _get_custom_base_url - custom_url = _get_custom_base_url() - if custom_url: - print(f" endpoint: {custom_url}") - if is_active: - print(f" model: {self.model} ← current") - print(" (use hermes model to change)") - else: - print(" (use hermes model to change)") - print() - - if unauthed: - names = ", ".join(p["label"] for p in unauthed) - print(f" Not configured: {names}") - print(" Run: hermes setup") - print() - - print(" To change model or provider, use: hermes model") - def _output_console(self): """Use prompt_toolkit-safe Rich rendering once the TUI is live.""" if getattr(self, "_app", None): @@ -6026,16 +6030,12 @@ class HermesCLI: self._handle_resume_command(cmd_original) elif canonical == "model": self._handle_model_switch(cmd_original) - elif canonical == "provider": - self._show_model_and_providers() elif canonical == "gquota": self._handle_gquota_command(cmd_original) elif canonical == "personality": # Use original case (handler lowercases the personality name itself) self._handle_personality_command(cmd_original) - elif canonical == "plan": - self._handle_plan_command(cmd_original) elif canonical == "retry": retry_msg = self.retry_last() if retry_msg and hasattr(self, '_pending_input'): @@ -6165,6 +6165,8 @@ class HermesCLI: self._handle_skin_command(cmd_original) elif canonical == "voice": self._handle_voice_command(cmd_original) + elif canonical == "busy": + self._handle_busy_command(cmd_original) else: # 
Check for user-defined quick commands (bypass agent loop, no LLM call) base_cmd = cmd_lower.split()[0] @@ -6270,32 +6272,6 @@ class HermesCLI: return True - def _handle_plan_command(self, cmd: str): - """Handle /plan [request] — load the bundled plan skill.""" - parts = cmd.strip().split(maxsplit=1) - user_instruction = parts[1].strip() if len(parts) > 1 else "" - - plan_path = build_plan_path(user_instruction) - msg = build_skill_invocation_message( - "/plan", - user_instruction, - task_id=self.session_id, - runtime_note=( - "Save the markdown plan with write_file to this exact relative path " - f"inside the active workspace/backend cwd: {plan_path}" - ), - ) - - if not msg: - ChatConsole().print("[bold red]Failed to load the bundled /plan skill[/]") - return - - _cprint(f" 📝 Plan mode queued via skill. Markdown plan target: {plan_path}") - if hasattr(self, '_pending_input'): - self._pending_input.put(msg) - else: - ChatConsole().print("[bold red]Plan mode unavailable: input queue not initialized[/]") - def _handle_background_command(self, cmd: str): """Handle /background — run a prompt in a separate background session. @@ -6685,6 +6661,13 @@ class HermesCLI: print(f" ⚠ Port {_port} is not reachable at {cdp_url}") os.environ["BROWSER_CDP_URL"] = cdp_url + # Eagerly start the CDP supervisor so pending_dialogs + frame_tree + # show up in the next browser_snapshot. No-op if already started. 
+ try: + from tools.browser_tool import _ensure_cdp_supervisor # type: ignore[import-not-found] + _ensure_cdp_supervisor("default") + except Exception: + pass print() print("🌐 Browser connected to live Chrome via CDP") print(f" Endpoint: {cdp_url}") @@ -6706,7 +6689,8 @@ class HermesCLI: if current: os.environ.pop("BROWSER_CDP_URL", None) try: - from tools.browser_tool import cleanup_all_browsers + from tools.browser_tool import cleanup_all_browsers, _stop_cdp_supervisor + _stop_cdp_supervisor("default") cleanup_all_browsers() except Exception: pass @@ -6919,6 +6903,36 @@ class HermesCLI: else: _cprint(f" {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}") + def _handle_busy_command(self, cmd: str): + """Handle /busy — control what Enter does while Hermes is working. + + Usage: + /busy Show current busy input mode + /busy status Show current busy input mode + /busy queue Queue input for the next turn instead of interrupting + /busy interrupt Interrupt the current run on Enter (default) + """ + parts = cmd.strip().split(maxsplit=1) + if len(parts) < 2 or parts[1].strip().lower() == "status": + _cprint(f" {_ACCENT}Busy input mode: {self.busy_input_mode}{_RST}") + _cprint(f" {_DIM}Enter while busy: {'queues for next turn' if self.busy_input_mode == 'queue' else 'interrupts current run'}{_RST}") + _cprint(f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}") + return + + arg = parts[1].strip().lower() + if arg not in {"queue", "interrupt"}: + _cprint(f" {_DIM}(._.) Unknown argument: {arg}{_RST}") + _cprint(f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}") + return + + self.busy_input_mode = arg + if save_config_value("display.busy_input_mode", arg): + behavior = "Enter will queue follow-up input while Hermes is busy." if arg == "queue" else "Enter will interrupt the current run while Hermes is busy." 
+ _cprint(f" {_ACCENT}✓ Busy input mode set to '{arg}' (saved to config){_RST}") + _cprint(f" {_DIM}{behavior}{_RST}") + else: + _cprint(f" {_ACCENT}✓ Busy input mode set to '{arg}' (session only){_RST}") + def _handle_fast_command(self, cmd: str): """Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode).""" if not self._fast_command_available(): @@ -6997,51 +7011,52 @@ class HermesCLI: focus_topic = parts[1].strip() original_count = len(self.conversation_history) - try: - from agent.model_metadata import estimate_messages_tokens_rough - from agent.manual_compression_feedback import summarize_manual_compression - original_history = list(self.conversation_history) - approx_tokens = estimate_messages_tokens_rough(original_history) - if focus_topic: - print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), " - f"focus: \"{focus_topic}\"...") - else: - print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...") + with self._busy_command("Compressing context..."): + try: + from agent.model_metadata import estimate_messages_tokens_rough + from agent.manual_compression_feedback import summarize_manual_compression + original_history = list(self.conversation_history) + approx_tokens = estimate_messages_tokens_rough(original_history) + if focus_topic: + print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), " + f"focus: \"{focus_topic}\"...") + else: + print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...") - compressed, _ = self.agent._compress_context( - original_history, - self.agent._cached_system_prompt or "", - approx_tokens=approx_tokens, - focus_topic=focus_topic or None, - ) - self.conversation_history = compressed - # _compress_context ends the old session and creates a new child - # session on the agent (run_agent.py::_compress_context). 
Sync the - # CLI's session_id so /status, /resume, exit summary, and title - # generation all point at the live continuation session, not the - # ended parent. Without this, subsequent end_session() calls target - # the already-closed parent and the child is orphaned. - if ( - getattr(self.agent, "session_id", None) - and self.agent.session_id != self.session_id - ): - self.session_id = self.agent.session_id - self._pending_title = None - new_tokens = estimate_messages_tokens_rough(self.conversation_history) - summary = summarize_manual_compression( - original_history, - self.conversation_history, - approx_tokens, - new_tokens, - ) - icon = "🗜️" if summary["noop"] else "✅" - print(f" {icon} {summary['headline']}") - print(f" {summary['token_line']}") - if summary["note"]: - print(f" {summary['note']}") + compressed, _ = self.agent._compress_context( + original_history, + self.agent._cached_system_prompt or "", + approx_tokens=approx_tokens, + focus_topic=focus_topic or None, + ) + self.conversation_history = compressed + # _compress_context ends the old session and creates a new child + # session on the agent (run_agent.py::_compress_context). Sync the + # CLI's session_id so /status, /resume, exit summary, and title + # generation all point at the live continuation session, not the + # ended parent. Without this, subsequent end_session() calls target + # the already-closed parent and the child is orphaned. 
+ if ( + getattr(self.agent, "session_id", None) + and self.agent.session_id != self.session_id + ): + self.session_id = self.agent.session_id + self._pending_title = None + new_tokens = estimate_messages_tokens_rough(self.conversation_history) + summary = summarize_manual_compression( + original_history, + self.conversation_history, + approx_tokens, + new_tokens, + ) + icon = "🗜️" if summary["noop"] else "✅" + print(f" {icon} {summary['headline']}") + print(f" {summary['token_line']}") + if summary["note"]: + print(f" {summary['note']}") - except Exception as e: - print(f" ❌ Compression failed: {e}") + except Exception as e: + print(f" ❌ Compression failed: {e}") def _handle_debug_command(self): """Handle /debug — upload debug report + logs and print paste URLs.""" @@ -9543,9 +9558,20 @@ class HermesCLI: @kb.add('c-d') def handle_ctrl_d(event): - """Handle Ctrl+D - exit.""" - self._should_exit = True - event.app.exit() + """Ctrl+D: delete char under cursor (standard readline behaviour). + Only exit when the input is empty — same as bash/zsh. Pending + attached images count as input and block the EOF-exit so the + user doesn't lose them silently. + """ + buf = event.app.current_buffer + if buf.text: + buf.delete() + elif self._attached_images: + # Empty text but pending attachments — no-op, don't exit. + return + else: + self._should_exit = True + event.app.exit() _modal_prompt_active = Condition( lambda: bool(self._secret_state or self._sudo_state) diff --git a/cron/jobs.py b/cron/jobs.py index 8fb3f868a..158f53654 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -371,6 +371,39 @@ def save_jobs(jobs: List[Dict[str, Any]]): raise +def _normalize_workdir(workdir: Optional[str]) -> Optional[str]: + """Normalize and validate a cron job workdir. + + Rules: + - Empty / None → None (feature off, preserves old behaviour). + - ``~`` is expanded. Relative paths are rejected — cron jobs run detached + from any shell cwd, so relative paths have no stable meaning. 
+ - The path must exist and be a directory at create/update time. We do + NOT re-check at run time (a user might briefly unmount the dir; the + scheduler will just fall back to old behaviour with a logged warning). + + Returns the absolute path string, or None when disabled. + Raises ValueError on invalid input. + """ + if workdir is None: + return None + raw = str(workdir).strip() + if not raw: + return None + expanded = Path(raw).expanduser() + if not expanded.is_absolute(): + raise ValueError( + f"Cron workdir must be an absolute path (got {raw!r}). " + f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous." + ) + resolved = expanded.resolve() + if not resolved.exists(): + raise ValueError(f"Cron workdir does not exist: {resolved}") + if not resolved.is_dir(): + raise ValueError(f"Cron workdir is not a directory: {resolved}") + return str(resolved) + + def create_job( prompt: str, schedule: str, @@ -384,6 +417,8 @@ def create_job( provider: Optional[str] = None, base_url: Optional[str] = None, script: Optional[str] = None, + enabled_toolsets: Optional[List[str]] = None, + workdir: Optional[str] = None, ) -> Dict[str, Any]: """ Create a new cron job. @@ -403,6 +438,15 @@ def create_job( script: Optional path to a Python script whose stdout is injected into the prompt each run. The script runs before the agent turn, and its output is prepended as context. Useful for data collection / change detection. + enabled_toolsets: Optional list of toolset names to restrict the agent to. + When set, only tools from these toolsets are loaded, reducing + token overhead. When omitted, all default tools are loaded. + workdir: Optional absolute path. When set, the job runs as if launched + from that directory: AGENTS.md / CLAUDE.md / .cursorrules from + that directory are injected into the system prompt, and the + terminal/file/code_exec tools use it as their working directory + (via TERMINAL_CWD). 
When unset, the old behaviour is preserved + (no context files injected, tools use the scheduler's cwd). Returns: The created job dict @@ -433,6 +477,9 @@ def create_job( normalized_base_url = normalized_base_url or None normalized_script = str(script).strip() if isinstance(script, str) else None normalized_script = normalized_script or None + normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None + normalized_toolsets = normalized_toolsets or None + normalized_workdir = _normalize_workdir(workdir) label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job" job = { @@ -464,6 +511,8 @@ def create_job( # Delivery configuration "deliver": deliver, "origin": origin, # Tracks where job was created for "origin" delivery + "enabled_toolsets": normalized_toolsets, + "workdir": normalized_workdir, } jobs = load_jobs() @@ -497,6 +546,15 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]] if job["id"] != job_id: continue + # Validate / normalize workdir if present in updates. Empty string or + # None both mean "clear the field" (restore old behaviour). + if "workdir" in updates: + _wd = updates["workdir"] + if _wd in (None, "", False): + updates["workdir"] = None + else: + updates["workdir"] = _normalize_workdir(_wd) + updated = _apply_skill_fields({**job, **updates}) schedule_changed = "schedule" in updates diff --git a/cron/scheduler.py b/cron/scheduler.py index e7a22dfbe..3dbb54c7d 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now logger = logging.getLogger(__name__) + +def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None: + """Resolve the toolset list for a cron job. + + Precedence: + 1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update). + Keeps the agent's job-scoped toolset override intact — #6130. + 2. 
Per-platform ``hermes tools`` config for the ``cron`` platform. + Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``) + so users can gate cron toolsets globally without recreating every job. + 3. ``None`` on any lookup failure — AIAgent loads the full default set + (legacy behavior before this change, preserved as the safety net). + + _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by + ``_get_platform_tools`` for unconfigured platforms, so fresh installs + get cron WITHOUT ``moa`` by default (issue reported by Norbert — + surprise $4.63 run). + """ + per_job = job.get("enabled_toolsets") + if per_job: + return per_job + try: + from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load + return sorted(_get_platform_tools(cfg or {}, "cron")) + except Exception as exc: + logger.warning( + "Cron toolset resolution failed, falling back to full default toolset: %s", + exc, + ) + return None + # Valid delivery platforms — used to validate user-supplied platform names # in cron delivery targets, preventing env var enumeration via crafted names. _KNOWN_DELIVERY_PLATFORMS = frozenset({ @@ -764,6 +795,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: chat_name=origin.get("chat_name", "") if origin else "", ) + # Per-job working directory. When set (and validated at create/update + # time), we point TERMINAL_CWD at it so: + # - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md / + # .cursorrules from the job's project dir, AND + # - the terminal, file, and code-exec tools run commands from there. + # + # tick() serializes workdir-jobs outside the parallel pool, so mutating + # os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less + # jobs we leave TERMINAL_CWD untouched — preserves the original behaviour + # (skip_context_files=True, tools use whatever cwd the scheduler has). 
+ _job_workdir = (job.get("workdir") or "").strip() or None + if _job_workdir and not Path(_job_workdir).is_dir(): + # Directory was removed between create-time validation and now. Log + # and drop back to old behaviour rather than crashing the job. + logger.warning( + "Job '%s': configured workdir %r no longer exists — running without it", + job_id, _job_workdir, + ) + _job_workdir = None + _prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_") + if _job_workdir: + os.environ["TERMINAL_CWD"] = _job_workdir + logger.info("Job '%s': using workdir %s", job_id, _job_workdir) + try: # Re-read .env and config.yaml fresh every run so provider/key # changes take effect without a gateway restart. @@ -840,6 +895,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: resolve_runtime_provider, format_runtime_provider_error, ) + from hermes_cli.auth import AuthError try: runtime_kwargs = { "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"), @@ -847,6 +903,28 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: if job.get("base_url"): runtime_kwargs["explicit_base_url"] = job.get("base_url") runtime = resolve_runtime_provider(**runtime_kwargs) + except AuthError as auth_exc: + # Primary provider auth failed — try fallback chain before giving up. 
+ logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc) + fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model") + fb_list = (fb if isinstance(fb, list) else [fb]) if fb else [] + runtime = None + for entry in fb_list: + if not isinstance(entry, dict): + continue + try: + fb_kwargs = {"requested": entry.get("provider")} + if entry.get("base_url"): + fb_kwargs["explicit_base_url"] = entry["base_url"] + if entry.get("api_key"): + fb_kwargs["explicit_api_key"] = entry["api_key"] + runtime = resolve_runtime_provider(**fb_kwargs) + logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider")) + break + except Exception as fb_exc: + logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc) + if runtime is None: + raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc except Exception as exc: message = format_runtime_provider_error(exc) raise RuntimeError(message) from exc @@ -886,9 +964,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: providers_ignored=pr.get("ignore"), providers_order=pr.get("order"), provider_sort=pr.get("sort"), + enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg), disabled_toolsets=["cronjob", "messaging", "clarify"], quiet_mode=True, - skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd + # When a workdir is configured, inject AGENTS.md / CLAUDE.md / + # .cursorrules from that directory; otherwise preserve the old + # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd). + skip_context_files=not bool(_job_workdir), skip_memory=True, # Cron system prompts would corrupt user representations platform="cron", session_id=_cron_session_id, @@ -1027,6 +1109,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: return False, output, "", error_msg finally: + # Restore TERMINAL_CWD to whatever it was before this job ran. 
We + # only ever mutate it when the job has a workdir; see the setup block + # at the top of run_job for the serialization guarantee. + if _job_workdir: + if _prior_terminal_cwd == "_UNSET_": + os.environ.pop("TERMINAL_CWD", None) + else: + os.environ["TERMINAL_CWD"] = _prior_terminal_cwd # Clean up ContextVar session/delivery state for this job. clear_session_vars(_ctx_tokens) if _session_db: @@ -1154,14 +1244,28 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: mark_job_run(job["id"], False, str(e)) return False - # Run all due jobs concurrently, each in its own ContextVar copy - # so session/delivery state stays isolated per-thread. - with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool: - _futures = [] - for job in due_jobs: - _ctx = contextvars.copy_context() - _futures.append(_tick_pool.submit(_ctx.run, _process_job, job)) - _results = [f.result() for f in _futures] + # Partition due jobs: those with a per-job workdir mutate + # os.environ["TERMINAL_CWD"] inside run_job, which is process-global — + # so they MUST run sequentially to avoid corrupting each other. Jobs + # without a workdir leave env untouched and stay parallel-safe. + workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()] + parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()] + + _results: list = [] + + # Sequential pass for workdir jobs. + for job in workdir_jobs: + _ctx = contextvars.copy_context() + _results.append(_ctx.run(_process_job, job)) + + # Parallel pass for the rest — same behaviour as before. 
+ if parallel_jobs: + with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool: + _futures = [] + for job in parallel_jobs: + _ctx = contextvars.copy_context() + _futures.append(_tick_pool.submit(_ctx.run, _process_job, job)) + _results.extend(f.result() for f in _futures) return sum(_results) finally: diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..a0fe1a100 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,52 @@ +# +# docker-compose.yml for Hermes Agent +# +# Usage: +# HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d +# +# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so +# files created inside the container stay readable/writable on the host. +# The entrypoint remaps the internal `hermes` user to these values via +# usermod/groupmod + gosu. +# +# Security notes: +# - The dashboard service binds to 127.0.0.1 by default. It stores API +# keys; exposing it on LAN without auth is unsafe. If you want remote +# access, use an SSH tunnel or put it behind a reverse proxy that +# adds authentication — do NOT pass --insecure --host 0.0.0.0. +# - The gateway's API server is off unless you uncomment API_SERVER_KEY +# and API_SERVER_HOST. See docs/user-guide/api-server.md before doing +# this on an internet-facing host. +# +services: + gateway: + build: . 
+ image: hermes-agent + container_name: hermes + restart: unless-stopped + network_mode: host + volumes: + - ~/.hermes:/opt/data + environment: + - HERMES_UID=${HERMES_UID:-10000} + - HERMES_GID=${HERMES_GID:-10000} + # To expose the OpenAI-compatible API server beyond localhost, + # uncomment BOTH lines (API_SERVER_KEY is mandatory for auth): + # - API_SERVER_HOST=0.0.0.0 + # - API_SERVER_KEY=${API_SERVER_KEY} + command: ["gateway", "run"] + + dashboard: + image: hermes-agent + container_name: hermes-dashboard + restart: unless-stopped + network_mode: host + depends_on: + - gateway + volumes: + - ~/.hermes:/opt/data + environment: + - HERMES_UID=${HERMES_UID:-10000} + - HERMES_GID=${HERMES_GID:-10000} + # Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`. + command: ["dashboard", "--host", "127.0.0.1", "--no-open"] diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 67d193f13..0be1d656c 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -22,9 +22,18 @@ if [ "$(id -u)" = "0" ]; then groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true fi + # Fix ownership of the data volume. When HERMES_UID remaps the hermes user, + # files created by previous runs (under the old UID) become inaccessible. + # Always chown -R when UID was remapped; otherwise only if top-level is wrong. actual_hermes_uid=$(id -u hermes) - if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then - echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing" + needs_chown=false + if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then + needs_chown=true + elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then + needs_chown=true + fi + if [ "$needs_chown" = true ]; then + echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)" # In rootless Podman the container's "root" is mapped to an unprivileged # host UID — chown will fail. 
That's fine: the volume is already owned # by the mapped user on the host side. diff --git a/gateway/config.py b/gateway/config.py index 67ebf7346..509737279 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -135,7 +135,7 @@ class SessionResetPolicy: mode=mode if mode is not None else "both", at_hour=at_hour if at_hour is not None else 4, idle_minutes=idle_minutes if idle_minutes is not None else 1440, - notify=notify if notify is not None else True, + notify=_coerce_bool(notify, True), notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"), ) @@ -178,7 +178,7 @@ class PlatformConfig: home_channel = HomeChannel.from_dict(data["home_channel"]) return cls( - enabled=data.get("enabled", False), + enabled=_coerce_bool(data.get("enabled"), False), token=data.get("token"), api_key=data.get("api_key"), home_channel=home_channel, @@ -435,7 +435,7 @@ class GatewayConfig: reset_triggers=data.get("reset_triggers", ["/new", "/reset"]), quick_commands=quick_commands, sessions_dir=sessions_dir, - always_log_local=data.get("always_log_local", True), + always_log_local=_coerce_bool(data.get("always_log_local"), True), stt_enabled=_coerce_bool(stt_enabled, True), group_sessions_per_user=_coerce_bool(group_sessions_per_user, True), thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False), @@ -687,6 +687,11 @@ def load_gateway_config() -> GatewayConfig: os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower() if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"): os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip() + if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"): + gac = telegram_cfg["group_allowed_chats"] + if isinstance(gac, list): + gac = ",".join(str(v) for v in gac) + os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac) if "disable_link_previews" in telegram_cfg: plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, 
{}) if not isinstance(plat_data, dict): diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index a6b52ff32..db3304a09 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -1204,10 +1204,12 @@ class APIServerAdapter(BasePlatformAdapter): If the client disconnects mid-stream, ``agent.interrupt()`` is called so the agent stops issuing upstream LLM calls, then the - asyncio task is cancelled. When ``store=True`` the full response - is persisted to the ResponseStore in a ``finally`` block so GET - /v1/responses/{id} and ``previous_response_id`` chaining work the - same as the batch path. + asyncio task is cancelled. When ``store=True`` an initial + ``in_progress`` snapshot is persisted immediately after + ``response.created`` and disconnects update it to an + ``incomplete`` snapshot so GET /v1/responses/{id} and + ``previous_response_id`` chaining still have something to + recover from. """ import queue as _q @@ -1269,6 +1271,60 @@ class APIServerAdapter(BasePlatformAdapter): final_response_text = "" agent_error: Optional[str] = None usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} + terminal_snapshot_persisted = False + + def _persist_response_snapshot( + response_env: Dict[str, Any], + *, + conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None, + ) -> None: + if not store: + return + if conversation_history_snapshot is None: + conversation_history_snapshot = list(conversation_history) + conversation_history_snapshot.append({"role": "user", "content": user_message}) + self._response_store.put(response_id, { + "response": response_env, + "conversation_history": conversation_history_snapshot, + "instructions": instructions, + "session_id": session_id, + }) + if conversation: + self._response_store.set_conversation(conversation, response_id) + + def _persist_incomplete_if_needed() -> None: + """Persist an ``incomplete`` snapshot if no terminal one was written. 
+ + Called from both the client-disconnect (``ConnectionResetError``) + and server-cancellation (``asyncio.CancelledError``) paths so + GET /v1/responses/{id} and ``previous_response_id`` chaining keep + working after abrupt stream termination. + """ + if not store or terminal_snapshot_persisted: + return + incomplete_text = "".join(final_text_parts) or final_response_text + incomplete_items: List[Dict[str, Any]] = list(emitted_items) + if incomplete_text: + incomplete_items.append({ + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": incomplete_text}], + }) + incomplete_env = _envelope("incomplete") + incomplete_env["output"] = incomplete_items + incomplete_env["usage"] = { + "input_tokens": usage.get("input_tokens", 0), + "output_tokens": usage.get("output_tokens", 0), + "total_tokens": usage.get("total_tokens", 0), + } + incomplete_history = list(conversation_history) + incomplete_history.append({"role": "user", "content": user_message}) + if incomplete_text: + incomplete_history.append({"role": "assistant", "content": incomplete_text}) + _persist_response_snapshot( + incomplete_env, + conversation_history_snapshot=incomplete_history, + ) try: # response.created — initial envelope, status=in_progress @@ -1278,6 +1334,7 @@ class APIServerAdapter(BasePlatformAdapter): "type": "response.created", "response": created_env, }) + _persist_response_snapshot(created_env) last_activity = time.monotonic() async def _open_message_item() -> None: @@ -1534,6 +1591,18 @@ class APIServerAdapter(BasePlatformAdapter): "output_tokens": usage.get("output_tokens", 0), "total_tokens": usage.get("total_tokens", 0), } + _failed_history = list(conversation_history) + _failed_history.append({"role": "user", "content": user_message}) + if final_response_text or agent_error: + _failed_history.append({ + "role": "assistant", + "content": final_response_text or agent_error, + }) + _persist_response_snapshot( + failed_env, + 
conversation_history_snapshot=_failed_history, + ) + terminal_snapshot_persisted = True await _write_event("response.failed", { "type": "response.failed", "response": failed_env, @@ -1546,30 +1615,24 @@ class APIServerAdapter(BasePlatformAdapter): "output_tokens": usage.get("output_tokens", 0), "total_tokens": usage.get("total_tokens", 0), } + full_history = list(conversation_history) + full_history.append({"role": "user", "content": user_message}) + if isinstance(result, dict) and result.get("messages"): + full_history.extend(result["messages"]) + else: + full_history.append({"role": "assistant", "content": final_response_text}) + _persist_response_snapshot( + completed_env, + conversation_history_snapshot=full_history, + ) + terminal_snapshot_persisted = True await _write_event("response.completed", { "type": "response.completed", "response": completed_env, }) - # Persist for future chaining / GET retrieval, mirroring - # the batch path behavior. - if store: - full_history = list(conversation_history) - full_history.append({"role": "user", "content": user_message}) - if isinstance(result, dict) and result.get("messages"): - full_history.extend(result["messages"]) - else: - full_history.append({"role": "assistant", "content": final_response_text}) - self._response_store.put(response_id, { - "response": completed_env, - "conversation_history": full_history, - "instructions": instructions, - "session_id": session_id, - }) - if conversation: - self._response_store.set_conversation(conversation, response_id) - except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError): + _persist_incomplete_if_needed() # Client disconnected — interrupt the agent so it stops # making upstream LLM calls, then cancel the task. 
agent = agent_ref[0] if agent_ref else None @@ -1585,6 +1648,22 @@ class APIServerAdapter(BasePlatformAdapter): except (asyncio.CancelledError, Exception): pass logger.info("SSE client disconnected; interrupted agent task %s", response_id) + except asyncio.CancelledError: + # Server-side cancellation (e.g. shutdown, request timeout) — + # persist an incomplete snapshot so GET /v1/responses/{id} and + # previous_response_id chaining still work, then re-raise so the + # runtime's cancellation semantics are respected. + _persist_incomplete_if_needed() + agent = agent_ref[0] if agent_ref else None + if agent is not None: + try: + agent.interrupt("SSE task cancelled") + except Exception: + pass + if not agent_task.done(): + agent_task.cancel() + logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id) + raise return response diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index db7603498..fd325fde4 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None: return None -def resolve_proxy_url(platform_env_var: str | None = None) -> str | None: +def _split_host_port(value: str) -> tuple[str, int | None]: + raw = str(value or "").strip() + if not raw: + return "", None + if "://" in raw: + parsed = urlsplit(raw) + return (parsed.hostname or "").lower().rstrip("."), parsed.port + if raw.startswith("[") and "]" in raw: + host, _, rest = raw[1:].partition("]") + port = None + if rest.startswith(":") and rest[1:].isdigit(): + port = int(rest[1:]) + return host.lower().rstrip("."), port + if raw.count(":") == 1: + host, _, maybe_port = raw.rpartition(":") + if maybe_port.isdigit(): + return host.lower().rstrip("."), int(maybe_port) + return raw.lower().strip("[]").rstrip("."), None + + +def _no_proxy_entries() -> list[str]: + entries: list[str] = [] + for key in ("NO_PROXY", "no_proxy"): + raw = os.environ.get(key, "") + entries.extend(part.strip() for 
part in raw.split(",") if part.strip()) + return entries + + +def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool: + token = str(entry or "").strip().lower() + if not token: + return False + if token == "*": + return True + + token_host, token_port = _split_host_port(token) + if token_port is not None and port is not None and token_port != port: + return False + if token_port is not None and port is None: + return False + if not token_host: + return False + + try: + network = ipaddress.ip_network(token_host, strict=False) + try: + return ipaddress.ip_address(host) in network + except ValueError: + return False + except ValueError: + pass + + try: + token_ip = ipaddress.ip_address(token_host) + try: + return ipaddress.ip_address(host) == token_ip + except ValueError: + return False + except ValueError: + pass + + if token_host.startswith("*."): + suffix = token_host[1:] + return host.endswith(suffix) + if token_host.startswith("."): + return host == token_host[1:] or host.endswith(token_host) + return host == token_host or host.endswith(f".{token_host}") + + +def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool: + """Return True when NO_PROXY/no_proxy matches at least one target host. + + Supports exact hosts, domain suffixes, wildcard suffixes, IP literals, + CIDR ranges, optional host:port entries, and ``*``. + """ + entries = _no_proxy_entries() + if not entries or not target_hosts: + return False + if isinstance(target_hosts, str): + candidates = [target_hosts] + else: + candidates = list(target_hosts) + for candidate in candidates: + host, port = _split_host_port(str(candidate)) + if not host: + continue + if any(_no_proxy_entry_matches(entry, host, port) for entry in entries): + return True + return False + + +def resolve_proxy_url( + platform_env_var: str | None = None, + *, + target_hosts: str | list[str] | tuple[str, ...] 
| set[str] | None = None, +) -> str | None: """Return a proxy URL from env vars, or macOS system proxy. Check order: @@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None: 1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants) 2. macOS system proxy via ``scutil --proxy`` (auto-detect) - Returns *None* if no proxy is found. + Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one + of ``target_hosts``. """ if platform_env_var: value = (os.environ.get(platform_env_var) or "").strip() if value: + if should_bypass_proxy(target_hosts): + return None return normalize_proxy_url(value) for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = (os.environ.get(key) or "").strip() if value: + if should_bypass_proxy(target_hosts): + return None return normalize_proxy_url(value) - return normalize_proxy_url(_detect_macos_system_proxy()) + detected = normalize_proxy_url(_detect_macos_system_proxy()) + if detected and should_bypass_proxy(target_hosts): + return None + return detected def proxy_kwargs_for_bot(proxy_url: str | None) -> dict: diff --git a/gateway/platforms/bluebubbles.py b/gateway/platforms/bluebubbles.py index 39d4e537e..afcbf1a7e 100644 --- a/gateway/platforms/bluebubbles.py +++ b/gateway/platforms/bluebubbles.py @@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str: class BlueBubblesAdapter(BasePlatformAdapter): platform = Platform.BLUEBUBBLES + SUPPORTS_MESSAGE_EDITING = False MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH def __init__(self, config: PlatformConfig): @@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter): # Text sending # ------------------------------------------------------------------ + @staticmethod + def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]: + # Use the base splitter but skip pagination indicators — iMessage + # bubbles flow naturally without "(1/3)" suffixes. 
+ chunks = BasePlatformAdapter.truncate_message(content, max_length) + return [re.sub(r"\s*\(\d+/\d+\)$", "", c) for c in chunks] + async def send( self, chat_id: str, @@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter): reply_to: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: - text = strip_markdown(content or "") + text = self.format_message(content) if not text: return SendResult(success=False, error="BlueBubbles send requires text") - chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH) + # Split on paragraph breaks first (double newlines) so each thought + # becomes its own iMessage bubble, then truncate any that are still + # too long. + paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()] + chunks: List[str] = [] + for para in (paragraphs or [text]): + if len(para) <= self.MAX_MESSAGE_LENGTH: + chunks.append(para) + else: + chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH)) last = SendResult(success=True) for chunk in chunks: guid = await self._resolve_chat_guid(chat_id) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index a148c5f4b..3eaf6ac05 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -23,6 +23,7 @@ from typing import Callable, Dict, Optional, Any logger = logging.getLogger(__name__) VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080} +_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"} try: import discord @@ -802,8 +803,27 @@ class DiscordAdapter(BasePlatformAdapter): if not self._client: return try: - synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30) - logger.info("[%s] Synced %d slash command(s)", self.name, len(synced)) + sync_policy = self._get_discord_command_sync_policy() + if sync_policy == "off": + logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name) + return + + if sync_policy == "bulk": + synced = await 
asyncio.wait_for(self._client.tree.sync(), timeout=30) + logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced)) + return + + summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30) + logger.info( + "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d", + self.name, + summary["total"], + summary["unchanged"], + summary["updated"], + summary["recreated"], + summary["created"], + summary["deleted"], + ) except asyncio.TimeoutError: logger.warning("[%s] Slash command sync timed out after 30s", self.name) except asyncio.CancelledError: @@ -811,6 +831,183 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: # pragma: no cover - defensive logging logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True) + def _get_discord_command_sync_policy(self) -> str: + raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower() + if raw in _DISCORD_COMMAND_SYNC_POLICIES: + return raw + if raw: + logger.warning( + "[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'", + self.name, + raw, + ) + return "safe" + + def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]: + """Reduce command payloads to the semantic fields Hermes manages.""" + contexts = payload.get("contexts") + integration_types = payload.get("integration_types") + return { + "type": int(payload.get("type", 1) or 1), + "name": str(payload.get("name", "") or ""), + "description": str(payload.get("description", "") or ""), + "default_member_permissions": self._normalize_permissions( + payload.get("default_member_permissions") + ), + "dm_permission": bool(payload.get("dm_permission", True)), + "nsfw": bool(payload.get("nsfw", False)), + "contexts": sorted(int(c) for c in contexts) if contexts else None, + "integration_types": ( + sorted(int(i) for i in integration_types) if integration_types else None + ), + 
"options": [ + self._canonicalize_app_command_option(item) + for item in payload.get("options", []) or [] + if isinstance(item, dict) + ], + } + + @staticmethod + def _normalize_permissions(value: Any) -> Optional[str]: + """Discord emits default_member_permissions as str server-side but discord.py + sets it as int locally. Normalize to str-or-None so the comparison is stable.""" + if value is None: + return None + return str(value) + + def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]: + """Build a canonical-ready dict from an AppCommand. + + discord.py's AppCommand.to_dict() does NOT include nsfw, + dm_permission, or default_member_permissions (they live only on the + attributes). Pull them from the attributes so the canonicalizer sees + the real server-side values instead of defaults — otherwise any + command using non-default permissions would diff on every startup. + """ + payload = dict(command.to_dict()) + nsfw = getattr(command, "nsfw", None) + if nsfw is not None: + payload["nsfw"] = bool(nsfw) + guild_only = getattr(command, "guild_only", None) + if guild_only is not None: + payload["dm_permission"] = not bool(guild_only) + default_permissions = getattr(command, "default_member_permissions", None) + if default_permissions is not None: + payload["default_member_permissions"] = getattr( + default_permissions, "value", default_permissions + ) + return payload + + def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]: + return { + "type": int(payload.get("type", 0) or 0), + "name": str(payload.get("name", "") or ""), + "description": str(payload.get("description", "") or ""), + "required": bool(payload.get("required", False)), + "autocomplete": bool(payload.get("autocomplete", False)), + "choices": [ + { + "name": str(choice.get("name", "") or ""), + "value": choice.get("value"), + } + for choice in payload.get("choices", []) or [] + if isinstance(choice, dict) + ], + "channel_types": 
list(payload.get("channel_types", []) or []), + "min_value": payload.get("min_value"), + "max_value": payload.get("max_value"), + "min_length": payload.get("min_length"), + "max_length": payload.get("max_length"), + "options": [ + self._canonicalize_app_command_option(item) + for item in payload.get("options", []) or [] + if isinstance(item, dict) + ], + } + + def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]: + """Fields supported by discord.py's edit_global_command route.""" + canonical = self._canonicalize_app_command_payload(payload) + return { + "name": canonical["name"], + "description": canonical["description"], + "options": canonical["options"], + } + + async def _safe_sync_slash_commands(self) -> Dict[str, int]: + """Diff existing global commands and only mutate the commands that changed.""" + if not self._client: + return { + "total": 0, + "unchanged": 0, + "updated": 0, + "recreated": 0, + "created": 0, + "deleted": 0, + } + + tree = self._client.tree + app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None) + if not app_id: + raise RuntimeError("Discord application ID is unavailable for slash command sync") + + desired_payloads = [command.to_dict(tree) for command in tree.get_commands()] + desired_by_key = { + (int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload + for payload in desired_payloads + } + existing_commands = await tree.fetch_commands() + existing_by_key = { + ( + int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1), + str(command.name or "").lower(), + ): command + for command in existing_commands + } + + unchanged = 0 + updated = 0 + recreated = 0 + created = 0 + deleted = 0 + http = self._client.http + + for key, desired in desired_by_key.items(): + current = existing_by_key.pop(key, None) + if current is None: + await http.upsert_global_command(app_id, desired) + created += 1 + 
continue + + current_existing_payload = self._existing_command_to_payload(current) + current_payload = self._canonicalize_app_command_payload(current_existing_payload) + desired_payload = self._canonicalize_app_command_payload(desired) + if current_payload == desired_payload: + unchanged += 1 + continue + + if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired): + await http.delete_global_command(app_id, current.id) + await http.upsert_global_command(app_id, desired) + recreated += 1 + continue + + await http.edit_global_command(app_id, current.id, desired) + updated += 1 + + for current in existing_by_key.values(): + await http.delete_global_command(app_id, current.id) + deleted += 1 + + return { + "total": len(desired_payloads), + "unchanged": unchanged, + "updated": updated, + "recreated": recreated, + "created": created, + "deleted": deleted, + } + async def _add_reaction(self, message: Any, emoji: str) -> bool: """Add an emoji reaction to a Discord message.""" if not message or not hasattr(message, "add_reaction"): @@ -2049,10 +2246,6 @@ class DiscordAdapter(BasePlatformAdapter): async def slash_usage(interaction: discord.Interaction): await self._run_simple_slash(interaction, "/usage") - @tree.command(name="provider", description="Show available providers") - async def slash_provider(interaction: discord.Interaction): - await self._run_simple_slash(interaction, "/provider") - @tree.command(name="help", description="Show available commands") async def slash_help(interaction: discord.Interaction): await self._run_simple_slash(interaction, "/help") @@ -2522,7 +2715,12 @@ class DiscordAdapter(BasePlatformAdapter): return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off") def _discord_free_response_channels(self) -> set: - """Return Discord channel IDs where no bot mention is required.""" + """Return Discord channel IDs where no bot mention is required. 
+ + A single ``"*"`` entry (either from a list or a comma-separated + string) is preserved in the returned set so callers can short-circuit + on wildcard membership, consistent with ``allowed_channels``. + """ raw = self.config.extra.get("free_response_channels") if raw is None: raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "") @@ -3015,14 +3213,14 @@ class DiscordAdapter(BasePlatformAdapter): allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "") if allowed_channels_raw: allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()} - if not (channel_ids & allowed_channels): + if "*" not in allowed_channels and not (channel_ids & allowed_channels): logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids) return # Check ignored channels - never respond even when mentioned ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "") ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()} - if channel_ids & ignored_channels: + if "*" in ignored_channels or (channel_ids & ignored_channels): logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids) return @@ -3036,7 +3234,11 @@ class DiscordAdapter(BasePlatformAdapter): voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()} current_channel_id = str(message.channel.id) is_voice_linked_channel = current_channel_id in voice_linked_ids - is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel + is_free_channel = ( + "*" in free_channels + or bool(channel_ids & free_channels) + or is_voice_linked_channel + ) # Skip the mention check if the message is in a thread where # the bot has previously participated (auto-created or replied in). 
@@ -3669,6 +3871,15 @@ if DISCORD_AVAILABLE: self.resolved = True model_id = interaction.data["values"][0] + self.clear_items() + await interaction.response.edit_message( + embed=discord.Embed( + title="⚙ Switching Model", + description=f"Switching to `{model_id}`...", + color=discord.Color.blue(), + ), + view=None, + ) try: result_text = await self.on_model_selected( @@ -3679,14 +3890,13 @@ if DISCORD_AVAILABLE: except Exception as exc: result_text = f"Error switching model: {exc}" - self.clear_items() - await interaction.response.edit_message( + await interaction.edit_original_response( embed=discord.Embed( title="⚙ Model Switched", description=result_text, color=discord.Color.green(), ), - view=self, + view=None, ) async def _on_back(self, interaction: discord.Interaction): diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index bec0d690a..be1bf494c 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -703,7 +703,6 @@ class TelegramAdapter(BasePlatformAdapter): "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0), } - proxy_url = resolve_proxy_url("TELEGRAM_PROXY") disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on")) fallback_ips = self._fallback_ips() if not fallback_ips: @@ -714,6 +713,8 @@ class TelegramAdapter(BasePlatformAdapter): ", ".join(fallback_ips), ) + proxy_targets = ["api.telegram.org", *fallback_ips] + proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets) if fallback_ips and not proxy_url and not disable_fallback: logger.info( "[%s] Telegram fallback IPs active: %s", diff --git a/gateway/platforms/telegram_network.py b/gateway/platforms/telegram_network.py index ed2d60d79..b099adc50 100644 --- a/gateway/platforms/telegram_network.py +++ b/gateway/platforms/telegram_network.py @@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"] -def 
_resolve_proxy_url() -> str | None: +def _resolve_proxy_url(target_hosts=None) -> str | None: # Delegate to shared implementation (env vars + macOS system proxy detection) from gateway.platforms.base import resolve_proxy_url - return resolve_proxy_url("TELEGRAM_PROXY") + return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts) class TelegramFallbackTransport(httpx.AsyncBaseTransport): @@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport): def __init__(self, fallback_ips: Iterable[str], **transport_kwargs): self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))] - proxy_url = _resolve_proxy_url() + proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips]) if proxy_url and "proxy" not in transport_kwargs: transport_kwargs["proxy"] = proxy_url self._primary = httpx.AsyncHTTPTransport(**transport_kwargs) diff --git a/gateway/run.py b/gateway/run.py index dcee18e51..14bd3ff0d 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -14,6 +14,7 @@ Usage: """ import asyncio +import dataclasses import json import logging import os @@ -297,50 +298,16 @@ from gateway.restart import ( ) -def _normalize_whatsapp_identifier(value: str) -> str: - """Strip WhatsApp JID/LID syntax down to its stable numeric identifier.""" - return ( - str(value or "") - .strip() - .replace("+", "", 1) - .split(":", 1)[0] - .split("@", 1)[0] - ) +from gateway.whatsapp_identity import ( + canonical_whatsapp_identifier as _canonical_whatsapp_identifier, # noqa: F401 + expand_whatsapp_aliases as _expand_whatsapp_auth_aliases, + normalize_whatsapp_identifier as _normalize_whatsapp_identifier, +) -def _expand_whatsapp_auth_aliases(identifier: str) -> set: - """Resolve WhatsApp phone/LID aliases using bridge session mapping files.""" - normalized = _normalize_whatsapp_identifier(identifier) - if not normalized: - return set() - - session_dir = _hermes_home / "whatsapp" / "session" - resolved = set() - queue = 
[normalized] - - while queue: - current = queue.pop(0) - if not current or current in resolved: - continue - - resolved.add(current) - for suffix in ("", "_reverse"): - mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json" - if not mapping_path.exists(): - continue - try: - mapped = _normalize_whatsapp_identifier( - json.loads(mapping_path.read_text(encoding="utf-8")) - ) - except Exception: - continue - if mapped and mapped not in resolved: - queue.append(mapped) - - return resolved - logger = logging.getLogger(__name__) + # Sentinel placed into _running_agents immediately when a session starts # processing, *before* any await. Prevents a second message for the same # session from bypassing the "already running" guard during the async gap @@ -349,16 +316,30 @@ _AGENT_PENDING_SENTINEL = object() def _resolve_runtime_agent_kwargs() -> dict: - """Resolve provider credentials for gateway-created AIAgent instances.""" + """Resolve provider credentials for gateway-created AIAgent instances. + + If the primary provider fails with an authentication error, attempt to + resolve credentials using the fallback provider chain from config.yaml + before giving up. + """ from hermes_cli.runtime_provider import ( resolve_runtime_provider, format_runtime_provider_error, ) + from hermes_cli.auth import AuthError try: runtime = resolve_runtime_provider( requested=os.getenv("HERMES_INFERENCE_PROVIDER"), ) + except AuthError as auth_exc: + # Primary provider auth failed (expired token, revoked key, etc.). + # Try the fallback provider chain before raising. 
+ logger.warning("Primary provider auth failed: %s — trying fallback", auth_exc) + fb_config = _try_resolve_fallback_provider() + if fb_config is not None: + return fb_config + raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc except Exception as exc: raise RuntimeError(format_runtime_provider_error(exc)) from exc @@ -373,6 +354,48 @@ def _resolve_runtime_agent_kwargs() -> dict: } +def _try_resolve_fallback_provider() -> dict | None: + """Attempt to resolve credentials from the fallback_model/fallback_providers config.""" + from hermes_cli.runtime_provider import resolve_runtime_provider + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if not cfg_path.exists(): + return None + with open(cfg_path, encoding="utf-8") as _f: + cfg = _y.safe_load(_f) or {} + fb = cfg.get("fallback_providers") or cfg.get("fallback_model") + if not fb: + return None + # Normalize to list + fb_list = fb if isinstance(fb, list) else [fb] + for entry in fb_list: + if not isinstance(entry, dict): + continue + try: + runtime = resolve_runtime_provider( + requested=entry.get("provider"), + explicit_base_url=entry.get("base_url"), + explicit_api_key=entry.get("api_key"), + ) + logger.info("Fallback provider resolved: %s", runtime.get("provider")) + return { + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + "command": runtime.get("command"), + "args": list(runtime.get("args") or []), + "credential_pool": runtime.get("credential_pool"), + } + except Exception as fb_exc: + logger.debug("Fallback entry %s failed: %s", entry.get("provider"), fb_exc) + continue + except Exception: + pass + return None + + def _build_media_placeholder(event) -> str: """Build a text placeholder for media-only events so they aren't dropped. 
@@ -1551,27 +1574,23 @@ class GatewayRunner: ) return True - # --- Normal busy case (agent actively running a task) --- - # The user sent a message while the agent is working. Interrupt the - # agent immediately so it stops the current tool-calling loop and - # processes the new message. The pending message is stored in the - # adapter so the base adapter picks it up once the interrupted run - # returns. A brief ack tells the user what's happening (debounced - # to avoid spam when they fire multiple messages quickly). - + # Normal busy case (agent actively running a task) adapter = self.adapters.get(event.source.platform) if not adapter: return False # let default path handle it # Store the message so it's processed as the next turn after the - # interrupt causes the current run to exit. + # current run finishes (or is interrupted). from gateway.platforms.base import merge_pending_message_event merge_pending_message_event(adapter._pending_messages, session_key, event) - # Interrupt the running agent — this aborts in-flight tool calls and - # causes the agent loop to exit at the next check point. + is_queue_mode = self._busy_input_mode == "queue" + + # If not in queue mode, interrupt the running agent immediately. + # This aborts in-flight tool calls and causes the agent loop to exit + # at the next check point. 
running_agent = self._running_agents.get(session_key) - if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + if not is_queue_mode and running_agent and running_agent is not _AGENT_PENDING_SENTINEL: try: running_agent.interrupt(event.text) except Exception: @@ -1583,7 +1602,7 @@ class GatewayRunner: now = time.time() last_ack = self._busy_ack_ts.get(session_key, 0) if now - last_ack < _BUSY_ACK_COOLDOWN: - return True # interrupt sent, ack already delivered recently + return True # interrupt sent (if not queue), ack already delivered recently self._busy_ack_ts[session_key] = now @@ -1608,10 +1627,16 @@ class GatewayRunner: pass status_detail = f" ({', '.join(status_parts)})" if status_parts else "" - message = ( - f"⚡ Interrupting current task{status_detail}. " - f"I'll respond to your message shortly." - ) + if is_queue_mode: + message = ( + f"⏳ Queued for the next turn{status_detail}. " + f"I'll respond once the current task finishes." + ) + else: + message = ( + f"⚡ Interrupting current task{status_detail}. " + f"I'll respond to your message shortly." + ) thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None try: @@ -2307,6 +2332,17 @@ class GatewayRunner: for key, entry in _expired_entries: try: await self._async_flush_memories(entry.session_id, key) + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _parts = key.split(":") + _platform = _parts[2] if len(_parts) > 2 else "" + _invoke_hook( + "on_session_finalize", + session_id=entry.session_id, + platform=_platform, + ) + except Exception: + pass # Shut down memory provider and close tool resources # on the cached agent. Idle agents live in # _agent_cache (not _running_agents), so look there. @@ -2560,6 +2596,40 @@ class GatewayRunner: return async def _stop_impl() -> None: + def _kill_tool_subprocesses(phase: str) -> None: + """Kill tool subprocesses + tear down terminal envs + browsers. 
+ + Called twice in the shutdown path: once eagerly after a + drain timeout forces agent interrupt (so we reclaim bash/ + sleep children before systemd TimeoutStopSec escalates to + SIGKILL on the cgroup — #8202), and once as a final + catch-all at the end of _stop_impl() for the graceful + path or anything respawned mid-teardown. + + All steps are best-effort; exceptions are swallowed so + one subsystem's failure doesn't block the rest. + """ + try: + from tools.process_registry import process_registry + _killed = process_registry.kill_all() + if _killed: + logger.info( + "Shutdown (%s): killed %d tool subprocess(es)", + phase, _killed, + ) + except Exception as _e: + logger.debug("process_registry.kill_all (%s) error: %s", phase, _e) + try: + from tools.terminal_tool import cleanup_all_environments + cleanup_all_environments() + except Exception as _e: + logger.debug("cleanup_all_environments (%s) error: %s", phase, _e) + try: + from tools.browser_tool import cleanup_all_browsers + cleanup_all_browsers() + except Exception as _e: + logger.debug("cleanup_all_browsers (%s) error: %s", phase, _e) + logger.info( "Stopping gateway%s...", " for restart" if self._restart_requested else "", @@ -2621,6 +2691,16 @@ class GatewayRunner: self._update_runtime_status("draining") await asyncio.sleep(0.1) + # Kill lingering tool subprocesses NOW, before we spend more + # budget on adapter disconnect / session DB close. Under + # systemd (TimeoutStopSec bounded by drain_timeout+headroom), + # deferring this to the end of stop() risks systemd escalating + # to SIGKILL on the cgroup first — at which point bash/sleep + # children left behind by an interrupted terminal tool get + # killed by systemd instead of us (issue #8202). The final + # catch-all cleanup below still runs for the graceful path. 
+ _kill_tool_subprocesses("post-interrupt") + if self._restart_requested and self._restart_detached: try: await self._launch_detached_restart_command() @@ -2656,22 +2736,13 @@ class GatewayRunner: self._shutdown_event.set() # Global cleanup: kill any remaining tool subprocesses not tied - # to a specific agent (catch-all for zombie prevention). - try: - from tools.process_registry import process_registry - process_registry.kill_all() - except Exception: - pass - try: - from tools.terminal_tool import cleanup_all_environments - cleanup_all_environments() - except Exception: - pass - try: - from tools.browser_tool import cleanup_all_browsers - cleanup_all_browsers() - except Exception: - pass + # to a specific agent (catch-all for zombie prevention). On the + # drain-timeout path we already did this earlier after agent + # interrupt — this second call catches (a) the graceful path + # where drain succeeded without interrupt, and (b) anything + # that got respawned between the earlier call and adapter + # disconnect (defense in depth; safe to call repeatedly). + _kill_tool_subprocesses("final-cleanup") # Close SQLite session DBs so the WAL write lock is released. 
# Without this, --replace and similar restart flows leave the @@ -2932,6 +3003,7 @@ class GatewayRunner: Platform.QQBOT: "QQ_ALLOWED_USERS", } platform_group_env_map = { + Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS", Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS", } platform_allow_all_map = { @@ -2988,7 +3060,7 @@ class GatewayRunner: # Check platform-specific and global allowlists platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip() group_allowlist = "" - if source.chat_type == "group": + if source.chat_type in {"group", "forum"}: group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip() global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip() @@ -2997,7 +3069,7 @@ class GatewayRunner: return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") # Some platforms authorize group traffic by chat ID rather than sender ID. - if group_allowlist and source.chat_type == "group" and source.chat_id: + if group_allowlist and source.chat_type in {"group", "forum"} and source.chat_id: allowed_group_ids = { chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip() } @@ -3108,7 +3180,50 @@ class GatewayRunner: # Internal events (e.g. background-process completion notifications) # are system-generated and must skip user authorization. - if getattr(event, "internal", False): + is_internal = bool(getattr(event, "internal", False)) + + # Fire pre_gateway_dispatch plugin hook for user-originated messages. + # Plugins receive the MessageEvent and may return a dict influencing flow: + # {"action": "skip", "reason": ...} -> drop (no reply, plugin handled) + # {"action": "rewrite", "text": ...} -> replace event.text, continue + # {"action": "allow"} / None -> normal dispatch + # Hook runs BEFORE auth so plugins can handle unauthorized senders + # (e.g. customer handover ingest) without triggering the pairing flow. 
+ if not is_internal: + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _hook_results = _invoke_hook( + "pre_gateway_dispatch", + event=event, + gateway=self, + session_store=self.session_store, + ) + except Exception as _hook_exc: + logger.warning("pre_gateway_dispatch invocation failed: %s", _hook_exc) + _hook_results = [] + + for _result in _hook_results: + if not isinstance(_result, dict): + continue + _action = _result.get("action") + if _action == "skip": + logger.info( + "pre_gateway_dispatch skip: reason=%s platform=%s chat=%s", + _result.get("reason"), + source.platform.value if source.platform else "unknown", + source.chat_id or "unknown", + ) + return None + if _action == "rewrite": + _new_text = _result.get("text") + if isinstance(_new_text, str): + event = dataclasses.replace(event, text=_new_text) + source = event.source + break + if _action == "allow": + break + + if is_internal: pass elif source.user_id is None: # Messages with no user identity (Telegram service messages, @@ -3405,7 +3520,7 @@ class GatewayRunner: # running-agent guard. Reject gracefully rather than falling # through to interrupt + discard. Without this, commands # like /model, /reasoning, /voice, /insights, /title, - # /resume, /retry, /undo, /compress, /usage, /provider, + # /resume, /retry, /undo, /compress, /usage, # /reload-mcp, /sethome, /reset (all registered as Discord # slash commands) would interrupt the agent AND get # silently discarded by the slash-command safety net, @@ -3476,6 +3591,10 @@ class GatewayRunner: if self._queue_during_drain_enabled() else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now." 
) + if self._busy_input_mode == "queue": + logger.debug("PRIORITY queue follow-up for session %s", _quick_key[:20]) + self._queue_or_replace_pending_event(_quick_key, event) + return None logger.debug("PRIORITY interrupt for session %s", _quick_key[:20]) running_agent.interrupt(event.text) if _quick_key in self._pending_messages: @@ -3592,34 +3711,9 @@ class GatewayRunner: if canonical == "model": return await self._handle_model_command(event) - if canonical == "provider": - return await self._handle_provider_command(event) - if canonical == "personality": return await self._handle_personality_command(event) - if canonical == "plan": - try: - from agent.skill_commands import build_plan_path, build_skill_invocation_message - - user_instruction = event.get_command_args().strip() - plan_path = build_plan_path(user_instruction) - event.text = build_skill_invocation_message( - "/plan", - user_instruction, - task_id=_quick_key, - runtime_note=( - "Save the markdown plan with write_file to this exact relative path " - f"inside the active workspace/backend cwd: {plan_path}" - ), - ) - if not event.text: - return "Failed to load the bundled /plan skill." 
- canonical = None - except Exception as e: - logger.exception("Failed to prepare /plan command") - return f"Failed to enter plan mode: {e}" - if canonical == "retry": return await self._handle_retry_command(event) @@ -5742,63 +5836,6 @@ class GatewayRunner: return "\n".join(lines) - async def _handle_provider_command(self, event: MessageEvent) -> str: - """Handle /provider command - show available providers.""" - import yaml - from hermes_cli.models import ( - list_available_providers, - normalize_provider, - _PROVIDER_LABELS, - ) - - # Resolve current provider from config - current_provider = "openrouter" - model_cfg = {} - config_path = _hermes_home / 'config.yaml' - try: - if config_path.exists(): - with open(config_path, encoding="utf-8") as f: - cfg = yaml.safe_load(f) or {} - model_cfg = cfg.get("model", {}) - if isinstance(model_cfg, dict): - current_provider = model_cfg.get("provider", current_provider) - except Exception: - pass - - current_provider = normalize_provider(current_provider) - if current_provider == "auto": - try: - from hermes_cli.auth import resolve_provider as _resolve_provider - current_provider = _resolve_provider(current_provider) - except Exception: - current_provider = "openrouter" - - # Detect custom endpoint from config base_url - if current_provider == "openrouter": - _cfg_base = model_cfg.get("base_url", "") if isinstance(model_cfg, dict) else "" - if _cfg_base and "openrouter.ai" not in _cfg_base: - current_provider = "custom" - - current_label = _PROVIDER_LABELS.get(current_provider, current_provider) - - lines = [ - f"🔌 **Current provider:** {current_label} (`{current_provider}`)", - "", - "**Available providers:**", - ] - - providers = list_available_providers() - for p in providers: - marker = " ← active" if p["id"] == current_provider else "" - auth = "✅" if p["authenticated"] else "❌" - aliases = f" _(also: {', '.join(p['aliases'])})_" if p["aliases"] else "" - lines.append(f"{auth} `{p['id']}` — 
{p['label']}{aliases}{marker}") - - lines.append("") - lines.append("Switch: `/model provider:model-name`") - lines.append("Setup: `hermes setup`") - return "\n".join(lines) - async def _handle_personality_command(self, event: MessageEvent) -> str: """Handle /personality command - list or set a personality.""" import yaml @@ -7065,10 +7102,7 @@ class GatewayRunner: tmp_agent._print_fn = lambda *a, **kw: None compressor = tmp_agent.context_compressor - compress_start = compressor.protect_first_n - compress_start = compressor._align_boundary_forward(msgs, compress_start) - compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start) - if compress_start >= compress_end: + if not compressor.has_content_to_compress(msgs): return "Nothing to compress yet (the transcript is still all protected context)." loop = asyncio.get_running_loop() @@ -10338,9 +10372,9 @@ class GatewayRunner: # Periodic "still working" notifications for long-running tasks. # Fires every N seconds so the user knows the agent hasn't died. # Config: agent.gateway_notify_interval in config.yaml, or - # HERMES_AGENT_NOTIFY_INTERVAL env var. Default 600s (10 min). + # HERMES_AGENT_NOTIFY_INTERVAL env var. Default 180s (3 min). # 0 = disable notifications. 
- _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600)) + _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 180)) _NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None _notify_start = time.time() @@ -10919,6 +10953,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = from gateway.status import ( acquire_gateway_runtime_lock, get_running_pid, + get_process_start_time, release_gateway_runtime_lock, remove_pid_file, terminate_pid, @@ -10926,6 +10961,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = existing_pid = get_running_pid() if existing_pid is not None and existing_pid != os.getpid(): if replace: + existing_start_time = get_process_start_time(existing_pid) logger.info( "Replacing existing gateway instance (PID %d) with --replace.", existing_pid, @@ -10994,7 +11030,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = # leaving stale lock files that block the new gateway from starting. try: from gateway.status import release_all_scoped_locks - _released = release_all_scoped_locks() + _released = release_all_scoped_locks( + owner_pid=existing_pid, + owner_start_time=existing_start_time, + ) if _released: logger.info("Released %d stale scoped lock(s) from old gateway.", _released) except Exception: diff --git a/gateway/session.py b/gateway/session.py index db90d3121..fe12e6ab3 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -60,6 +60,10 @@ from .config import ( SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py HomeChannel, ) +from .whatsapp_identity import ( + canonical_whatsapp_identifier, + normalize_whatsapp_identifier, +) @dataclass @@ -281,6 +285,18 @@ def build_session_context_prompt( "Do not promise to perform these actions. If the user asks, explain " "that you can only read messages sent directly to you and respond." 
) + elif context.source.platform == Platform.BLUEBUBBLES: + lines.append("") + lines.append( + "**Platform notes:** You are responding via iMessage. " + "Keep responses short and conversational — think texts, not essays. " + "Structure longer replies as separate short thoughts, each separated " + "by a blank line (double newline). Each block between blank lines " + "will be delivered as its own iMessage bubble, so write accordingly: " + "one idea per bubble, 1–3 sentences each. " + "If the user needs a detailed answer, give the short version first " + "and offer to elaborate." + ) # Connected platforms platforms_list = ["local (files on this machine)"] @@ -518,15 +534,24 @@ def build_session_key( """ platform = source.platform.value if source.chat_type == "dm": - if source.chat_id: + dm_chat_id = source.chat_id + if source.platform == Platform.WHATSAPP: + dm_chat_id = canonical_whatsapp_identifier(source.chat_id) + + if dm_chat_id: if source.thread_id: - return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}" - return f"agent:main:{platform}:dm:{source.chat_id}" + return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}" + return f"agent:main:{platform}:dm:{dm_chat_id}" if source.thread_id: return f"agent:main:{platform}:dm:{source.thread_id}" return f"agent:main:{platform}:dm" participant_id = source.user_id_alt or source.user_id + if participant_id and source.platform == Platform.WHATSAPP: + # Same JID/LID-flip bug as the DM case: without canonicalisation, a + # single group member gets two isolated per-user sessions when the + # bridge reshuffles alias forms. 
+ participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id key_parts = ["agent:main", platform, source.chat_type] if source.chat_id: diff --git a/gateway/status.py b/gateway/status.py index 9e373564d..7f7df182f 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -113,6 +113,11 @@ def _get_process_start_time(pid: int) -> Optional[int]: return None +def get_process_start_time(pid: int) -> Optional[int]: + """Public wrapper for retrieving a process start time when available.""" + return _get_process_start_time(pid) + + def _read_process_cmdline(pid: int) -> Optional[str]: """Return the process command line as a space-separated string.""" cmdline_path = Path(f"/proc/{pid}/cmdline") @@ -562,17 +567,43 @@ def release_scoped_lock(scope: str, identity: str) -> None: pass -def release_all_scoped_locks() -> int: - """Remove all scoped lock files in the lock directory. +def release_all_scoped_locks( + *, + owner_pid: Optional[int] = None, + owner_start_time: Optional[int] = None, +) -> int: + """Remove scoped lock files in the lock directory. Called during --replace to clean up stale locks left by stopped/killed - gateway processes that did not release their locks gracefully. + gateway processes that did not release their locks gracefully. When an + ``owner_pid`` is provided, only lock records belonging to that gateway + process are removed. ``owner_start_time`` further narrows the match to + protect against PID reuse. + + When no owner is provided, preserves the legacy behavior and removes every + scoped lock file in the directory. + Returns the number of lock files removed. 
""" lock_dir = _get_lock_dir() removed = 0 if lock_dir.exists(): for lock_file in lock_dir.glob("*.lock"): + if owner_pid is not None: + record = _read_json_file(lock_file) + if not isinstance(record, dict): + continue + try: + record_pid = int(record.get("pid")) + except (TypeError, ValueError): + continue + if record_pid != owner_pid: + continue + if ( + owner_start_time is not None + and record.get("start_time") != owner_start_time + ): + continue try: lock_file.unlink(missing_ok=True) removed += 1 diff --git a/gateway/whatsapp_identity.py b/gateway/whatsapp_identity.py new file mode 100644 index 000000000..b0792daf7 --- /dev/null +++ b/gateway/whatsapp_identity.py @@ -0,0 +1,135 @@ +"""Shared helpers for canonicalising WhatsApp sender identity. + +WhatsApp's bridge can surface the same human under two different JID shapes +within a single conversation: + +- LID form: ``999999999999999@lid`` +- Phone form: ``15551234567@s.whatsapp.net`` + +Both the authorisation path (:mod:`gateway.run`) and the session-key path +(:mod:`gateway.session`) need to collapse these aliases to a single stable +identity. This module is the single source of truth for that resolution so +the two paths can never drift apart. + +Public helpers: + +- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax + down to the bare numeric identifier. +- :func:`canonical_whatsapp_identifier` — walk the bridge's + ``lid-mapping-*.json`` files and return a stable canonical identity + across phone/LID variants. +- :func:`expand_whatsapp_aliases` — return the full alias set for an + identifier. Used by authorisation code that needs to match any known + form of a sender against an allow-list. + +Plugins that need per-sender behaviour on WhatsApp (role-based routing, +per-contact authorisation, policy gating in a gateway hook) should use +``canonical_whatsapp_identifier`` so their bookkeeping lines up with +Hermes' own session keys. 
+""" + +from __future__ import annotations + +import json +from typing import Set + +from hermes_constants import get_hermes_home + + +def normalize_whatsapp_identifier(value: str) -> str: + """Strip WhatsApp JID/LID syntax down to its stable numeric identifier. + + Accepts any of the identifier shapes the WhatsApp bridge may emit: + ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``, + ``"60123456789@lid"``, or a bare ``"+601****6789"`` / ``"60123456789"``. + Returns just the numeric identifier (``"60123456789"``) suitable for + equality comparisons. + + Useful for plugins that want to match sender IDs against + user-supplied config (phone numbers in ``config.yaml``) without + worrying about which variant the bridge happens to deliver. + """ + return ( + str(value or "") + .strip() + .replace("+", "", 1) + .split(":", 1)[0] + .split("@", 1)[0] + ) + + +def expand_whatsapp_aliases(identifier: str) -> Set[str]: + """Resolve WhatsApp phone/LID aliases via bridge session mapping files. + + Returns the set of all identifiers transitively reachable through the + bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files, + starting from ``identifier``. The result always includes the + normalized input itself, so callers can safely ``in`` check against + the return value without a separate fallback branch. + + Returns an empty set if ``identifier`` normalizes to empty. 
+ """ + normalized = normalize_whatsapp_identifier(identifier) + if not normalized: + return set() + + session_dir = get_hermes_home() / "whatsapp" / "session" + resolved: Set[str] = set() + queue = [normalized] + + while queue: + current = queue.pop(0) + if not current or current in resolved: + continue + + resolved.add(current) + for suffix in ("", "_reverse"): + mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json" + if not mapping_path.exists(): + continue + try: + mapped = normalize_whatsapp_identifier( + json.loads(mapping_path.read_text(encoding="utf-8")) + ) + except Exception: + continue + if mapped and mapped not in resolved: + queue.append(mapped) + + return resolved + + +def canonical_whatsapp_identifier(identifier: str) -> str: + """Return a stable WhatsApp sender identity across phone-JID/LID variants. + + WhatsApp may surface the same person under either a phone-format JID + (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This + applies to a DM ``chat_id`` *and* to the ``participant_id`` of a + member inside a group chat — both represent a user identity, and the + bridge may flip between the two for the same human. + + This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json`` + files, walks the mapping transitively, and picks the shortest + (numeric-preferred) alias as the canonical identity. + :func:`gateway.session.build_session_key` uses this for both WhatsApp + DM chat_ids and WhatsApp group participant_ids, so callers get the + same session-key identity Hermes itself uses. + + Plugins that need per-sender behaviour (role-based routing, + authorisation, per-contact policy) should use this so their + bookkeeping lines up with Hermes' session bookkeeping even when + the bridge reshuffles aliases. + + Returns an empty string if ``identifier`` normalizes to empty. If no + mapping files exist yet (fresh bridge install), returns the + normalized input unchanged. 
+ """ + normalized = normalize_whatsapp_identifier(identifier) + if not normalized: + return "" + + # expand_whatsapp_aliases always includes `normalized` itself in the + # returned set, so the min() below degrades gracefully to `normalized` + # when no lid-mapping files are present. + aliases = expand_whatsapp_aliases(normalized) + return min(aliases, key=lambda candidate: (len(candidate), candidate)) diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py index b9879e3b5..2bf9acb40 100644 --- a/hermes_cli/__init__.py +++ b/hermes_cli/__init__.py @@ -11,5 +11,5 @@ Provides subcommands for: - hermes cron - Manage cron jobs """ -__version__ = "0.10.0" -__release_date__ = "2026.4.16" +__version__ = "0.11.0" +__release_date__ = "2026.4.23" diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 98ac4edb3..00685436d 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -22,6 +22,7 @@ import shutil import shlex import ssl import stat +import sys import base64 import hashlib import subprocess @@ -32,8 +33,10 @@ import webbrowser from contextlib import contextmanager from dataclasses import dataclass, field from datetime import datetime, timezone +from http.server import BaseHTTPRequestHandler, HTTPServer from pathlib import Path from typing import Any, Dict, List, Optional +from urllib.parse import parse_qs, urlencode, urlparse import httpx import yaml @@ -80,6 +83,27 @@ CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56" QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token" QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 +DEFAULT_SPOTIFY_ACCOUNTS_BASE_URL = "https://accounts.spotify.com" +DEFAULT_SPOTIFY_API_BASE_URL = "https://api.spotify.com/v1" +DEFAULT_SPOTIFY_REDIRECT_URI = "http://127.0.0.1:43827/spotify/callback" +SPOTIFY_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/spotify" +SPOTIFY_DASHBOARD_URL = "https://developer.spotify.com/dashboard" 
+SPOTIFY_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 +DEFAULT_SPOTIFY_SCOPE = " ".join(( + "user-modify-playback-state", + "user-read-playback-state", + "user-read-currently-playing", + "user-read-recently-played", + "playlist-read-private", + "playlist-read-collaborative", + "playlist-modify-public", + "playlist-modify-private", + "user-library-read", + "user-library-modify", +)) +SERVICE_PROVIDER_NAMES: Dict[str, str] = { + "spotify": "Spotify", +} # Google Gemini OAuth (google-gemini-cli provider, Cloud Code Assist backend) DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google" @@ -224,6 +248,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("DASHSCOPE_API_KEY",), base_url_env_var="DASHSCOPE_BASE_URL", ), + "alibaba-coding-plan": ProviderConfig( + id="alibaba-coding-plan", + name="Alibaba Cloud (Coding Plan)", + auth_type="api_key", + inference_base_url="https://coding-intl.dashscope.aliyuncs.com/v1", + api_key_env_vars=("ALIBABA_CODING_PLAN_API_KEY", "DASHSCOPE_API_KEY"), + base_url_env_var="ALIBABA_CODING_PLAN_BASE_URL", + ), "minimax-cn": ProviderConfig( id="minimax-cn", name="MiniMax (China)", @@ -417,10 +449,10 @@ def _resolve_api_key_provider_secret( if provider_id == "copilot": # Use the dedicated copilot auth module for proper token validation try: - from hermes_cli.copilot_auth import resolve_copilot_token + from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token token, source = resolve_copilot_token() if token: - return token, source + return get_copilot_api_token(token), source except ValueError as exc: logger.warning("Copilot token validation failed: %s", exc) except Exception: @@ -619,7 +651,25 @@ def _oauth_trace(event: str, *, sequence_id: Optional[str] = None, **fields: Any # ============================================================================= def _auth_file_path() -> Path: - return get_hermes_home() / "auth.json" + path = get_hermes_home() / "auth.json" + # Seat belt: if pytest is running 
and HERMES_HOME resolves to the real + # user's auth store, refuse rather than silently corrupt it. This catches + # tests that forgot to monkeypatch HERMES_HOME, tests invoked without the + # hermetic conftest, or sandbox escapes via threads/subprocesses. In + # production (no PYTEST_CURRENT_TEST) this is a single dict lookup. + if os.environ.get("PYTEST_CURRENT_TEST"): + real_home_auth = (Path.home() / ".hermes" / "auth.json").resolve(strict=False) + try: + resolved = path.resolve(strict=False) + except Exception: + resolved = path + if resolved == real_home_auth: + raise RuntimeError( + f"Refusing to touch real user auth store during test run: {path}. " + "Set HERMES_HOME to a tmp_path in your test fixture, or run " + "via scripts/run_tests.sh for hermetic CI-parity env." + ) + return path def _auth_lock_path() -> Path: @@ -693,7 +743,18 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]: try: raw = json.loads(auth_file.read_text()) - except Exception: + except Exception as exc: + corrupt_path = auth_file.with_suffix(".json.corrupt") + try: + import shutil + shutil.copy2(auth_file, corrupt_path) + except Exception: + pass + logger.warning( + "auth: failed to parse %s (%s) — starting with empty store. 
" + "Corrupt file preserved at %s", + auth_file, exc, corrupt_path, + ) return {"version": AUTH_STORE_VERSION, "providers": {}} if isinstance(raw, dict) and ( @@ -768,6 +829,34 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di auth_store["active_provider"] = provider_id +def _store_provider_state( + auth_store: Dict[str, Any], + provider_id: str, + state: Dict[str, Any], + *, + set_active: bool = True, +) -> None: + providers = auth_store.setdefault("providers", {}) + if not isinstance(providers, dict): + auth_store["providers"] = {} + providers = auth_store["providers"] + providers[provider_id] = state + if set_active: + auth_store["active_provider"] = provider_id + + +def is_known_auth_provider(provider_id: str) -> bool: + normalized = (provider_id or "").strip().lower() + return normalized in PROVIDER_REGISTRY or normalized in SERVICE_PROVIDER_NAMES + + +def get_auth_provider_display_name(provider_id: str) -> str: + normalized = (provider_id or "").strip().lower() + if normalized in PROVIDER_REGISTRY: + return PROVIDER_REGISTRY[normalized].name + return SERVICE_PROVIDER_NAMES.get(normalized, provider_id) + + def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]: """Return the persisted credential pool, or one provider slice.""" auth_store = _load_auth_store() @@ -928,10 +1017,12 @@ def clear_provider_auth(provider_id: Optional[str] = None) -> bool: del pool[target] cleared = True - if not cleared: - return False if auth_store.get("active_provider") == target: auth_store["active_provider"] = None + cleared = True + + if not cleared: + return False _save_auth_store(auth_store) return True @@ -1006,6 +1097,8 @@ def resolve_provider( "step": "stepfun", "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", + "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan", + "alibaba_coding_plan": 
"alibaba-coding-plan", "claude": "anthropic", "claude-code": "anthropic", "github": "copilot", "github-copilot": "copilot", "github-models": "copilot", "github-model": "copilot", @@ -1398,8 +1491,597 @@ def get_gemini_oauth_auth_status() -> Dict[str, Any]: "email": creds.email, "project_id": creds.project_id, } +# Spotify auth — PKCE tokens stored in ~/.hermes/auth.json +# ============================================================================= +def _spotify_scope_list(raw_scope: Optional[str] = None) -> List[str]: + scope_text = (raw_scope or DEFAULT_SPOTIFY_SCOPE).strip() + scopes = [part for part in scope_text.split() if part] + seen: set[str] = set() + ordered: List[str] = [] + for scope in scopes: + if scope not in seen: + seen.add(scope) + ordered.append(scope) + return ordered + + +def _spotify_scope_string(raw_scope: Optional[str] = None) -> str: + return " ".join(_spotify_scope_list(raw_scope)) + + +def _spotify_client_id( + explicit: Optional[str] = None, + state: Optional[Dict[str, Any]] = None, +) -> str: + from hermes_cli.config import get_env_value + + candidates = ( + explicit, + get_env_value("HERMES_SPOTIFY_CLIENT_ID"), + get_env_value("SPOTIFY_CLIENT_ID"), + state.get("client_id") if isinstance(state, dict) else None, + ) + for candidate in candidates: + cleaned = str(candidate or "").strip() + if cleaned: + return cleaned + raise AuthError( + "Spotify client_id is required. 
Set HERMES_SPOTIFY_CLIENT_ID or pass --client-id.", + provider="spotify", + code="spotify_client_id_missing", + ) + + +def _spotify_redirect_uri( + explicit: Optional[str] = None, + state: Optional[Dict[str, Any]] = None, +) -> str: + from hermes_cli.config import get_env_value + + candidates = ( + explicit, + get_env_value("HERMES_SPOTIFY_REDIRECT_URI"), + get_env_value("SPOTIFY_REDIRECT_URI"), + state.get("redirect_uri") if isinstance(state, dict) else None, + DEFAULT_SPOTIFY_REDIRECT_URI, + ) + for candidate in candidates: + cleaned = str(candidate or "").strip() + if cleaned: + return cleaned + return DEFAULT_SPOTIFY_REDIRECT_URI + + +def _spotify_api_base_url(state: Optional[Dict[str, Any]] = None) -> str: + from hermes_cli.config import get_env_value + + candidates = ( + get_env_value("HERMES_SPOTIFY_API_BASE_URL"), + state.get("api_base_url") if isinstance(state, dict) else None, + DEFAULT_SPOTIFY_API_BASE_URL, + ) + for candidate in candidates: + cleaned = str(candidate or "").strip().rstrip("/") + if cleaned: + return cleaned + return DEFAULT_SPOTIFY_API_BASE_URL + + +def _spotify_accounts_base_url(state: Optional[Dict[str, Any]] = None) -> str: + from hermes_cli.config import get_env_value + + candidates = ( + get_env_value("HERMES_SPOTIFY_ACCOUNTS_BASE_URL"), + state.get("accounts_base_url") if isinstance(state, dict) else None, + DEFAULT_SPOTIFY_ACCOUNTS_BASE_URL, + ) + for candidate in candidates: + cleaned = str(candidate or "").strip().rstrip("/") + if cleaned: + return cleaned + return DEFAULT_SPOTIFY_ACCOUNTS_BASE_URL + + +def _spotify_code_verifier(length: int = 64) -> str: + raw = base64.urlsafe_b64encode(os.urandom(length)).decode("ascii") + return raw.rstrip("=")[:128] + + +def _spotify_code_challenge(code_verifier: str) -> str: + digest = hashlib.sha256(code_verifier.encode("utf-8")).digest() + return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=") + + +def _spotify_build_authorize_url( + *, + client_id: str, + redirect_uri: str, 
+ scope: str, + state: str, + code_challenge: str, + accounts_base_url: str, +) -> str: + query = urlencode({ + "client_id": client_id, + "response_type": "code", + "redirect_uri": redirect_uri, + "scope": scope, + "state": state, + "code_challenge_method": "S256", + "code_challenge": code_challenge, + }) + return f"{accounts_base_url}/authorize?{query}" + + +def _spotify_validate_redirect_uri(redirect_uri: str) -> tuple[str, int, str]: + parsed = urlparse(redirect_uri) + if parsed.scheme != "http": + raise AuthError( + "Spotify PKCE redirect_uri must use http://localhost or http://127.0.0.1.", + provider="spotify", + code="spotify_redirect_invalid", + ) + host = parsed.hostname or "" + if host not in {"127.0.0.1", "localhost"}: + raise AuthError( + "Spotify PKCE redirect_uri must point to localhost or 127.0.0.1.", + provider="spotify", + code="spotify_redirect_invalid", + ) + if not parsed.port: + raise AuthError( + "Spotify PKCE redirect_uri must include an explicit localhost port.", + provider="spotify", + code="spotify_redirect_invalid", + ) + return host, parsed.port, parsed.path or "/" + + +def _make_spotify_callback_handler(expected_path: str) -> tuple[type[BaseHTTPRequestHandler], dict[str, Any]]: + result: dict[str, Any] = { + "code": None, + "state": None, + "error": None, + "error_description": None, + } + + class _SpotifyCallbackHandler(BaseHTTPRequestHandler): + def do_GET(self) -> None: # noqa: N802 + parsed = urlparse(self.path) + if parsed.path != expected_path: + self.send_response(404) + self.end_headers() + self.wfile.write(b"Not found.") + return + + params = parse_qs(parsed.query) + result["code"] = params.get("code", [None])[0] + result["state"] = params.get("state", [None])[0] + result["error"] = params.get("error", [None])[0] + result["error_description"] = params.get("error_description", [None])[0] + + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + if result["error"]: + body = "

Spotify authorization failed.

You can close this tab." + else: + body = "

Spotify authorization received.

You can close this tab." + self.wfile.write(body.encode("utf-8")) + + def log_message(self, format: str, *args: Any) -> None: # noqa: A003 + return + + return _SpotifyCallbackHandler, result + + +def _spotify_wait_for_callback( + redirect_uri: str, + *, + timeout_seconds: float = 180.0, +) -> dict[str, Any]: + host, port, path = _spotify_validate_redirect_uri(redirect_uri) + handler_cls, result = _make_spotify_callback_handler(path) + + class _ReuseHTTPServer(HTTPServer): + allow_reuse_address = True + + try: + server = _ReuseHTTPServer((host, port), handler_cls) + except OSError as exc: + raise AuthError( + f"Could not bind Spotify callback server on {host}:{port}: {exc}", + provider="spotify", + code="spotify_callback_bind_failed", + ) from exc + + thread = threading.Thread(target=server.serve_forever, kwargs={"poll_interval": 0.1}, daemon=True) + thread.start() + deadline = time.time() + max(5.0, timeout_seconds) + try: + while time.time() < deadline: + if result["code"] or result["error"]: + return result + time.sleep(0.1) + finally: + server.shutdown() + server.server_close() + thread.join(timeout=1.0) + raise AuthError( + "Spotify authorization timed out waiting for the local callback.", + provider="spotify", + code="spotify_callback_timeout", + ) + + +def _spotify_token_payload_to_state( + token_payload: Dict[str, Any], + *, + client_id: str, + redirect_uri: str, + requested_scope: str, + accounts_base_url: str, + api_base_url: str, + previous_state: Optional[Dict[str, Any]] = None, +) -> Dict[str, Any]: + now = datetime.now(timezone.utc) + expires_in = _coerce_ttl_seconds(token_payload.get("expires_in", 0)) + expires_at = datetime.fromtimestamp(now.timestamp() + expires_in, tz=timezone.utc) + state = dict(previous_state or {}) + state.update({ + "client_id": client_id, + "redirect_uri": redirect_uri, + "accounts_base_url": accounts_base_url, + "api_base_url": api_base_url, + "scope": requested_scope, + "granted_scope": str(token_payload.get("scope") or 
requested_scope).strip(), + "token_type": str(token_payload.get("token_type", "Bearer") or "Bearer").strip() or "Bearer", + "access_token": str(token_payload.get("access_token", "") or "").strip(), + "refresh_token": str( + token_payload.get("refresh_token") + or state.get("refresh_token") + or "" + ).strip(), + "obtained_at": now.isoformat(), + "expires_at": expires_at.isoformat(), + "expires_in": expires_in, + "auth_type": "oauth_pkce", + }) + return state + + +def _spotify_exchange_code_for_tokens( + *, + client_id: str, + code: str, + redirect_uri: str, + code_verifier: str, + accounts_base_url: str, + timeout_seconds: float = 20.0, +) -> Dict[str, Any]: + try: + response = httpx.post( + f"{accounts_base_url}/api/token", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data={ + "client_id": client_id, + "grant_type": "authorization_code", + "code": code, + "redirect_uri": redirect_uri, + "code_verifier": code_verifier, + }, + timeout=timeout_seconds, + ) + except Exception as exc: + raise AuthError( + f"Spotify token exchange failed: {exc}", + provider="spotify", + code="spotify_token_exchange_failed", + ) from exc + + if response.status_code >= 400: + detail = response.text.strip() + raise AuthError( + "Spotify token exchange failed." + + (f" Response: {detail}" if detail else ""), + provider="spotify", + code="spotify_token_exchange_failed", + ) + payload = response.json() + if not isinstance(payload, dict) or not str(payload.get("access_token", "") or "").strip(): + raise AuthError( + "Spotify token response did not include an access_token.", + provider="spotify", + code="spotify_token_exchange_invalid", + ) + return payload + + +def _refresh_spotify_oauth_state( + state: Dict[str, Any], + *, + timeout_seconds: float = 20.0, +) -> Dict[str, Any]: + refresh_token = str(state.get("refresh_token", "") or "").strip() + if not refresh_token: + raise AuthError( + "Spotify refresh token missing. 
Run `hermes auth spotify` again.", + provider="spotify", + code="spotify_refresh_token_missing", + relogin_required=True, + ) + + client_id = _spotify_client_id(state=state) + accounts_base_url = _spotify_accounts_base_url(state) + try: + response = httpx.post( + f"{accounts_base_url}/api/token", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data={ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + }, + timeout=timeout_seconds, + ) + except Exception as exc: + raise AuthError( + f"Spotify token refresh failed: {exc}", + provider="spotify", + code="spotify_refresh_failed", + ) from exc + + if response.status_code >= 400: + detail = response.text.strip() + raise AuthError( + "Spotify token refresh failed. Run `hermes auth spotify` again." + + (f" Response: {detail}" if detail else ""), + provider="spotify", + code="spotify_refresh_failed", + relogin_required=True, + ) + + payload = response.json() + if not isinstance(payload, dict) or not str(payload.get("access_token", "") or "").strip(): + raise AuthError( + "Spotify refresh response did not include an access_token.", + provider="spotify", + code="spotify_refresh_invalid", + relogin_required=True, + ) + + return _spotify_token_payload_to_state( + payload, + client_id=client_id, + redirect_uri=_spotify_redirect_uri(state=state), + requested_scope=str(state.get("scope") or DEFAULT_SPOTIFY_SCOPE), + accounts_base_url=accounts_base_url, + api_base_url=_spotify_api_base_url(state), + previous_state=state, + ) + + +def resolve_spotify_runtime_credentials( + *, + force_refresh: bool = False, + refresh_if_expiring: bool = True, + refresh_skew_seconds: int = SPOTIFY_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, +) -> Dict[str, Any]: + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "spotify") + if not state: + raise AuthError( + "Spotify is not authenticated. 
Run `hermes auth spotify` first.", + provider="spotify", + code="spotify_auth_missing", + relogin_required=True, + ) + + should_refresh = bool(force_refresh) + if not should_refresh and refresh_if_expiring: + should_refresh = _is_expiring(state.get("expires_at"), refresh_skew_seconds) + if should_refresh: + state = _refresh_spotify_oauth_state(state) + _store_provider_state(auth_store, "spotify", state, set_active=False) + _save_auth_store(auth_store) + + access_token = str(state.get("access_token", "") or "").strip() + if not access_token: + raise AuthError( + "Spotify access token missing. Run `hermes auth spotify` again.", + provider="spotify", + code="spotify_access_token_missing", + relogin_required=True, + ) + + return { + "provider": "spotify", + "access_token": access_token, + "api_key": access_token, + "token_type": str(state.get("token_type", "Bearer") or "Bearer"), + "base_url": _spotify_api_base_url(state), + "scope": str(state.get("granted_scope") or state.get("scope") or "").strip(), + "client_id": _spotify_client_id(state=state), + "redirect_uri": _spotify_redirect_uri(state=state), + "expires_at": state.get("expires_at"), + "refresh_token": str(state.get("refresh_token", "") or "").strip(), + } + + +def get_spotify_auth_status() -> Dict[str, Any]: + state = get_provider_auth_state("spotify") + if not state: + return {"logged_in": False} + + expires_at = state.get("expires_at") + refresh_token = str(state.get("refresh_token", "") or "").strip() + return { + "logged_in": bool(refresh_token or not _is_expiring(expires_at, 0)), + "auth_type": state.get("auth_type", "oauth_pkce"), + "client_id": state.get("client_id"), + "redirect_uri": state.get("redirect_uri"), + "scope": state.get("granted_scope") or state.get("scope"), + "expires_at": expires_at, + "api_base_url": state.get("api_base_url"), + "has_refresh_token": bool(refresh_token), + } + + +def _spotify_interactive_setup(redirect_uri_hint: str) -> str: + """Walk the user through creating a Spotify 
developer app, persist the + resulting client_id to ~/.hermes/.env, and return it. + + Raises SystemExit if the user aborts or submits an empty value. + """ + from hermes_cli.config import save_env_value + + print() + print("=" * 70) + print("Spotify first-time setup") + print("=" * 70) + print() + print("Spotify requires every user to register their own lightweight") + print("developer app. This takes about two minutes and only has to be") + print("done once per machine.") + print() + print(f"Full guide: {SPOTIFY_DOCS_URL}") + print() + print("Steps:") + print(f" 1. Opening {SPOTIFY_DASHBOARD_URL} in your browser...") + print(" 2. Click 'Create app' and fill in:") + print(" App name: anything (e.g. hermes-agent)") + print(" Description: anything") + print(f" Redirect URI: {redirect_uri_hint}") + print(" API/SDK: Web API") + print(" 3. Agree to the terms, click Save.") + print(" 4. Open the app's Settings page and copy the Client ID.") + print(" 5. Paste it below.") + print() + + if not _is_remote_session(): + try: + webbrowser.open(SPOTIFY_DASHBOARD_URL) + except Exception: + pass + + try: + raw = input("Spotify Client ID: ").strip() + except (EOFError, KeyboardInterrupt): + print() + raise SystemExit("Spotify setup cancelled.") + + if not raw: + print() + print(f"No Client ID entered. See {SPOTIFY_DOCS_URL} for the full guide.") + raise SystemExit("Spotify setup cancelled: empty Client ID.") + + # Persist so subsequent `hermes auth spotify` runs skip the wizard. + save_env_value("HERMES_SPOTIFY_CLIENT_ID", raw) + # Only persist the redirect URI if it's non-default, to avoid pinning + # users to a value the default might later change to. 
+ if redirect_uri_hint and redirect_uri_hint != DEFAULT_SPOTIFY_REDIRECT_URI: + save_env_value("HERMES_SPOTIFY_REDIRECT_URI", redirect_uri_hint) + + print() + print("Saved HERMES_SPOTIFY_CLIENT_ID to ~/.hermes/.env") + print() + return raw + + +def login_spotify_command(args) -> None: + existing_state = get_provider_auth_state("spotify") or {} + + # Interactive wizard: if no client_id is configured anywhere, walk the + # user through creating the Spotify developer app instead of crashing + # with "HERMES_SPOTIFY_CLIENT_ID is required". + explicit_client_id = getattr(args, "client_id", None) + try: + client_id = _spotify_client_id(explicit_client_id, existing_state) + except AuthError as exc: + if getattr(exc, "code", "") != "spotify_client_id_missing": + raise + client_id = _spotify_interactive_setup( + redirect_uri_hint=getattr(args, "redirect_uri", None) or DEFAULT_SPOTIFY_REDIRECT_URI, + ) + + redirect_uri = _spotify_redirect_uri(getattr(args, "redirect_uri", None), existing_state) + scope = _spotify_scope_string(getattr(args, "scope", None) or existing_state.get("scope")) + accounts_base_url = _spotify_accounts_base_url(existing_state) + api_base_url = _spotify_api_base_url(existing_state) + open_browser = not getattr(args, "no_browser", False) + + code_verifier = _spotify_code_verifier() + code_challenge = _spotify_code_challenge(code_verifier) + state_nonce = uuid.uuid4().hex + authorize_url = _spotify_build_authorize_url( + client_id=client_id, + redirect_uri=redirect_uri, + scope=scope, + state=state_nonce, + code_challenge=code_challenge, + accounts_base_url=accounts_base_url, + ) + + print("Starting Spotify PKCE login...") + print(f"Client ID: {client_id}") + print(f"Redirect URI: {redirect_uri}") + print("Make sure this redirect URI is allow-listed in your Spotify app settings.") + print() + print("Open this URL to authorize Hermes:") + print(authorize_url) + print() + print(f"Full setup guide: {SPOTIFY_DOCS_URL}") + print() + + if open_browser and not 
_is_remote_session(): + try: + opened = webbrowser.open(authorize_url) + except Exception: + opened = False + if opened: + print("Browser opened for Spotify authorization.") + else: + print("Could not open the browser automatically; use the URL above.") + + callback = _spotify_wait_for_callback( + redirect_uri, + timeout_seconds=float(getattr(args, "timeout", None) or 180.0), + ) + if callback.get("error"): + detail = callback.get("error_description") or callback["error"] + raise SystemExit(f"Spotify authorization failed: {detail}") + if callback.get("state") != state_nonce: + raise SystemExit("Spotify authorization failed: state mismatch.") + + token_payload = _spotify_exchange_code_for_tokens( + client_id=client_id, + code=str(callback.get("code") or ""), + redirect_uri=redirect_uri, + code_verifier=code_verifier, + accounts_base_url=accounts_base_url, + timeout_seconds=float(getattr(args, "timeout", None) or 20.0), + ) + spotify_state = _spotify_token_payload_to_state( + token_payload, + client_id=client_id, + redirect_uri=redirect_uri, + requested_scope=scope, + accounts_base_url=accounts_base_url, + api_base_url=api_base_url, + ) + + with _auth_store_lock(): + auth_store = _load_auth_store() + _store_provider_state(auth_store, "spotify", spotify_state, set_active=False) + saved_to = _save_auth_store(auth_store) + + print("Spotify login successful!") + print(f" Auth state: {saved_to}") + print(" Provider state saved under providers.spotify") + print(f" Docs: {SPOTIFY_DOCS_URL}") # ============================================================================= # SSH / remote session detection @@ -1516,12 +2198,21 @@ def refresh_codex_oauth_pure( try: err = response.json() if isinstance(err, dict): - err_code = err.get("error") - if isinstance(err_code, str) and err_code.strip(): - code = err_code.strip() - err_desc = err.get("error_description") or err.get("message") - if isinstance(err_desc, str) and err_desc.strip(): - message = f"Codex token refresh failed: 
{err_desc.strip()}" + err_obj = err.get("error") + # OpenAI shape: {"error": {"code": "...", "message": "...", "type": "..."}} + if isinstance(err_obj, dict): + nested_code = err_obj.get("code") or err_obj.get("type") + if isinstance(nested_code, str) and nested_code.strip(): + code = nested_code.strip() + nested_msg = err_obj.get("message") + if isinstance(nested_msg, str) and nested_msg.strip(): + message = f"Codex token refresh failed: {nested_msg.strip()}" + # OAuth spec shape: {"error": "code_str", "error_description": "..."} + elif isinstance(err_obj, str) and err_obj.strip(): + code = err_obj.strip() + err_desc = err.get("error_description") or err.get("message") + if isinstance(err_desc, str) and err_desc.strip(): + message = f"Codex token refresh failed: {err_desc.strip()}" except Exception: pass if code in {"invalid_grant", "invalid_token", "invalid_request"}: @@ -1680,6 +2371,24 @@ def resolve_codex_runtime_credentials( # TLS verification helper # ============================================================================= +def _default_verify() -> bool | ssl.SSLContext: + """Platform-aware default SSL verify for httpx clients. + + On macOS with Homebrew Python, the system OpenSSL cannot locate the + system trust store and valid public certs fail verification. When + certifi is importable we pin its bundle explicitly; elsewhere we + defer to httpx's built-in default (certifi via its own dependency). + Mirrors the weixin fix in 3a0ec1d93. 
+ """ + if sys.platform == "darwin": + try: + import certifi + return ssl.create_default_context(cafile=certifi.where()) + except ImportError: + pass + return True + + def _resolve_verify( *, insecure: Optional[bool] = None, @@ -1698,6 +2407,7 @@ def _resolve_verify( or tls_state.get("ca_bundle") or os.getenv("HERMES_CA_BUNDLE") or os.getenv("SSL_CERT_FILE") + or os.getenv("REQUESTS_CA_BUNDLE") ) if effective_insecure: @@ -1709,9 +2419,9 @@ def _resolve_verify( "CA bundle path does not exist: %s — falling back to default certificates", ca_path, ) - return True + return _default_verify() return ssl.create_default_context(cafile=ca_path) - return True + return _default_verify() # ============================================================================= @@ -1830,6 +2540,28 @@ def _refresh_access_token( code = str(error_payload.get("error", "invalid_grant")) description = str(error_payload.get("error_description") or "Refresh token exchange failed") relogin = code in {"invalid_grant", "invalid_token"} + + # Detect the OAuth 2.1 "refresh token reuse" signal from the Nous portal + # server and surface an actionable message. This fires when an external + # process (health-check script, monitoring tool, custom self-heal hook) + # called POST /api/oauth/token with Hermes's refresh_token without + # persisting the rotated token back to auth.json — the server then + # retires the original RT, Hermes's next refresh uses it, and the whole + # session chain gets revoked as a token-theft signal (#15099). 
+ lowered = description.lower() + if "reuse" in lowered or "reuse detected" in lowered: + description = ( + "Nous Portal detected refresh-token reuse and revoked this session.\n" + "This usually means an external process (monitoring script, " + "custom self-heal hook, or another Hermes install sharing " + "~/.hermes/auth.json) called POST /api/oauth/token with Hermes's " + "refresh token without persisting the rotated token back.\n" + "Nous refresh tokens are single-use — only Hermes may call the " + "refresh endpoint. For health checks, use `hermes auth status` " + "instead.\n" + "Re-authenticate with: hermes auth add nous" + ) + raise AuthError(description, provider="nous", code=code, relogin_required=relogin) @@ -2438,59 +3170,116 @@ def resolve_nous_runtime_credentials( # Status helpers # ============================================================================= -def get_nous_auth_status() -> Dict[str, Any]: - """Status snapshot for `hermes status` output. +def _empty_nous_auth_status() -> Dict[str, Any]: + return { + "logged_in": False, + "portal_base_url": None, + "inference_base_url": None, + "access_expires_at": None, + "agent_key_expires_at": None, + "has_refresh_token": False, + } - Checks the credential pool first (where the dashboard device-code flow - and ``hermes auth`` store credentials), then falls back to the legacy - auth-store provider state. + +def _snapshot_nous_pool_status() -> Dict[str, Any]: + """Best-effort status from the credential pool. + + This is a fallback only. The auth-store provider state is the runtime source + of truth because it is what ``resolve_nous_runtime_credentials()`` refreshes + and mints against. """ - # Check credential pool first — the dashboard device-code flow saves - # here but may not have written to the auth store yet. 
try: from agent.credential_pool import load_pool - pool = load_pool("nous") - if pool and pool.has_credentials(): - entry = pool.select() - if entry is not None: - access_token = ( - getattr(entry, "access_token", None) - or getattr(entry, "runtime_api_key", "") - ) - if access_token: - return { - "logged_in": True, - "portal_base_url": getattr(entry, "portal_base_url", None) - or getattr(entry, "base_url", None), - "inference_base_url": getattr(entry, "inference_base_url", None) - or getattr(entry, "base_url", None), - "access_token": access_token, - "access_expires_at": getattr(entry, "expires_at", None), - "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None), - "has_refresh_token": bool(getattr(entry, "refresh_token", None)), - } - except Exception: - pass - # Fall back to auth-store provider state - state = get_provider_auth_state("nous") - if not state: + pool = load_pool("nous") + if not pool or not pool.has_credentials(): + return _empty_nous_auth_status() + + entries = list(pool.entries()) + if not entries: + return _empty_nous_auth_status() + + def _entry_sort_key(entry: Any) -> tuple[float, float, int]: + agent_exp = _parse_iso_timestamp(getattr(entry, "agent_key_expires_at", None)) or 0.0 + access_exp = _parse_iso_timestamp(getattr(entry, "expires_at", None)) or 0.0 + priority = int(getattr(entry, "priority", 0) or 0) + return (agent_exp, access_exp, -priority) + + entry = max(entries, key=_entry_sort_key) + access_token = ( + getattr(entry, "access_token", None) + or getattr(entry, "runtime_api_key", "") + ) + if not access_token: + return _empty_nous_auth_status() + return { - "logged_in": False, - "portal_base_url": None, - "inference_base_url": None, - "access_expires_at": None, - "agent_key_expires_at": None, - "has_refresh_token": False, + "logged_in": True, + "portal_base_url": getattr(entry, "portal_base_url", None) + or getattr(entry, "base_url", None), + "inference_base_url": getattr(entry, "inference_base_url", None) + or 
getattr(entry, "base_url", None), + "access_token": access_token, + "access_expires_at": getattr(entry, "expires_at", None), + "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None), + "has_refresh_token": bool(getattr(entry, "refresh_token", None)), + "source": f"pool:{getattr(entry, 'label', 'unknown')}", } - return { - "logged_in": bool(state.get("access_token")), - "portal_base_url": state.get("portal_base_url"), - "inference_base_url": state.get("inference_base_url"), - "access_expires_at": state.get("expires_at"), - "agent_key_expires_at": state.get("agent_key_expires_at"), - "has_refresh_token": bool(state.get("refresh_token")), - } + except Exception: + return _empty_nous_auth_status() + + +def get_nous_auth_status() -> Dict[str, Any]: + """Status snapshot for Nous auth. + + Prefer the auth-store provider state, because that is the live source of + truth for refresh + mint operations. When provider state exists, validate it + by resolving runtime credentials so revoked refresh sessions do not show up + as a healthy login. If provider state is absent, fall back to the credential + pool for the just-logged-in / not-yet-promoted case. 
+ """ + state = get_provider_auth_state("nous") + if state: + base_status = { + "logged_in": bool(state.get("access_token")), + "portal_base_url": state.get("portal_base_url"), + "inference_base_url": state.get("inference_base_url"), + "access_expires_at": state.get("expires_at"), + "agent_key_expires_at": state.get("agent_key_expires_at"), + "has_refresh_token": bool(state.get("refresh_token")), + "access_token": state.get("access_token"), + "source": "auth_store", + } + try: + creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=60) + refreshed_state = get_provider_auth_state("nous") or state + base_status.update( + { + "logged_in": True, + "portal_base_url": refreshed_state.get("portal_base_url") or base_status.get("portal_base_url"), + "inference_base_url": creds.get("base_url") + or refreshed_state.get("inference_base_url") + or base_status.get("inference_base_url"), + "access_expires_at": refreshed_state.get("expires_at") or base_status.get("access_expires_at"), + "agent_key_expires_at": creds.get("expires_at") + or refreshed_state.get("agent_key_expires_at") + or base_status.get("agent_key_expires_at"), + "has_refresh_token": bool(refreshed_state.get("refresh_token")), + "source": f"runtime:{creds.get('source', 'portal')}", + "key_id": creds.get("key_id"), + } + ) + return base_status + except AuthError as exc: + base_status.update({ + "logged_in": False, + "error": str(exc), + "relogin_required": bool(getattr(exc, "relogin_required", False)), + "error_code": getattr(exc, "code", None), + }) + return base_status + + return _snapshot_nous_pool_status() def get_codex_auth_status() -> Dict[str, Any]: @@ -2606,6 +3395,8 @@ def get_external_process_provider_status(provider_id: str) -> Dict[str, Any]: def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: """Generic auth status dispatcher.""" target = provider_id or get_active_provider() + if target == "spotify": + return get_spotify_auth_status() if target == "nous": return 
get_nous_auth_status() if target == "openai-codex": @@ -2778,6 +3569,46 @@ def _update_config_for_provider( return config_path +def _get_config_provider() -> Optional[str]: + """Return model.provider from config.yaml, normalized, if present.""" + try: + config = read_raw_config() + except Exception: + return None + if not config: + return None + model = config.get("model") + if not isinstance(model, dict): + return None + provider = model.get("provider") + if not isinstance(provider, str): + return None + provider = provider.strip().lower() + return provider or None + + +def _config_provider_matches(provider_id: Optional[str]) -> bool: + """Return True when config.yaml currently selects *provider_id*.""" + if not provider_id: + return False + return _get_config_provider() == provider_id.strip().lower() + + +def _logout_default_provider_from_config() -> Optional[str]: + """Fallback logout target when auth.json has no active provider. + + `hermes logout` historically keyed off auth.json.active_provider only. + That left users stuck when auth state had already been cleared but + config.yaml still selected an OAuth provider such as openai-codex for the + agent model: there was no active auth provider to target, so logout printed + "No provider is currently logged in" and never reset model.provider. + """ + provider = _get_config_provider() + if provider in {"nous", "openai-codex"}: + return provider + return None + + def _reset_config_provider() -> Path: """Reset config.yaml provider back to auto after logout.""" config_path = get_config_path() @@ -2998,52 +3829,61 @@ def login_command(args) -> None: raise SystemExit(0) -def _login_openai_codex(args, pconfig: ProviderConfig) -> None: +def _login_openai_codex( + args, + pconfig: ProviderConfig, + *, + force_new_login: bool = False, +) -> None: """OpenAI Codex login via device code flow. 
Tokens stored in ~/.hermes/auth.json.""" + del args, pconfig # kept for parity with other provider login helpers + # Check for existing Hermes-owned credentials - try: - existing = resolve_codex_runtime_credentials() - # Verify the resolved token is actually usable (not expired). - # resolve_codex_runtime_credentials attempts refresh, so if we get - # here the token should be valid — but double-check before telling - # the user "Login successful!". - _resolved_key = existing.get("api_key", "") - if isinstance(_resolved_key, str) and _resolved_key and not _codex_access_token_is_expiring(_resolved_key, 60): - print("Existing Codex credentials found in Hermes auth store.") - try: - reuse = input("Use existing credentials? [Y/n]: ").strip().lower() - except (EOFError, KeyboardInterrupt): - reuse = "y" - if reuse in ("", "y", "yes"): - config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL)) - print() - print("Login successful!") - print(f" Config updated: {config_path} (model.provider=openai-codex)") - return - else: - print("Existing Codex credentials are expired. Starting fresh login...") - except AuthError: - pass + if not force_new_login: + try: + existing = resolve_codex_runtime_credentials() + # Verify the resolved token is actually usable (not expired). + # resolve_codex_runtime_credentials attempts refresh, so if we get + # here the token should be valid — but double-check before telling + # the user "Login successful!". + _resolved_key = existing.get("api_key", "") + if isinstance(_resolved_key, str) and _resolved_key and not _codex_access_token_is_expiring(_resolved_key, 60): + print("Existing Codex credentials found in Hermes auth store.") + try: + reuse = input("Use existing credentials? 
[Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + reuse = "y" + if reuse in ("", "y", "yes"): + config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL)) + print() + print("Login successful!") + print(f" Config updated: {config_path} (model.provider=openai-codex)") + return + else: + print("Existing Codex credentials are expired. Starting fresh login...") + except AuthError: + pass # Check for existing Codex CLI tokens we can import - cli_tokens = _import_codex_cli_tokens() - if cli_tokens: - print("Found existing Codex CLI credentials at ~/.codex/auth.json") - print("Hermes will create its own session to avoid conflicts with Codex CLI / VS Code.") - try: - do_import = input("Import these credentials? (a separate login is recommended) [y/N]: ").strip().lower() - except (EOFError, KeyboardInterrupt): - do_import = "n" - if do_import in ("y", "yes"): - _save_codex_tokens(cli_tokens) - base_url = os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") or DEFAULT_CODEX_BASE_URL - config_path = _update_config_for_provider("openai-codex", base_url) - print() - print("Credentials imported. Note: if Codex CLI refreshes its token,") - print("Hermes will keep working independently with its own session.") - print(f" Config updated: {config_path} (model.provider=openai-codex)") - return + if not force_new_login: + cli_tokens = _import_codex_cli_tokens() + if cli_tokens: + print("Found existing Codex CLI credentials at ~/.codex/auth.json") + print("Hermes will create its own session to avoid conflicts with Codex CLI / VS Code.") + try: + do_import = input("Import these credentials? 
(a separate login is recommended) [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + do_import = "n" + if do_import in ("y", "yes"): + _save_codex_tokens(cli_tokens) + base_url = os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") or DEFAULT_CODEX_BASE_URL + config_path = _update_config_for_provider("openai-codex", base_url) + print() + print("Credentials imported. Note: if Codex CLI refreshes its token,") + print("Hermes will keep working independently with its own session.") + print(f" Config updated: {config_path} (model.provider=openai-codex)") + return # Run a fresh device code flow — Hermes gets its own OAuth session print() @@ -3471,20 +4311,21 @@ def logout_command(args) -> None: """Clear auth state for a provider.""" provider_id = getattr(args, "provider", None) - if provider_id and provider_id not in PROVIDER_REGISTRY: + if provider_id and not is_known_auth_provider(provider_id): print(f"Unknown provider: {provider_id}") raise SystemExit(1) active = get_active_provider() - target = provider_id or active + target = provider_id or active or _logout_default_provider_from_config() if not target: print("No provider is currently logged in.") return - provider_name = PROVIDER_REGISTRY[target].name if target in PROVIDER_REGISTRY else target + config_matches = _config_provider_matches(target) + provider_name = get_auth_provider_display_name(target) - if clear_provider_auth(target): + if clear_provider_auth(target) or config_matches: _reset_config_provider() print(f"Logged out of {provider_name}.") if os.getenv("OPENROUTER_API_KEY"): diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 9c3320010..94ea2559c 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -110,18 +110,40 @@ def _display_source(source: str) -> str: return source.split(":", 1)[1] if source.startswith("manual:") else source +def _classify_exhausted_status(entry) -> tuple[str, bool]: + code = getattr(entry, "last_error_code", 
None) + reason = str(getattr(entry, "last_error_reason", "") or "").strip().lower() + message = str(getattr(entry, "last_error_message", "") or "").strip().lower() + + if code == 429 or any(token in reason for token in ("rate_limit", "usage_limit", "quota", "exhausted")) or any( + token in message for token in ("rate limit", "usage limit", "quota", "too many requests") + ): + return "rate-limited", True + + if code in {401, 403} or any(token in reason for token in ("invalid_token", "invalid_grant", "unauthorized", "forbidden", "auth")) or any( + token in message for token in ("unauthorized", "forbidden", "expired", "revoked", "invalid token", "authentication") + ): + return "auth failed", False + + return "exhausted", True + + + def _format_exhausted_status(entry) -> str: if entry.last_status != STATUS_EXHAUSTED: return "" + label, show_retry_window = _classify_exhausted_status(entry) reason = getattr(entry, "last_error_reason", None) reason_text = f" {reason}" if isinstance(reason, str) and reason.strip() else "" code = f" ({entry.last_error_code})" if entry.last_error_code else "" + if not show_retry_window: + return f" {label}{reason_text}{code} (re-auth may be required)" exhausted_until = _exhausted_until(entry) if exhausted_until is None: - return f" exhausted{reason_text}{code}" + return f" {label}{reason_text}{code}" remaining = max(0, int(math.ceil(exhausted_until - time.time()))) if remaining <= 0: - return f" exhausted{reason_text}{code} (ready to retry)" + return f" {label}{reason_text}{code} (ready to retry)" minutes, seconds = divmod(remaining, 60) hours, minutes = divmod(minutes, 60) days, hours = divmod(hours, 24) @@ -133,7 +155,7 @@ def _format_exhausted_status(entry) -> str: wait = f"{minutes}m {seconds}s" else: wait = f"{seconds}s" - return f" exhausted{reason_text}{code} ({wait} left)" + return f" {label}{reason_text}{code} ({wait} left)" def auth_add_command(args) -> None: @@ -386,6 +408,44 @@ def auth_reset_command(args) -> None: print(f"Reset 
status on {count} {provider} credentials") +def auth_status_command(args) -> None: + provider = _normalize_provider(getattr(args, "provider", "") or "") + if not provider: + raise SystemExit("Provider is required. Example: `hermes auth status spotify`.") + status = auth_mod.get_auth_status(provider) + if not status.get("logged_in"): + reason = status.get("error") + if reason: + print(f"{provider}: logged out ({reason})") + else: + print(f"{provider}: logged out") + return + + print(f"{provider}: logged in") + for key in ("auth_type", "client_id", "redirect_uri", "scope", "expires_at", "api_base_url"): + value = status.get(key) + if value: + print(f" {key}: {value}") + + +def auth_logout_command(args) -> None: + auth_mod.logout_command(SimpleNamespace(provider=getattr(args, "provider", None))) + + +def auth_spotify_command(args) -> None: + action = str(getattr(args, "spotify_action", "") or "login").strip().lower() + if action in {"", "login"}: + auth_mod.login_spotify_command(args) + return + if action == "status": + auth_status_command(SimpleNamespace(provider="spotify")) + return + if action == "logout": + auth_logout_command(SimpleNamespace(provider="spotify")) + return + raise SystemExit(f"Unknown Spotify auth action: {action}") + + def _interactive_auth() -> None: """Interactive credential pool management when `hermes auth` is called bare.""" # Show current pool status first @@ -583,5 +643,14 @@ def auth_command(args) -> None: if action == "reset": auth_reset_command(args) return + if action == "status": + auth_status_command(args) + return + if action == "logout": + auth_logout_command(args) + return + if action == "spotify": + auth_spotify_command(args) + return # No subcommand — launch interactive mode _interactive_auth() diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index fb6068a81..0f792592f 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -238,6 +238,52 @@ def get_git_banner_state(repo_dir: Optional[Path] = None) -> 
Optional[dict]: return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)} +_RELEASE_URL_BASE = "https://github.com/NousResearch/hermes-agent/releases/tag" +_latest_release_cache: Optional[tuple] = None # (tag, url) once resolved + + +def get_latest_release_tag(repo_dir: Optional[Path] = None) -> Optional[tuple]: + """Return ``(tag, release_url)`` for the latest git tag, or None. + + Local-only — runs ``git describe --tags --abbrev=0`` against the + Hermes checkout. Cached per-process. Release URL always points at the + canonical NousResearch/hermes-agent repo (forks don't get a link). + """ + global _latest_release_cache + if _latest_release_cache is not None: + return _latest_release_cache or None + + repo_dir = repo_dir or _resolve_repo_dir() + if repo_dir is None: + _latest_release_cache = () # falsy sentinel — skip future lookups + return None + + try: + result = subprocess.run( + ["git", "describe", "--tags", "--abbrev=0"], + capture_output=True, + text=True, + timeout=3, + cwd=str(repo_dir), + ) + except Exception: + _latest_release_cache = () + return None + + if result.returncode != 0: + _latest_release_cache = () + return None + + tag = (result.stdout or "").strip() + if not tag: + _latest_release_cache = () + return None + + url = f"{_RELEASE_URL_BASE}/{tag}" + _latest_release_cache = (tag, url) + return _latest_release_cache + + def format_banner_version_label() -> str: """Return the version label shown in the startup banner title.""" base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})" @@ -519,9 +565,16 @@ def build_welcome_banner(console: Console, model: str, cwd: str, agent_name = _skin_branding("agent_name", "Hermes Agent") title_color = _skin_color("banner_title", "#FFD700") border_color = _skin_color("banner_border", "#CD7F32") + version_label = format_banner_version_label() + release_info = get_latest_release_tag() + if release_info: + _tag, _url = release_info + title_markup = f"[bold 
{title_color}][link={_url}]{version_label}[/link][/]" + else: + title_markup = f"[bold {title_color}]{version_label}[/]" outer_panel = Panel( layout_table, - title=f"[bold {title_color}]{format_banner_version_label()}[/]", + title=title_markup, border_style=border_color, padding=(0, 2), ) diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py index 9e2181b50..e39b2c594 100644 --- a/hermes_cli/codex_models.py +++ b/hermes_cli/codex_models.py @@ -12,6 +12,7 @@ import os logger = logging.getLogger(__name__) DEFAULT_CODEX_MODELS: List[str] = [ + "gpt-5.5", "gpt-5.4-mini", "gpt-5.4", "gpt-5.3-codex", @@ -21,6 +22,7 @@ DEFAULT_CODEX_MODELS: List[str] = [ ] _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [ + ("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")), ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.3-codex", ("gpt-5.2-codex",)), diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 87d73af58..efff57180 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -77,7 +77,7 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("rollback", "List or restore filesystem checkpoints", "Session", args_hint="[number]"), CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session", - aliases=("snap",), args_hint="[create|restore |prune]"), + cli_only=True, aliases=("snap",), args_hint="[create|restore |prune]"), CommandDef("stop", "Kill all running background processes", "Session"), CommandDef("approve", "Approve a pending dangerous command", "Session", gateway_only=True, args_hint="[session|always]"), @@ -104,9 +104,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("config", "Show current configuration", "Configuration", cli_only=True), CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"), - CommandDef("provider", "Show available providers and 
current provider", - "Configuration"), - CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"), + CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info", + cli_only=True), CommandDef("personality", "Set a predefined personality", "Configuration", args_hint="[name]"), @@ -124,9 +123,12 @@ COMMAND_REGISTRY: list[CommandDef] = [ args_hint="[normal|fast|status]", subcommands=("normal", "fast", "status", "on", "off")), CommandDef("skin", "Show or change the display skin/theme", "Configuration", - args_hint="[name]"), + cli_only=True, args_hint="[name]"), CommandDef("voice", "Toggle voice mode", "Configuration", args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")), + CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration", + cli_only=True, args_hint="[queue|interrupt|status]", + subcommands=("queue", "interrupt", "status")), # Tools & Skills CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills", @@ -139,7 +141,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("cron", "Manage scheduled tasks", "Tools & Skills", cli_only=True, args_hint="[subcommand]", subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")), - CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"), + CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills", + cli_only=True), CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills", aliases=("reload_mcp",)), CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills", @@ -317,7 +320,7 @@ def should_bypass_active_session(command_name: str | None) -> bool: safety net in gateway.run discards any command text that reaches the pending queue — which meant a mid-run /model (or /reasoning, /voice, /insights, /title, /resume, /retry, /undo, /compress, - /usage, /provider, /reload-mcp, 
/sethome, /reset) would silently + /usage, /reload-mcp, /sethome, /reset) would silently interrupt the agent AND get discarded, producing a zero-char response. See issue #5057 / PRs #6252, #10370, #4665. diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 6d4c49fd4..7678287a0 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -361,6 +361,15 @@ DEFAULT_CONFIG = { # to finish, then interrupts any remaining runs after the timeout. # 0 = no drain, interrupt immediately. "restart_drain_timeout": 60, + # Max app-level retry attempts for API errors (connection drops, + # provider timeouts, 5xx, etc.) before the agent surfaces the + # failure. The OpenAI SDK already does its own low-level retries + # (max_retries=2 default) for transient network errors; this is + # the Hermes-level retry loop that wraps the whole call. Lower + # this to 1 if you use fallback providers and want fast failover + # on flaky primaries; raise it if you prefer to tolerate longer + # provider hiccups on a single provider. + "api_max_retries": 3, "service_tier": "", # Tool-use enforcement: injects system prompt guidance that tells the # model to actually call tools instead of describing intended actions. @@ -375,7 +384,11 @@ DEFAULT_CONFIG = { # Periodic "still working" notification interval (seconds). # Sends a status message every N seconds so the user knows the # agent hasn't died during long tasks. 0 = disable notifications. - "gateway_notify_interval": 600, + # Lower values mean faster feedback on slow tasks but more chat + # noise; 180s is a compromise that catches spinning weak-model runs + # (60+ tool iterations with tiny output) before users assume the + # bot is dead and /restart. + "gateway_notify_interval": 180, }, "terminal": { @@ -453,6 +466,12 @@ DEFAULT_CONFIG = { "record_sessions": False, # Auto-record browser sessions as WebM videos "allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.) 
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome + # CDP supervisor — dialog + frame detection via a persistent WebSocket. + # Active only when a CDP-capable backend is attached (Browserbase or + # local Chrome via /browser connect). See + # website/docs/developer-guide/browser-supervisor.md. + "dialog_policy": "must_respond", # must_respond | auto_dismiss | auto_accept + "dialog_timeout_s": 300, # Safety auto-dismiss after N seconds under must_respond "camofox": { # When true, Hermes sends a stable profile-scoped userId to Camofox # so the server maps it to a persistent Firefox profile automatically. @@ -473,7 +492,27 @@ DEFAULT_CONFIG = { # exceed this are rejected with guidance to use offset+limit. # 100K chars ≈ 25–35K tokens across typical tokenisers. "file_read_max_chars": 100_000, - + + # Tool-output truncation thresholds. When terminal output or a + # single read_file page exceeds these limits, Hermes truncates the + # payload sent to the model (keeping head + tail for terminal, + # enforcing pagination for read_file). Tuning these trades context + # footprint against how much raw output the model can see in one + # shot. Ported from anomalyco/opencode PR #23770. + # + # - max_bytes: terminal_tool output cap, in chars + # (default 50_000 ≈ 12-15K tokens). + # - max_lines: read_file pagination cap — the maximum `limit` + # a single read_file call can request before + # being clamped (default 2000). + # - max_line_length: per-line cap applied when read_file emits a + # line-numbered view (default 2000 chars). + "tool_output": { + "max_bytes": 50_000, + "max_lines": 2000, + "max_line_length": 2000, + }, + "compression": { "enabled": True, "threshold": 0.50, # compress when context usage exceeds this ratio @@ -482,6 +521,12 @@ DEFAULT_CONFIG = { }, + # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API). + # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored. 
+ "prompt_caching": { + "cache_ttl": "5m", + }, + # AWS Bedrock provider configuration. # Only used when model.provider is "bedrock". "bedrock": { @@ -726,6 +771,10 @@ DEFAULT_CONFIG = { "inherit_mcp_toolsets": True, "max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget, # independent of the parent's max_iterations) + "child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s, + # no ceiling). High-reasoning models on large tasks + # (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets; + # raise if children time out before producing output. "reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium", # "low", "minimal", "none" (empty = inherit parent's level) "max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling @@ -760,6 +809,17 @@ DEFAULT_CONFIG = { "inline_shell": False, # Timeout (seconds) for each !`cmd` snippet when inline_shell is on. "inline_shell_timeout": 10, + # Run the keyword/pattern security scanner on skills the agent + # writes via skill_manage (create/edit/patch). Off by default + # because the agent can already execute the same code paths via + # terminal() with no gate, so the scan adds friction (blocks + # skills that mention risky keywords in prose) without meaningful + # security. Turn on if you want the belt-and-suspenders — a + # dangerous verdict will then surface as a tool error to the + # agent, which can retry with the flagged content removed. + # External hub installs (trusted/community sources) are always + # scanned regardless of this setting. + "guard_agent_created": False, }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth. 
@@ -1280,7 +1340,7 @@ OPTIONAL_ENV_VARS = { "advanced": True, }, "XIAOMI_API_KEY": { - "description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)", + "description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)", "prompt": "Xiaomi MiMo API Key", "url": "https://platform.xiaomimimo.com", "password": True, diff --git a/hermes_cli/copilot_auth.py b/hermes_cli/copilot_auth.py index 24859da1a..348e4efe8 100644 --- a/hermes_cli/copilot_auth.py +++ b/hermes_cli/copilot_auth.py @@ -275,6 +275,99 @@ def copilot_device_code_login( return None +# ─── Copilot Token Exchange ──────────────────────────────────────────────── + +# Module-level cache for exchanged Copilot API tokens. +# Maps raw_token_fingerprint -> (api_token, expires_at_epoch). +_jwt_cache: dict[str, tuple[str, float]] = {} +_JWT_REFRESH_MARGIN_SECONDS = 120 # refresh 2 min before expiry + +# Token exchange endpoint and headers (matching VS Code / Copilot CLI) +_TOKEN_EXCHANGE_URL = "https://api.github.com/copilot_internal/v2/token" +_EDITOR_VERSION = "vscode/1.104.1" +_EXCHANGE_USER_AGENT = "GitHubCopilotChat/0.26.7" + + +def _token_fingerprint(raw_token: str) -> str: + """Short fingerprint of a raw token for cache keying (avoids storing full token).""" + import hashlib + return hashlib.sha256(raw_token.encode()).hexdigest()[:16] + + +def exchange_copilot_token(raw_token: str, *, timeout: float = 10.0) -> tuple[str, float]: + """Exchange a raw GitHub token for a short-lived Copilot API token. + + Calls ``GET https://api.github.com/copilot_internal/v2/token`` with + the raw GitHub token and returns ``(api_token, expires_at)``. + + The returned token is a semicolon-separated string (not a standard JWT) + used as ``Authorization: Bearer `` for Copilot API requests. + + Results are cached in-process and reused until close to expiry. + Raises ``ValueError`` on failure. 
+ """ + import urllib.request + + fp = _token_fingerprint(raw_token) + + # Check cache first + cached = _jwt_cache.get(fp) + if cached: + api_token, expires_at = cached + if time.time() < expires_at - _JWT_REFRESH_MARGIN_SECONDS: + return api_token, expires_at + + req = urllib.request.Request( + _TOKEN_EXCHANGE_URL, + method="GET", + headers={ + "Authorization": f"token {raw_token}", + "User-Agent": _EXCHANGE_USER_AGENT, + "Accept": "application/json", + "Editor-Version": _EDITOR_VERSION, + }, + ) + + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + except Exception as exc: + raise ValueError(f"Copilot token exchange failed: {exc}") from exc + + api_token = data.get("token", "") + expires_at = data.get("expires_at", 0) + if not api_token: + raise ValueError("Copilot token exchange returned empty token") + + # Convert expires_at to float if needed + expires_at = float(expires_at) if expires_at else time.time() + 1800 + + _jwt_cache[fp] = (api_token, expires_at) + logger.debug( + "Copilot token exchanged, expires_at=%s", + expires_at, + ) + return api_token, expires_at + + +def get_copilot_api_token(raw_token: str) -> str: + """Exchange a raw GitHub token for a Copilot API token, with fallback. + + Convenience wrapper: returns the exchanged token on success, or the + raw token unchanged if the exchange fails (e.g. network error, unsupported + account type). This preserves existing behaviour for accounts that don't + need exchange while enabling access to internal-only models for those that do. 
+ """ + if not raw_token: + return raw_token + try: + api_token, _ = exchange_copilot_token(raw_token) + return api_token + except Exception as exc: + logger.debug("Copilot token exchange failed, using raw token: %s", exc) + return raw_token + + # ─── Copilot API Headers ─────────────────────────────────────────────────── def copilot_request_headers( diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index e0ab6007a..78639d465 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -93,6 +93,9 @@ def cron_list(show_all: bool = False): script = job.get("script") if script: print(f" Script: {script}") + workdir = job.get("workdir") + if workdir: + print(f" Workdir: {workdir}") # Execution history last_status = job.get("last_status") @@ -168,6 +171,7 @@ def cron_create(args): skill=getattr(args, "skill", None), skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)), script=getattr(args, "script", None), + workdir=getattr(args, "workdir", None), ) if not result.get("success"): print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED)) @@ -180,6 +184,8 @@ def cron_create(args): job_data = result.get("job", {}) if job_data.get("script"): print(f" Script: {job_data['script']}") + if job_data.get("workdir"): + print(f" Workdir: {job_data['workdir']}") print(f" Next run: {result['next_run_at']}") return 0 @@ -218,6 +224,7 @@ def cron_edit(args): repeat=getattr(args, "repeat", None), skills=final_skills, script=getattr(args, "script", None), + workdir=getattr(args, "workdir", None), ) if not result.get("success"): print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED)) @@ -233,6 +240,8 @@ def cron_edit(args): print(" Skills: none") if updated.get("script"): print(f" Script: {updated['script']}") + if updated.get("workdir"): + print(f" Workdir: {updated['workdir']}") return 0 diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 064b1d68d..cba4ebcdd 100644 --- 
a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -29,6 +29,7 @@ if _env_path.exists(): load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8") from hermes_cli.colors import Colors, color +from hermes_cli.models import _HERMES_USER_AGENT from hermes_constants import OPENROUTER_MODELS_URL from utils import base_url_host_matches @@ -295,16 +296,33 @@ def run_doctor(args): except Exception: pass try: - from hermes_cli.auth import resolve_provider as _resolve_provider + from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers + from hermes_cli.providers import resolve_provider_full as _resolve_provider_full except Exception: - _resolve_provider = None + _compatible_custom_providers = None + _resolve_provider_full = None + + custom_providers = [] + if _compatible_custom_providers is not None: + try: + custom_providers = _compatible_custom_providers(cfg) + except Exception: + custom_providers = [] + + user_providers = cfg.get("providers") + if isinstance(user_providers, dict): + known_providers.update(str(name).strip().lower() for name in user_providers if str(name).strip()) + for entry in custom_providers: + if not isinstance(entry, dict): + continue + name = str(entry.get("name") or "").strip() + if name: + known_providers.add("custom:" + name.lower().replace(" ", "-")) canonical_provider = provider - if provider and _resolve_provider is not None and provider != "auto": - try: - canonical_provider = _resolve_provider(provider) - except Exception: - canonical_provider = None + if provider and _resolve_provider_full is not None and provider != "auto": + provider_def = _resolve_provider_full(provider, user_providers, custom_providers) + canonical_provider = provider_def.id if provider_def is not None else None if provider and provider != "auto": if canonical_provider is None or (known_providers and canonical_provider not in known_providers): @@ -957,7 +975,10 @@ def run_doctor(args): if base_url_host_matches(_base, 
"api.kimi.com") and _base.rstrip("/").endswith("/coding"): _base = _base.rstrip("/") + "/v1" _url = (_base.rstrip("/") + "/models") if _base else _default_url - _headers = {"Authorization": f"Bearer {_key}"} + _headers = { + "Authorization": f"Bearer {_key}", + "User-Agent": _HERMES_USER_AGENT, + } if base_url_host_matches(_base, "api.kimi.com"): _headers["User-Agent"] = "claude-code/0.1.0" _resp = httpx.get( diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py index 90364a261..3d7280244 100644 --- a/hermes_cli/dump.py +++ b/hermes_cli/dump.py @@ -267,6 +267,8 @@ def run_dump(args): ("ANTHROPIC_API_KEY", "anthropic"), ("ANTHROPIC_TOKEN", "anthropic_token"), ("NOUS_API_KEY", "nous"), + ("GOOGLE_API_KEY", "google/gemini"), + ("GEMINI_API_KEY", "gemini"), ("GLM_API_KEY", "glm/zai"), ("ZAI_API_KEY", "zai"), ("KIMI_API_KEY", "kimi"), diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 7796cc575..3b828fecf 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -175,6 +175,60 @@ def _request_gateway_self_restart(pid: int) -> bool: return True +def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool: + """Send SIGUSR1 to a gateway PID and wait for it to exit gracefully. + + SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)`` + which drains in-flight agent runs (up to ``agent.restart_drain_timeout`` + seconds), then exits with code 75. Both systemd (``Restart=on-failure`` + + ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit + = false``) relaunch the process after the graceful exit. + + This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``, + which SIGKILL in-flight agents after a short timeout. + + Args: + pid: Gateway process PID (systemd MainPID, launchd PID, or bare + process PID). + drain_timeout: Seconds to wait for the process to exit after sending + SIGUSR1. 
Should be slightly larger than the gateway's + ``agent.restart_drain_timeout`` to allow the drain loop to + finish cleanly. + + Returns: + True if the PID was signalled and exited within the timeout. + False if SIGUSR1 couldn't be sent or the process didn't exit in + time (caller should fall back to a harder restart path). + """ + if not hasattr(signal, "SIGUSR1"): + return False + if pid <= 0: + return False + try: + os.kill(pid, signal.SIGUSR1) + except ProcessLookupError: + # Already gone — nothing to drain. + return True + except (PermissionError, OSError): + return False + + import time as _time + + deadline = _time.monotonic() + max(drain_timeout, 1.0) + while _time.monotonic() < deadline: + try: + os.kill(pid, 0) # signal 0 — probe liveness + except ProcessLookupError: + return True + except PermissionError: + # Process still exists but we can't signal it. Treat as alive + # so the caller falls back. + pass + _time.sleep(0.5) + # Drain didn't finish in time. + return False + + def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None: if pid is None or pid <= 0: return @@ -1469,7 +1523,14 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) path_entries.append(resolved_node_dir) common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"] - restart_timeout = max(60, int(_get_restart_drain_timeout() or 0)) + # systemd's TimeoutStopSec must exceed the gateway's drain_timeout so + # there's budget left for post-interrupt cleanup (tool subprocess kill, + # adapter disconnect, session DB close) before systemd escalates to + # SIGKILL on the cgroup — otherwise bash/sleep tool-call children left + # by a force-interrupted agent get reaped by systemd instead of us + # (#8202). 30s of headroom covers the worst case we've observed. 
+ _drain_timeout = int(_get_restart_drain_timeout() or 0) + restart_timeout = max(60, _drain_timeout) + 30 if system: username, group_name, home_dir = _system_service_identity(run_as_user) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index ec0441f8b..7de68d2cb 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -166,6 +166,27 @@ from hermes_cli.env_loader import load_hermes_dotenv load_hermes_dotenv(project_env=PROJECT_ROOT / ".env") +# Bridge security.redact_secrets from config.yaml → HERMES_REDACT_SECRETS env +# var BEFORE hermes_logging imports agent.redact (which snapshots the flag at +# module-import time). Without this, config.yaml's toggle is ignored because +# the setup_logging() call below imports agent.redact, which reads the env var +# exactly once. Env var in .env still wins — this is config.yaml fallback only. +try: + if "HERMES_REDACT_SECRETS" not in os.environ: + import yaml as _yaml_early + _cfg_path = get_hermes_home() / "config.yaml" + if _cfg_path.exists(): + with open(_cfg_path, encoding="utf-8") as _f: + _early_sec_cfg = (_yaml_early.safe_load(_f) or {}).get("security", {}) + if isinstance(_early_sec_cfg, dict): + _early_redact = _early_sec_cfg.get("redact_secrets") + if _early_redact is not None: + os.environ["HERMES_REDACT_SECRETS"] = str(_early_redact).lower() + del _early_sec_cfg + del _cfg_path +except Exception: + pass # best-effort — redaction stays at default (enabled) on config errors + # Initialize centralized file logging early — all `hermes` subcommands # (chat, setup, gateway, config, etc.) write to agent.log + errors.log. 
try: @@ -1429,6 +1450,7 @@ def select_provider_and_model(args=None): load_config, get_env_value, ) + from hermes_cli.providers import resolve_provider_full config = load_config() current_model = config.get("model") @@ -1446,14 +1468,30 @@ def select_provider_and_model(args=None): effective_provider = ( config_provider or os.getenv("HERMES_INFERENCE_PROVIDER") or "auto" ) - try: - active = resolve_provider(effective_provider) - except AuthError as exc: - warning = format_auth_error(exc) - print(f"Warning: {warning} Falling back to auto provider detection.") + compatible_custom_providers = get_compatible_custom_providers(config) + active = None + if effective_provider != "auto": + active_def = resolve_provider_full( + effective_provider, + config.get("providers"), + compatible_custom_providers, + ) + if active_def is not None: + active = active_def.id + else: + warning = ( + f"Unknown provider '{effective_provider}'. Check 'hermes model' for " + "available providers, or run 'hermes doctor' to diagnose config " + "issues." + ) + print(f"Warning: {warning} Falling back to auto provider detection.") + if active is None: try: active = resolve_provider("auto") - except AuthError: + except AuthError as exc: + if effective_provider == "auto": + warning = format_auth_error(exc) + print(f"Warning: {warning} Falling back to auto provider detection.") active = None # no provider yet; default to first in list # Detect custom endpoint @@ -2311,7 +2349,41 @@ def _model_flow_openai_codex(config, current_model=""): from hermes_cli.codex_models import get_codex_model_ids status = get_codex_auth_status() - if not status.get("logged_in"): + if status.get("logged_in"): + print(" OpenAI Codex credentials: ✓") + print() + print(" 1. Use existing credentials") + print(" 2. Reauthenticate (new OAuth login)") + print(" 3. 
Cancel") + print() + try: + choice = input(" Choice [1/2/3]: ").strip() + except (KeyboardInterrupt, EOFError): + choice = "1" + + if choice == "2": + print("Starting a fresh OpenAI Codex login...") + print() + try: + mock_args = argparse.Namespace() + _login_openai_codex( + mock_args, + PROVIDER_REGISTRY["openai-codex"], + force_new_login=True, + ) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + status = get_codex_auth_status() + if not status.get("logged_in"): + print("Login failed.") + return + elif choice == "3": + return + else: print("Not logged into OpenAI Codex. Starting login...") print() try: @@ -2828,11 +2900,16 @@ def _model_flow_named_custom(config, provider_info): name = provider_info["name"] base_url = provider_info["base_url"] + api_mode = provider_info.get("api_mode", "") api_key = provider_info.get("api_key", "") key_env = provider_info.get("key_env", "") saved_model = provider_info.get("model", "") provider_key = (provider_info.get("provider_key") or "").strip() + # Resolve key from env var if api_key not set directly + if not api_key and key_env: + api_key = os.environ.get(key_env, "") + print(f" Provider: {name}") print(f" URL: {base_url}") if saved_model: @@ -2840,7 +2917,10 @@ def _model_flow_named_custom(config, provider_info): print() print("Fetching available models...") - models = fetch_api_models(api_key, base_url, timeout=8.0) + models = fetch_api_models( + api_key, base_url, timeout=8.0, + api_mode=api_mode or None, + ) if models: default_idx = 0 @@ -3930,12 +4010,71 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): print("Cancelled.") return save_env_value(key_env, new_key) + existing_key = new_key print("API key saved.") print() else: print(f" {pconfig.name} API key: {existing_key[:8]}... 
✓") print() + # Gemini free-tier gate: free-tier daily quotas (<= 250 RPD for Flash) + # are exhausted in a handful of agent turns, so refuse to wire up the + # provider with a free-tier key. Probe is best-effort; network or auth + # errors fall through without blocking. + if provider_id == "gemini" and existing_key: + try: + from agent.gemini_native_adapter import probe_gemini_tier + except Exception: + probe_gemini_tier = None + if probe_gemini_tier is not None: + print(" Checking Gemini API tier...") + probe_base = ( + (get_env_value(base_url_env) if base_url_env else "") + or os.getenv(base_url_env or "", "") + or pconfig.inference_base_url + ) + tier = probe_gemini_tier(existing_key, probe_base) + if tier == "free": + print() + print( + "❌ This Google API key is on the free tier " + "(<= 250 requests/day for gemini-2.5-flash)." + ) + print( + " Hermes typically makes 3-10 API calls per user turn " + "(tool iterations + auxiliary tasks)," + ) + print( + " so the free tier is exhausted after a handful of " + "messages and cannot sustain" + ) + print(" an agent session.") + print() + print( + " To use Gemini with Hermes, enable billing on your " + "Google Cloud project and regenerate" + ) + print( + " the key in a billing-enabled project: " + "https://aistudio.google.com/apikey" + ) + print() + print( + " Alternatives with workable free usage: DeepSeek, " + "OpenRouter (free models), Groq, Nous." + ) + print() + print("Not saving Gemini as the default provider.") + return + if tier == "paid": + print(" Tier check: paid ✓") + else: + # "unknown" -- network issue, auth problem, unexpected response. + # Don't block; the runtime 429 handler will surface free-tier + # guidance if the key turns out to be free tier. 
+ print(" Tier check: could not verify (proceeding anyway).") + print() + # Optional base URL override current_base = "" if base_url_env: @@ -3984,7 +4123,18 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): pass if mdev_models: - model_list = mdev_models + # Merge models.dev with curated list so newly added models + # (not yet in models.dev) still appear in the picker. + if curated: + seen = {m.lower() for m in mdev_models} + merged = list(mdev_models) + for m in curated: + if m.lower() not in seen: + merged.append(m) + seen.add(m.lower()) + model_list = merged + else: + model_list = mdev_models print(f" Found {len(model_list)} model(s) from models.dev registry") elif curated and len(curated) >= 8: # Curated list is substantial — use it directly, skip live probe @@ -4166,6 +4316,8 @@ def _model_flow_anthropic(config, current_model=""): from agent.anthropic_adapter import ( read_claude_code_credentials, is_claude_code_token_valid, + _is_oauth_token, + _resolve_claude_code_token_from_credentials, ) cc_creds = read_claude_code_credentials() @@ -4174,7 +4326,14 @@ def _model_flow_anthropic(config, current_model=""): except Exception: pass - has_creds = bool(existing_key) or cc_available + # Stale-OAuth guard: if the only existing cred is an expired OAuth token + # (no valid cc_creds to fall back on), treat it as missing so the re-auth + # path is offered instead of silently accepting a broken token. + existing_is_stale_oauth = False + if existing_key and _is_oauth_token(existing_key) and not cc_available: + existing_is_stale_oauth = True + + has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available needs_auth = not has_creds if has_creds: @@ -5853,12 +6012,15 @@ def _cmd_update_impl(args, gateway_mode: bool): # Write exit code *before* the gateway restart attempt. # When running as ``hermes update --gateway`` (spawned by the gateway's # /update command), this process lives inside the gateway's systemd - # cgroup. 
``systemctl restart hermes-gateway`` kills everything in the - # cgroup (KillMode=mixed → SIGKILL to remaining processes), including - # us and the wrapping bash shell. The shell never reaches its - # ``printf $status > .update_exit_code`` epilogue, so the exit-code - # marker file is never created. The new gateway's update watcher then - # polls for 30 minutes and sends a spurious timeout message. + # cgroup. A graceful SIGUSR1 restart keeps the drain loop alive long + # enough for the exit-code marker to be written below, but the + # fallback ``systemctl restart`` path (see below) kills everything in + # the cgroup (KillMode=mixed → SIGKILL to remaining processes), + # including us and the wrapping bash shell. The shell never reaches + # its ``printf $status > .update_exit_code`` epilogue, so the + # exit-code marker file would never be created. The new gateway's + # update watcher would then poll for 30 minutes and send a spurious + # timeout message. # # Writing the marker here — after git pull + pip install succeed but # before we attempt the restart — ensures the new gateway sees it @@ -5880,9 +6042,37 @@ def _cmd_update_impl(args, gateway_mode: bool): _ensure_user_systemd_env, find_gateway_pids, _get_service_pids, + _graceful_restart_via_sigusr1, ) import signal as _signal + # Drain budget for graceful SIGUSR1 restarts. The gateway drains + # for up to ``agent.restart_drain_timeout`` (default 60s) before + # exiting with code 75; we wait slightly longer so the drain + # completes before we fall back to a hard restart. On older + # systemd units without SIGUSR1 wiring this wait just times out + # and we fall back to ``systemctl restart`` (the old behaviour). 
+ try: + from hermes_constants import ( + DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT as _DEFAULT_DRAIN, + ) + except Exception: + _DEFAULT_DRAIN = 60.0 + _cfg_drain = None + try: + from hermes_cli.config import load_config + _cfg_agent = (load_config().get("agent") or {}) + _cfg_drain = _cfg_agent.get("restart_drain_timeout") + except Exception: + pass + try: + _drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN) + except (TypeError, ValueError): + _drain_budget = float(_DEFAULT_DRAIN) + # Add a 15s margin so the drain loop + final exit finish before + # we escalate to ``systemctl restart`` / SIGTERM. + _drain_budget = max(_drain_budget, 30.0) + 15.0 + restarted_services = [] killed_pids = set() @@ -5929,59 +6119,114 @@ def _cmd_update_impl(args, gateway_mode: bool): text=True, timeout=5, ) - if check.stdout.strip() == "active": - restart = subprocess.run( - scope_cmd + ["restart", svc_name], + if check.stdout.strip() != "active": + continue + + # Prefer a graceful SIGUSR1 restart so in-flight + # agent runs drain instead of being SIGKILLed. + # The gateway's SIGUSR1 handler calls + # request_restart(via_service=True) → drain → + # exit(75); systemd's Restart=on-failure (and + # RestartForceExitStatus=75) respawns the unit. + _main_pid = 0 + try: + _show = subprocess.run( + scope_cmd + [ + "show", svc_name, + "--property=MainPID", "--value", + ], + capture_output=True, text=True, timeout=5, + ) + _main_pid = int((_show.stdout or "").strip() or 0) + except (ValueError, subprocess.TimeoutExpired, FileNotFoundError): + _main_pid = 0 + + _graceful_ok = False + if _main_pid > 0: + print( + f" → {svc_name}: draining (up to {int(_drain_budget)}s)..." + ) + _graceful_ok = _graceful_restart_via_sigusr1( + _main_pid, drain_timeout=_drain_budget, + ) + + if _graceful_ok: + # Gateway exited 75; systemd should relaunch + # via Restart=on-failure. Verify the new + # process came up. 
+ _time.sleep(3) + verify = subprocess.run( + scope_cmd + ["is-active", svc_name], + capture_output=True, text=True, timeout=5, + ) + if verify.stdout.strip() == "active": + restarted_services.append(svc_name) + continue + # Process exited but wasn't respawned (older + # unit without Restart=on-failure or + # RestartForceExitStatus=75). Fall through + # to systemctl start/restart. + print( + f" ⚠ {svc_name} drained but didn't relaunch — forcing restart" + ) + + # Fallback: blunt systemctl restart. This is + # what the old code always did; we get here only + # when the graceful path failed (unit missing + # SIGUSR1 wiring, drain exceeded the budget, + # restart-policy mismatch). + restart = subprocess.run( + scope_cmd + ["restart", svc_name], + capture_output=True, + text=True, + timeout=15, + ) + if restart.returncode == 0: + # Verify the service actually survived the + # restart. systemctl restart returns 0 even + # if the new process crashes immediately. + _time.sleep(3) + verify = subprocess.run( + scope_cmd + ["is-active", svc_name], capture_output=True, text=True, - timeout=15, + timeout=5, ) - if restart.returncode == 0: - # Verify the service actually survived the - # restart. systemctl restart returns 0 even - # if the new process crashes immediately. + if verify.stdout.strip() == "active": + restarted_services.append(svc_name) + else: + # Retry once — transient startup failures + # (stale module cache, import race) often + # resolve on the second attempt. + print( + f" ⚠ {svc_name} died after restart, retrying..." 
+ ) + retry = subprocess.run( + scope_cmd + ["restart", svc_name], + capture_output=True, + text=True, + timeout=15, + ) _time.sleep(3) - verify = subprocess.run( + verify2 = subprocess.run( scope_cmd + ["is-active", svc_name], capture_output=True, text=True, timeout=5, ) - if verify.stdout.strip() == "active": + if verify2.stdout.strip() == "active": restarted_services.append(svc_name) + print(f" ✓ {svc_name} recovered on retry") else: - # Retry once — transient startup failures - # (stale module cache, import race) often - # resolve on the second attempt. print( - f" ⚠ {svc_name} died after restart, retrying..." + f" ✗ {svc_name} failed to stay running after restart.\n" + f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n" + f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}" ) - retry = subprocess.run( - scope_cmd + ["restart", svc_name], - capture_output=True, - text=True, - timeout=15, - ) - _time.sleep(3) - verify2 = subprocess.run( - scope_cmd + ["is-active", svc_name], - capture_output=True, - text=True, - timeout=5, - ) - if verify2.stdout.strip() == "active": - restarted_services.append(svc_name) - print(f" ✓ {svc_name} recovered on retry") - else: - print( - f" ✗ {svc_name} failed to stay running after restart.\n" - f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n" - f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}" - ) - else: - print( - f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}" - ) + else: + print( + f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}" + ) except (FileNotFoundError, subprocess.TimeoutExpired): pass @@ -6470,9 +6715,15 @@ def cmd_dashboard(args): try: import fastapi # noqa: F401 import uvicorn # noqa: F401 - except ImportError: - print("Web UI dependencies not installed.") - print(f"Install them with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'") + except ImportError as e: + 
print("Web UI dependencies not installed (need fastapi + uvicorn).") + print( + f"Re-install the package into this interpreter so metadata updates apply:\n" + f" cd {PROJECT_ROOT}\n" + f" {sys.executable} -m pip install -e .\n" + "If `pip` is missing in this venv, use: uv pip install -e ." + ) + print(f"Import error: {e}") sys.exit(1) if "HERMES_WEB_DIST" not in os.environ: @@ -6481,11 +6732,13 @@ def cmd_dashboard(args): from hermes_cli.web_server import start_server + embedded_chat = args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1" start_server( host=args.host, port=args.port, open_browser=not args.no_open, allow_public=getattr(args, "insecure", False), + embedded_chat=embedded_chat, ) @@ -7088,7 +7341,7 @@ For more help on a command: ) logout_parser.add_argument( "--provider", - choices=["nous", "openai-codex"], + choices=["nous", "openai-codex", "spotify"], default=None, help="Provider to log out from (default: active provider)", ) @@ -7145,6 +7398,17 @@ For more help on a command: "reset", help="Clear exhaustion status for all credentials for a provider" ) auth_reset.add_argument("provider", help="Provider id") + auth_status = auth_subparsers.add_parser("status", help="Show auth status for a provider") + auth_status.add_argument("provider", help="Provider id") + auth_logout = auth_subparsers.add_parser("logout", help="Log out a provider and clear stored auth state") + auth_logout.add_argument("provider", help="Provider id") + auth_spotify = auth_subparsers.add_parser("spotify", help="Authenticate Hermes with Spotify via PKCE") + auth_spotify.add_argument("spotify_action", nargs="?", choices=["login", "status", "logout"], default="login") + auth_spotify.add_argument("--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)") + auth_spotify.add_argument("--redirect-uri", help="Allow-listed localhost redirect URI for your Spotify app") + auth_spotify.add_argument("--scope", help="Override requested Spotify scopes") + 
auth_spotify.add_argument("--no-browser", action="store_true", help="Do not attempt to open the browser automatically") + auth_spotify.add_argument("--timeout", type=float, help="Callback/token exchange timeout in seconds") auth_parser.set_defaults(func=cmd_auth) # ========================================================================= @@ -7201,6 +7465,10 @@ For more help on a command: "--script", help="Path to a Python script whose stdout is injected into the prompt each run", ) + cron_create.add_argument( + "--workdir", + help="Absolute path for the job to run from. Injects AGENTS.md / CLAUDE.md / .cursorrules from that directory and uses it as the cwd for terminal/file/code_exec tools. Omit to preserve old behaviour (no project context files).", + ) # cron edit cron_edit = cron_subparsers.add_parser( @@ -7239,6 +7507,10 @@ For more help on a command: "--script", help="Path to a Python script whose stdout is injected into the prompt each run. Pass empty string to clear.", ) + cron_edit.add_argument( + "--workdir", + help="Absolute path for the job to run from (injects AGENTS.md etc. and sets terminal cwd). Pass empty string to clear.", + ) # lifecycle actions cron_pause = cron_subparsers.add_parser("pause", help="Pause a scheduled job") @@ -8652,6 +8924,14 @@ Examples: action="store_true", help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)", ) + dashboard_parser.add_argument( + "--tui", + action="store_true", + help=( + "Expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket). " + "Alternatively set HERMES_DASHBOARD_TUI=1." 
+ ), + ) dashboard_parser.set_defaults(func=cmd_dashboard) # ========================================================================= diff --git a/hermes_cli/model_normalize.py b/hermes_cli/model_normalize.py index 76dace065..99e6c34e4 100644 --- a/hermes_cli/model_normalize.py +++ b/hermes_cli/model_normalize.py @@ -12,8 +12,12 @@ Different LLM providers expect model identifiers in different formats: model IDs, but Claude still uses hyphenated native names like ``claude-sonnet-4-6``. - **OpenCode Go** preserves dots in model names: ``minimax-m2.7``. -- **DeepSeek** only accepts two model identifiers: - ``deepseek-chat`` and ``deepseek-reasoner``. +- **DeepSeek** accepts ``deepseek-chat`` (V3), ``deepseek-reasoner`` + (R1-family), and the first-class V-series IDs (``deepseek-v4-pro``, + ``deepseek-v4-flash``, and any future ``deepseek-v-*``). Older + Hermes revisions folded every non-reasoner input into + ``deepseek-chat``, which on aggregators routes to V3 — so a user + picking V4 Pro was silently downgraded. - **Custom** and remaining providers pass the name through as-is. This module centralises that translation so callers can simply write:: @@ -25,6 +29,7 @@ Inspired by Clawdbot's ``normalizeAnthropicModelId`` pattern. from __future__ import annotations +import re from typing import Optional # --------------------------------------------------------------------------- @@ -100,6 +105,15 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({ "custom", }) +# Providers whose APIs require lowercase model IDs. Xiaomi's +# ``api.xiaomimimo.com`` rejects mixed-case names like ``MiMo-V2.5-Pro`` +# that users might copy from marketing docs — it only accepts +# ``mimo-v2.5-pro``. After stripping a matching provider prefix, these +# providers also get ``.lower()`` applied. 
+_LOWERCASE_MODEL_PROVIDERS: frozenset[str] = frozenset({ + "xiaomi", +}) + # --------------------------------------------------------------------------- # DeepSeek special handling # --------------------------------------------------------------------------- @@ -115,17 +129,30 @@ _DEEPSEEK_REASONER_KEYWORDS: frozenset[str] = frozenset({ }) _DEEPSEEK_CANONICAL_MODELS: frozenset[str] = frozenset({ - "deepseek-chat", - "deepseek-reasoner", + "deepseek-chat", # V3 on DeepSeek direct and most aggregators + "deepseek-reasoner", # R1-family reasoning model + "deepseek-v4-pro", # V4 Pro — first-class model ID + "deepseek-v4-flash", # V4 Flash — first-class model ID }) +# First-class V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``, +# future ``deepseek-v5-*``, dated variants like ``deepseek-v4-flash-20260423``). +# Verified empirically 2026-04-24: DeepSeek's Chat Completions API returns +# ``provider: DeepSeek`` / ``model: deepseek-v4-flash-20260423`` when called +# with ``model=deepseek/deepseek-v4-flash``, so these names are not aliases +# of ``deepseek-chat`` and must not be folded into it. +_DEEPSEEK_V_SERIES_RE = re.compile(r"^deepseek-v\d+([-.].+)?$") + def _normalize_for_deepseek(model_name: str) -> str: - """Map any model input to one of DeepSeek's two accepted identifiers. + """Map a model input to a DeepSeek-accepted identifier. Rules: - - Already ``deepseek-chat`` or ``deepseek-reasoner`` -> pass through. - - Contains any reasoner keyword (r1, think, reasoning, cot, reasoner) + - Already a known canonical (``deepseek-chat``/``deepseek-reasoner``/ + ``deepseek-v4-pro``/``deepseek-v4-flash``) -> pass through. + - Matches the V-series pattern ``deepseek-v...`` -> pass through + (covers future ``deepseek-v5-*`` and dated variants without a release). + - Contains a reasoner keyword (r1, think, reasoning, cot, reasoner) -> ``deepseek-reasoner``. - Everything else -> ``deepseek-chat``. 
@@ -133,13 +160,17 @@ def _normalize_for_deepseek(model_name: str) -> str: model_name: The bare model name (vendor prefix already stripped). Returns: - One of ``"deepseek-chat"`` or ``"deepseek-reasoner"``. + A DeepSeek-accepted model identifier. """ bare = _strip_vendor_prefix(model_name).lower() if bare in _DEEPSEEK_CANONICAL_MODELS: return bare + # V-series first-class IDs (v4-pro, v4-flash, future v5-*, dated variants) + if _DEEPSEEK_V_SERIES_RE.match(bare): + return bare + # Check for reasoner-like keywords anywhere in the name for keyword in _DEEPSEEK_REASONER_KEYWORDS: if keyword in bare: @@ -347,6 +378,9 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str: >>> normalize_model_for_provider("claude-sonnet-4.6", "zai") 'claude-sonnet-4.6' + + >>> normalize_model_for_provider("MiMo-V2.5-Pro", "xiaomi") + 'mimo-v2.5-pro' """ name = (model_input or "").strip() if not name: @@ -410,7 +444,12 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str: # --- Direct providers: repair matching provider prefixes only --- if provider in _MATCHING_PREFIX_STRIP_PROVIDERS: - return _strip_matching_provider_prefix(name, provider) + result = _strip_matching_provider_prefix(name, provider) + # Some providers require lowercase model IDs (e.g. Xiaomi's API + # rejects "MiMo-V2.5-Pro" but accepts "mimo-v2.5-pro"). 
+ if provider in _LOWERCASE_MODEL_PROVIDERS: + result = result.lower() + return result # --- Authoritative native providers: preserve user-facing slugs as-is --- if provider in _AUTHORITATIVE_NATIVE_PROVIDERS: diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index a7c98d9c8..6402fa469 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -304,6 +304,113 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]: # Alias resolution # --------------------------------------------------------------------------- +def _model_sort_key(model_id: str, prefix: str) -> tuple: + """Sort key for model version preference. + + Extracts version numbers after the family prefix and returns a sort key + that prefers higher versions. Suffix tokens (``pro``, ``omni``, etc.) + are used as tiebreakers, with common quality indicators ranked. + + Examples (with prefix ``"mimo"``):: + + mimo-v2.5-pro → (-2.5, 0, 'pro') # highest version wins + mimo-v2.5 → (-2.5, 1, '') # no suffix = lower than pro + mimo-v2-pro → (-2.0, 0, 'pro') + mimo-v2-omni → (-2.0, 1, 'omni') + mimo-v2-flash → (-2.0, 1, 'flash') + """ + # Strip the prefix (and optional "/" separator for aggregator slugs) + rest = model_id[len(prefix):] + if rest.startswith("/"): + rest = rest[1:] + rest = rest.lstrip("-").strip() + + # Parse version and suffix from the remainder. + # "v2.5-pro" → version [2.5], suffix "pro" + # "-omni" → version [], suffix "omni" + # State machine: start → in_version → between → in_suffix + nums: list[float] = [] + suffix_buf = "" + state = "start" + num_buf = "" + + for ch in rest: + if state == "start": + if ch in "vV": + state = "in_version" + elif ch.isdigit(): + state = "in_version" + num_buf += ch + elif ch in "-_.": + pass # skip separators before any content + else: + state = "in_suffix" + suffix_buf += ch + elif state == "in_version": + if ch.isdigit(): + num_buf += ch + elif ch == ".": + if "." 
in num_buf: + # Second dot — flush current number, start new component + try: + nums.append(float(num_buf.rstrip("."))) + except ValueError: + pass + num_buf = "" + else: + num_buf += ch + elif ch in "-_.": + if num_buf: + try: + nums.append(float(num_buf.rstrip("."))) + except ValueError: + pass + num_buf = "" + state = "between" + else: + if num_buf: + try: + nums.append(float(num_buf.rstrip("."))) + except ValueError: + pass + num_buf = "" + state = "in_suffix" + suffix_buf += ch + elif state == "between": + if ch.isdigit(): + state = "in_version" + num_buf = ch + elif ch in "vV": + state = "in_version" + elif ch in "-_.": + pass + else: + state = "in_suffix" + suffix_buf += ch + elif state == "in_suffix": + suffix_buf += ch + + # Flush remaining buffer (strip trailing dots — "5.4." → "5.4") + if num_buf and state == "in_version": + try: + nums.append(float(num_buf.rstrip("."))) + except ValueError: + pass + + suffix = suffix_buf.lower().strip("-_.") + suffix = suffix.strip() + + # Negate versions so higher → sorts first + version_key = tuple(-n for n in nums) + + # Suffix quality ranking: pro/max > (no suffix) > omni/flash/mini/lite + # Lower number = preferred + _SUFFIX_RANK = {"pro": 0, "max": 0, "plus": 0, "turbo": 0} + suffix_rank = _SUFFIX_RANK.get(suffix, 1) + + return version_key + (suffix_rank, suffix) + + def resolve_alias( raw_input: str, current_provider: str, @@ -311,9 +418,9 @@ def resolve_alias( """Resolve a short alias against the current provider's catalog. Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the - current provider's models.dev catalog for the first model whose ID - starts with ``vendor/family`` (or just ``family`` for non-aggregator - providers). + current provider's models.dev catalog for the model whose ID starts + with ``vendor/family`` (or just ``family`` for non-aggregator + providers) and has the **highest version**. 
Returns: ``(provider, resolved_model_id, alias_name)`` if a match is @@ -341,28 +448,44 @@ def resolve_alias( vendor, family = identity - # Search the provider's catalog from models.dev + # Build catalog from models.dev, then merge in static _PROVIDER_MODELS + # entries that models.dev may be missing (e.g. newly added models not + # yet synced to the registry). catalog = list_provider_models(current_provider) - if not catalog: - return None + try: + from hermes_cli.models import _PROVIDER_MODELS + static = _PROVIDER_MODELS.get(current_provider, []) + if static: + seen = {m.lower() for m in catalog} + for m in static: + if m.lower() not in seen: + catalog.append(m) + except Exception: + pass # For aggregators, models are vendor/model-name format aggregator = is_aggregator(current_provider) - for model_id in catalog: - mid_lower = model_id.lower() - if aggregator: - # Match vendor/family prefix -- e.g. "anthropic/claude-sonnet" - prefix = f"{vendor}/{family}".lower() - if mid_lower.startswith(prefix): - return (current_provider, model_id, key) - else: - # Non-aggregator: bare names -- e.g. 
"claude-sonnet-4-6" - family_lower = family.lower() - if mid_lower.startswith(family_lower): - return (current_provider, model_id, key) + if aggregator: + prefix = f"{vendor}/{family}".lower() + matches = [ + mid for mid in catalog + if mid.lower().startswith(prefix) + ] + else: + family_lower = family.lower() + matches = [ + mid for mid in catalog + if mid.lower().startswith(family_lower) + ] - return None + if not matches: + return None + + # Sort by version descending — prefer the latest/highest version + prefix_for_sort = f"{vendor}/{family}" if aggregator else family + matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort)) + return (current_provider, matches[0], key) def get_authenticated_provider_slugs( @@ -648,7 +771,10 @@ def switch_model( if provider_changed or explicit_provider: try: - runtime = resolve_runtime_provider(requested=target_provider) + runtime = resolve_runtime_provider( + requested=target_provider, + target_model=new_model, + ) api_key = runtime.get("api_key", "") base_url = runtime.get("base_url", "") api_mode = runtime.get("api_mode", "") @@ -665,7 +791,10 @@ def switch_model( ) else: try: - runtime = resolve_runtime_provider(requested=current_provider) + runtime = resolve_runtime_provider( + requested=current_provider, + target_model=new_model, + ) api_key = runtime.get("api_key", "") base_url = runtime.get("base_url", "") api_mode = runtime.get("api_mode", "") @@ -692,6 +821,7 @@ def switch_model( target_provider, api_key=api_key, base_url=base_url, + api_mode=api_mode or None, ) except Exception as e: validation = { @@ -813,7 +943,7 @@ def list_authenticated_providers( from hermes_cli.auth import PROVIDER_REGISTRY from hermes_cli.models import ( OPENROUTER_MODELS, _PROVIDER_MODELS, - _MODELS_DEV_PREFERRED, _merge_with_models_dev, + _MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids, ) results: List[dict] = [] @@ -861,6 +991,14 @@ def list_authenticated_providers( # Check if any env var is set has_creds = 
any(os.environ.get(ev) for ev in env_vars) + if not has_creds: + try: + from hermes_cli.auth import _load_auth_store + store = _load_auth_store() + if store and hermes_id in store.get("credential_pool", {}): + has_creds = True + except Exception: + pass if not has_creds: continue @@ -972,11 +1110,14 @@ def list_authenticated_providers( if not has_creds: continue - # Use curated list — look up by Hermes slug, fall back to overlay key - model_ids = curated.get(hermes_slug, []) or curated.get(pid, []) - # Merge with models.dev for preferred providers (same rationale as above). - if hermes_slug in _MODELS_DEV_PREFERRED: - model_ids = _merge_with_models_dev(hermes_slug, model_ids) + if hermes_slug in {"copilot", "copilot-acp"}: + model_ids = provider_model_ids(hermes_slug) + else: + # Use curated list — look up by Hermes slug, fall back to overlay key + model_ids = curated.get(hermes_slug, []) or curated.get(pid, []) + # Merge with models.dev for preferred providers (same rationale as above). + if hermes_slug in _MODELS_DEV_PREFERRED: + model_ids = _merge_with_models_dev(hermes_slug, model_ids) total = len(model_ids) top = model_ids[:max_models] @@ -1099,6 +1240,15 @@ def list_authenticated_providers( if m and m not in models_list: models_list.append(m) + # Official OpenAI API rows in providers: often have base_url but no + # explicit models: dict — avoid a misleading zero count in /model. 
+ if not models_list: + url_lower = str(api_url).strip().lower() + if "api.openai.com" in url_lower: + fb = curated.get("openai") or [] + if fb: + models_list = list(fb) + # Try to probe /v1/models if URL is set (but don't block on it) # For now just show what we know from config results.append({ diff --git a/hermes_cli/models.py b/hermes_cli/models.py index bc7f40258..3a902ffdf 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -33,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"] # (model_id, display description shown in menus) OPENROUTER_MODELS: list[tuple[str, str]] = [ ("moonshotai/kimi-k2.6", "recommended"), + ("deepseek/deepseek-v4-pro", ""), + ("deepseek/deepseek-v4-flash", ""), ("anthropic/claude-opus-4.7", ""), ("anthropic/claude-opus-4.6", ""), ("anthropic/claude-sonnet-4.6", ""), @@ -40,7 +42,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("anthropic/claude-sonnet-4.5", ""), ("anthropic/claude-haiku-4.5", ""), ("openrouter/elephant-alpha", "free"), - ("openai/gpt-5.4", ""), + ("openai/gpt-5.5", ""), ("openai/gpt-5.4-mini", ""), ("xiaomi/mimo-v2.5-pro", ""), ("xiaomi/mimo-v2.5", ""), @@ -63,7 +65,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("nvidia/nemotron-3-super-120b-a12b:free", "free"), ("arcee-ai/trinity-large-preview:free", "free"), ("arcee-ai/trinity-large-thinking", ""), - ("openai/gpt-5.4-pro", ""), + ("openai/gpt-5.5-pro", ""), ("openai/gpt-5.4-nano", ""), ] @@ -109,6 +111,8 @@ def _codex_curated_models() -> list[str]: _PROVIDER_MODELS: dict[str, list[str]] = { "nous": [ "moonshotai/kimi-k2.6", + "deepseek/deepseek-v4-pro", + "deepseek/deepseek-v4-flash", "xiaomi/mimo-v2.5-pro", "xiaomi/mimo-v2.5", "anthropic/claude-opus-4.7", @@ -116,7 +120,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "anthropic/claude-sonnet-4.6", "anthropic/claude-sonnet-4.5", "anthropic/claude-haiku-4.5", - "openai/gpt-5.4", + "openai/gpt-5.5", "openai/gpt-5.4-mini", "openai/gpt-5.3-codex", "google/gemini-3-pro-preview", @@ 
-135,9 +139,21 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "x-ai/grok-4.20-beta", "nvidia/nemotron-3-super-120b-a12b", "arcee-ai/trinity-large-thinking", - "openai/gpt-5.4-pro", + "openai/gpt-5.5-pro", "openai/gpt-5.4-nano", ], + # Native OpenAI Chat Completions (api.openai.com). Used by /model counts and + # provider_model_ids fallback when /v1/models is unavailable. + "openai": [ + "gpt-5.4", + "gpt-5.4-mini", + "gpt-5-mini", + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-4.1", + "gpt-4o", + "gpt-4o-mini", + ], "openai-codex": _codex_curated_models(), "copilot-acp": [ "copilot-acp", @@ -151,10 +167,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "gpt-4.1", "gpt-4o", "gpt-4o-mini", - "claude-opus-4.6", "claude-sonnet-4.6", + "claude-sonnet-4", "claude-sonnet-4.5", "claude-haiku-4.5", + "gemini-3.1-pro-preview", + "gemini-3-pro-preview", + "gemini-3-flash-preview", "gemini-2.5-pro", "grok-code-fast-1", ], @@ -246,10 +265,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "claude-haiku-4-5-20251001", ], "deepseek": [ + "deepseek-v4-pro", + "deepseek-v4-flash", "deepseek-chat", "deepseek-reasoner", ], "xiaomi": [ + "mimo-v2.5-pro", + "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "mimo-v2-flash", @@ -301,6 +324,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2.5", "glm-5.1", "glm-5", + "mimo-v2.5-pro", + "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", @@ -672,7 +697,7 @@ def get_nous_recommended_aux_model( # --------------------------------------------------------------------------- # Canonical provider list — single source of truth for provider identity. # Every code path that lists, displays, or iterates providers derives from -# this list: hermes model, /model, /provider, list_authenticated_providers. +# this list: hermes model, /model, list_authenticated_providers. 
# # Fields: # slug — internal provider ID (used in config.yaml, --provider flag) @@ -692,7 +717,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"), ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), - ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"), + ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"), ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"), ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"), @@ -1100,7 +1125,10 @@ def fetch_models_with_pricing( return _pricing_cache[cache_key] url = cache_key.rstrip("/") + "/v1/models" - headers: dict[str, str] = {"Accept": "application/json"} + headers: dict[str, str] = { + "Accept": "application/json", + "User-Agent": _HERMES_USER_AGENT, + } if api_key: headers["Authorization"] = f"Bearer {api_key}" @@ -1674,7 +1702,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) if normalized == "openai-codex": from hermes_cli.codex_models import get_codex_model_ids - return get_codex_model_ids() + # Pass the live OAuth access token so the picker matches whatever + # ChatGPT lists for this account right now (new models appear without + # a Hermes release). Falls back to the hardcoded catalog if no token + # or the endpoint is unreachable. 
+ access_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + + creds = resolve_codex_runtime_credentials(refresh_if_expiring=True) + access_token = creds.get("api_key") + except Exception: + access_token = None + return get_codex_model_ids(access_token=access_token) if normalized in {"copilot", "copilot-acp"}: try: live = _fetch_github_models(_resolve_copilot_catalog_api_key()) @@ -1720,6 +1760,17 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) live = fetch_ollama_cloud_models(force_refresh=force_refresh) if live: return live + if normalized == "openai": + api_key = os.getenv("OPENAI_API_KEY", "").strip() + if api_key: + base_raw = os.getenv("OPENAI_BASE_URL", "").strip().rstrip("/") + base = base_raw or "https://api.openai.com/v1" + try: + live = fetch_api_models(api_key, base) + if live: + return live + except Exception: + pass if normalized == "custom": base_url = _get_custom_base_url() if base_url: @@ -1874,6 +1925,51 @@ def fetch_github_model_catalog( return None +# ─── Copilot catalog context-window helpers ───────────────────────────────── + +# Module-level cache: {model_id: max_prompt_tokens} +_copilot_context_cache: dict[str, int] = {} +_copilot_context_cache_time: float = 0.0 +_COPILOT_CONTEXT_CACHE_TTL = 3600 # 1 hour + + +def get_copilot_model_context(model_id: str, api_key: Optional[str] = None) -> Optional[int]: + """Look up max_prompt_tokens for a Copilot model from the live /models API. + + Results are cached in-process for 1 hour to avoid repeated API calls. + Returns the token limit or None if not found. 
+ """ + global _copilot_context_cache, _copilot_context_cache_time + + # Serve from cache if fresh + if _copilot_context_cache and (time.time() - _copilot_context_cache_time < _COPILOT_CONTEXT_CACHE_TTL): + if model_id in _copilot_context_cache: + return _copilot_context_cache[model_id] + # Cache is fresh but model not in it — don't re-fetch + return None + + # Fetch and populate cache + catalog = fetch_github_model_catalog(api_key=api_key) + if not catalog: + return None + + cache: dict[str, int] = {} + for item in catalog: + mid = str(item.get("id") or "").strip() + if not mid: + continue + caps = item.get("capabilities") or {} + limits = caps.get("limits") or {} + max_prompt = limits.get("max_prompt_tokens") + if isinstance(max_prompt, int) and max_prompt > 0: + cache[mid] = max_prompt + + _copilot_context_cache = cache + _copilot_context_cache_time = time.time() + + return cache.get(model_id) + + def _is_github_models_base_url(base_url: Optional[str]) -> bool: normalized = (base_url or "").strip().rstrip("/").lower() return ( @@ -1907,6 +2003,7 @@ _COPILOT_MODEL_ALIASES = { "openai/o4-mini": "gpt-5-mini", "anthropic/claude-opus-4.6": "claude-opus-4.6", "anthropic/claude-sonnet-4.6": "claude-sonnet-4.6", + "anthropic/claude-sonnet-4": "claude-sonnet-4", "anthropic/claude-sonnet-4.5": "claude-sonnet-4.5", "anthropic/claude-haiku-4.5": "claude-haiku-4.5", # Dash-notation fallbacks: Hermes' default Claude IDs elsewhere use @@ -1916,10 +2013,12 @@ _COPILOT_MODEL_ALIASES = { # "model_not_supported". See issue #6879. 
"claude-opus-4-6": "claude-opus-4.6", "claude-sonnet-4-6": "claude-sonnet-4.6", + "claude-sonnet-4-0": "claude-sonnet-4", "claude-sonnet-4-5": "claude-sonnet-4.5", "claude-haiku-4-5": "claude-haiku-4.5", "anthropic/claude-opus-4-6": "claude-opus-4.6", "anthropic/claude-sonnet-4-6": "claude-sonnet-4.6", + "anthropic/claude-sonnet-4-0": "claude-sonnet-4", "anthropic/claude-sonnet-4-5": "claude-sonnet-4.5", "anthropic/claude-haiku-4-5": "claude-haiku-4.5", } @@ -2144,8 +2243,15 @@ def probe_api_models( api_key: Optional[str], base_url: Optional[str], timeout: float = 5.0, + api_mode: Optional[str] = None, ) -> dict[str, Any]: - """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics.""" + """Probe a ``/models`` endpoint with light URL heuristics. + + For ``anthropic_messages`` mode, uses ``x-api-key`` and + ``anthropic-version`` headers (Anthropic's native auth) instead of + ``Authorization: Bearer``. The response shape (``data[].id``) is + identical, so the same parser works for both. 
+ """ normalized = (base_url or "").strip().rstrip("/") if not normalized: return { @@ -2177,7 +2283,10 @@ def probe_api_models( tried: list[str] = [] headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT} - if api_key: + if api_key and api_mode == "anthropic_messages": + headers["x-api-key"] = api_key + headers["anthropic-version"] = "2023-06-01" + elif api_key: headers["Authorization"] = f"Bearer {api_key}" if normalized.startswith(COPILOT_BASE_URL): headers.update(copilot_default_headers()) @@ -2219,7 +2328,10 @@ def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]: base_url = AI_GATEWAY_BASE_URL url = base_url.rstrip("/") + "/models" - headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"} + headers: dict[str, str] = { + "Authorization": f"Bearer {api_key}", + "User-Agent": _HERMES_USER_AGENT, + } req = urllib.request.Request(url, headers=headers) try: with urllib.request.urlopen(req, timeout=timeout) as resp: @@ -2239,13 +2351,14 @@ def fetch_api_models( api_key: Optional[str], base_url: Optional[str], timeout: float = 5.0, + api_mode: Optional[str] = None, ) -> Optional[list[str]]: """Fetch the list of available model IDs from the provider's ``/models`` endpoint. Returns a list of model ID strings, or ``None`` if the endpoint could not be reached (network error, timeout, auth failure, etc.). """ - return probe_api_models(api_key, base_url, timeout=timeout).get("models") + return probe_api_models(api_key, base_url, timeout=timeout, api_mode=api_mode).get("models") # --------------------------------------------------------------------------- @@ -2373,6 +2486,7 @@ def validate_requested_model( *, api_key: Optional[str] = None, base_url: Optional[str] = None, + api_mode: Optional[str] = None, ) -> dict[str, Any]: """ Validate a ``/model`` value for the active provider. 
@@ -2414,7 +2528,11 @@ def validate_requested_model( } if normalized == "custom": - probe = probe_api_models(api_key, base_url) + # Try probing with correct auth for the api_mode. + if api_mode == "anthropic_messages": + probe = probe_api_models(api_key, base_url, api_mode=api_mode) + else: + probe = probe_api_models(api_key, base_url) api_models = probe.get("models") if api_models is not None: if requested_for_lookup in set(api_models): @@ -2463,12 +2581,17 @@ def validate_requested_model( f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. " f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification." ) + if api_mode == "anthropic_messages": + message += ( + "\n Many Anthropic-compatible proxies do not implement the Models API " + "(GET /v1/models). The model name has been accepted without verification." + ) if probe.get("suggested_base_url"): message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`" return { - "accepted": False, - "persist": False, + "accepted": api_mode == "anthropic_messages", + "persist": True, "recognized": False, "message": message, } @@ -2556,10 +2679,100 @@ def validate_requested_model( ), } + # Native Anthropic provider: /v1/models requires x-api-key (or Bearer for + # OAuth) plus anthropic-version headers. The generic OpenAI-style probe + # below uses plain Bearer auth and 401s against Anthropic, so dispatch to + # the native fetcher which handles both API keys and Claude-Code OAuth + # tokens. (The api_mode=="anthropic_messages" branch below handles the + # Messages-API transport case separately.) 
+ if normalized == "anthropic": + anthropic_models = _fetch_anthropic_models() + if anthropic_models is not None: + if requested_for_lookup in set(anthropic_models): + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + auto = get_close_matches(requested_for_lookup, anthropic_models, n=1, cutoff=0.9) + if auto: + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": auto[0], + "message": f"Auto-corrected `{requested}` → `{auto[0]}`", + } + suggestions = get_close_matches(requested, anthropic_models, n=3, cutoff=0.5) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) + # Accept anyway — Anthropic sometimes gates newer/preview models + # (e.g. snapshot IDs, early-access releases) behind accounts + # even though they aren't listed on /v1/models. + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in Anthropic's /v1/models listing. " + f"It may still work if you have early-access or snapshot IDs." + f"{suggestion_text}" + ), + } + # _fetch_anthropic_models returned None — no token resolvable or + # network failure. Fall through to the generic warning below. + + # Anthropic Messages API: many proxies don't implement /v1/models. + # Try probing with correct auth; if it fails, accept with a warning. 
+ if api_mode == "anthropic_messages": + api_models = fetch_api_models(api_key, base_url, api_mode=api_mode) + if api_models is not None: + if requested_for_lookup in set(api_models): + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + auto = get_close_matches(requested_for_lookup, api_models, n=1, cutoff=0.9) + if auto: + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": auto[0], + "message": f"Auto-corrected `{requested}` → `{auto[0]}`", + } + # Probe failed or model not found — accept anyway (proxy likely + # doesn't implement the Anthropic Models API). + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: could not verify `{requested}` against this endpoint's " + f"model listing. Many Anthropic-compatible proxies do not " + f"implement GET /v1/models. The model name has been accepted " + f"without verification." + ), + } + # Probe the live API to check if the model actually exists api_models = fetch_api_models(api_key, base_url) if api_models is not None: + # Gemini's OpenAI-compat /v1beta/openai/models endpoint returns IDs + # prefixed with "models/" (e.g. "models/gemini-2.5-flash") — native + # Gemini-API convention. Our curated list and user input both use + # the bare ID, so a direct set-membership check drops every known + # Gemini model. Strip the prefix before comparison. See #12532. 
+ if normalized == "gemini": + api_models = [ + m[len("models/"):] if isinstance(m, str) and m.startswith("models/") else m + for m in api_models + ] if requested_for_lookup in set(api_models): # API confirmed the model exists return { diff --git a/hermes_cli/platforms.py b/hermes_cli/platforms.py index 1fc3a3a85..05507eace 100644 --- a/hermes_cli/platforms.py +++ b/hermes_cli/platforms.py @@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([ ("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")), ("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")), ("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")), + ("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")), ]) diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 28cb3b1b5..7eb9a400c 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -71,6 +71,14 @@ VALID_HOOKS: Set[str] = { "on_session_finalize", "on_session_reset", "subagent_stop", + # Gateway pre-dispatch hook. Fired once per incoming MessageEvent + # after the internal-event guard but BEFORE auth/pairing and agent + # dispatch. Plugins may return a dict to influence flow: + # {"action": "skip", "reason": "..."} -> drop message (no reply) + # {"action": "rewrite", "text": "..."} -> replace event.text, continue + # {"action": "allow"} / None -> normal dispatch + # Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store. 
+ "pre_gateway_dispatch", } ENTRY_POINTS_GROUP = "hermes_agent.plugins" diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index e842086a4..f65ceac7a 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -116,6 +116,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { transport="openai_chat", base_url_env_var="DASHSCOPE_BASE_URL", ), + "alibaba-coding-plan": HermesOverlay( + transport="openai_chat", + base_url_env_var="ALIBABA_CODING_PLAN_BASE_URL", + ), "vercel": HermesOverlay( transport="openai_chat", is_aggregator=True, @@ -259,6 +263,9 @@ ALIASES: Dict[str, str] = { "aliyun": "alibaba", "qwen": "alibaba", "alibaba-cloud": "alibaba", + "alibaba_coding": "alibaba-coding-plan", + "alibaba-coding": "alibaba-coding-plan", + "alibaba_coding_plan": "alibaba-coding-plan", # google-gemini-cli (OAuth + Code Assist) "gemini-cli": "google-gemini-cli", diff --git a/hermes_cli/pty_bridge.py b/hermes_cli/pty_bridge.py new file mode 100644 index 000000000..9a8a73bad --- /dev/null +++ b/hermes_cli/pty_bridge.py @@ -0,0 +1,229 @@ +"""PTY bridge for `hermes dashboard` chat tab. + +Wraps a child process behind a pseudo-terminal so its ANSI output can be +streamed to a browser-side terminal emulator (xterm.js) and typed +keystrokes can be fed back in. The only caller today is the +``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``. + +Design constraints: + +* **POSIX-only.** Hermes Agent supports Windows exclusively via WSL, which + exposes a native POSIX PTY via ``openpty(3)``. Native Windows Python + has no PTY; :class:`PtyUnavailableError` is raised with a user-readable + install/platform message so the dashboard can render a banner instead of + crashing. +* **Zero Node dependency on the server side.** We use :mod:`ptyprocess`, + which is a pure-Python wrapper around the OS calls. 
The browser talks + to the same ``hermes --tui`` binary it would launch from the CLI, so + every TUI feature (slash popover, model picker, tool rows, markdown, + skin engine, clarify/sudo/approval prompts) ships automatically. +* **Byte-safe I/O.** Reads and writes go through the PTY master fd + directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because + streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land + mid-read. +""" + +from __future__ import annotations + +import errno +import fcntl +import os +import select +import signal +import struct +import sys +import termios +import time +from typing import Optional, Sequence + +try: + import ptyprocess # type: ignore + _PTY_AVAILABLE = not sys.platform.startswith("win") +except ImportError: # pragma: no cover - dev env without ptyprocess + ptyprocess = None # type: ignore + _PTY_AVAILABLE = False + + +__all__ = ["PtyBridge", "PtyUnavailableError"] + + +class PtyUnavailableError(RuntimeError): + """Raised when a PTY cannot be created on this platform. + + Today this means native Windows (no ConPTY bindings) or a dev + environment missing the ``ptyprocess`` dependency. The dashboard + surfaces the message to the user as a chat-tab banner. + """ + + +class PtyBridge: + """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming. + + Not thread-safe. A single bridge is owned by the WebSocket handler + that spawned it; the reader runs in an executor thread while writes + happen on the event-loop thread. Both sides are OK because the + kernel PTY is the actual synchronization point — we never call + :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and + ``os.write`` on the master fd, which is safe. 
+ """ + + def __init__(self, proc: "ptyprocess.PtyProcess"): # type: ignore[name-defined] + self._proc = proc + self._fd: int = proc.fd + self._closed = False + + # -- lifecycle -------------------------------------------------------- + + @classmethod + def is_available(cls) -> bool: + """True if a PTY can be spawned on this platform.""" + return bool(_PTY_AVAILABLE) + + @classmethod + def spawn( + cls, + argv: Sequence[str], + *, + cwd: Optional[str] = None, + env: Optional[dict] = None, + cols: int = 80, + rows: int = 24, + ) -> "PtyBridge": + """Spawn ``argv`` behind a new PTY and return a bridge. + + Raises :class:`PtyUnavailableError` if the platform can't host a + PTY. Raises :class:`FileNotFoundError` or :class:`OSError` for + ordinary exec failures (missing binary, bad cwd, etc.). + """ + if not _PTY_AVAILABLE: + if sys.platform.startswith("win"): + raise PtyUnavailableError( + "Pseudo-terminals are unavailable on this platform. " + "Hermes Agent supports Windows only via WSL." + ) + if ptyprocess is None: + raise PtyUnavailableError( + "The `ptyprocess` package is missing. " + "Install with: pip install ptyprocess " + "(or pip install -e '.[pty]')." + ) + raise PtyUnavailableError("Pseudo-terminals are unavailable.") + # Let caller-supplied env fully override inheritance; if they pass + # None we inherit the server's env (same semantics as subprocess). 
+ spawn_env = os.environ.copy() if env is None else env + proc = ptyprocess.PtyProcess.spawn( # type: ignore[union-attr] + list(argv), + cwd=cwd, + env=spawn_env, + dimensions=(rows, cols), + ) + return cls(proc) + + @property + def pid(self) -> int: + return int(self._proc.pid) + + def is_alive(self) -> bool: + if self._closed: + return False + try: + return bool(self._proc.isalive()) + except Exception: + return False + + # -- I/O -------------------------------------------------------------- + + def read(self, timeout: float = 0.2) -> Optional[bytes]: + """Read up to 64 KiB of raw bytes from the PTY master. + + Returns: + * bytes — zero or more bytes of child output + * empty bytes (``b""``) — no data available within ``timeout`` + * None — child has exited and the master fd is at EOF + + Never blocks longer than ``timeout`` seconds. Safe to call after + :meth:`close`; returns ``None`` in that case. + """ + if self._closed: + return None + try: + readable, _, _ = select.select([self._fd], [], [], timeout) + except (OSError, ValueError): + return None + if not readable: + return b"" + try: + data = os.read(self._fd, 65536) + except OSError as exc: + # EIO on Linux = slave side closed. EBADF = already closed. + if exc.errno in (errno.EIO, errno.EBADF): + return None + raise + if not data: + return None + return data + + def write(self, data: bytes) -> None: + """Write raw bytes to the PTY master (i.e. the child's stdin).""" + if self._closed or not data: + return + # os.write can return a short write under load; loop until drained. 
+ view = memoryview(data) + while view: + try: + n = os.write(self._fd, view) + except OSError as exc: + if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE): + return + raise + if n <= 0: + return + view = view[n:] + + def resize(self, cols: int, rows: int) -> None: + """Forward a terminal resize to the child via ``TIOCSWINSZ``.""" + if self._closed: + return + # struct winsize: rows, cols, xpixel, ypixel (all unsigned short) + winsize = struct.pack("HHHH", max(1, rows), max(1, cols), 0, 0) + try: + fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize) + except OSError: + pass + + # -- teardown --------------------------------------------------------- + + def close(self) -> None: + """Terminate the child (SIGTERM → 0.5s grace → SIGKILL) and close fds. + + Idempotent. Reaping the child is important so we don't leak + zombies across the lifetime of the dashboard process. + """ + if self._closed: + return + self._closed = True + + # SIGHUP is the conventional "your terminal went away" signal. + # We escalate if the child ignores it. + for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL): + if not self._proc.isalive(): + break + try: + self._proc.kill(sig) + except Exception: + pass + deadline = time.monotonic() + 0.5 + while self._proc.isalive() and time.monotonic() < deadline: + time.sleep(0.02) + + try: + self._proc.close(force=True) + except Exception: + pass + + # Context-manager sugar — handy in tests and ad-hoc scripts. 
+ def __enter__(self) -> "PtyBridge": + return self + + def __exit__(self, *_exc) -> None: + self.close() diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 922946e2a..cbfcbdbd6 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -36,6 +36,29 @@ def _normalize_custom_provider_name(value: str) -> str: return value.strip().lower().replace(" ", "-") +def _loopback_hostname(host: str) -> bool: + h = (host or "").lower().rstrip(".") + return h in {"localhost", "127.0.0.1", "::1", "0.0.0.0"} + + +def _config_base_url_trustworthy_for_bare_custom(cfg_base_url: str, cfg_provider: str) -> bool: + """Decide whether ``model.base_url`` may back bare ``custom`` runtime resolution. + + GitHub #14676: the model picker can select Custom while ``model.provider`` still reflects a + previous provider. Reject non-loopback URLs unless the YAML provider is already ``custom``, + so a stale OpenRouter/Z.ai base_url cannot hijack local ``custom`` sessions. + """ + cfg_provider_norm = (cfg_provider or "").strip().lower() + bu = (cfg_base_url or "").strip() + if not bu: + return False + if cfg_provider_norm == "custom": + return True + if base_url_host_matches(bu, "openrouter.ai"): + return False + return _loopback_hostname(base_url_hostname(bu)) + + def _detect_api_mode_for_url(base_url: str) -> Optional[str]: """Auto-detect api_mode from the resolved base URL. @@ -160,8 +183,16 @@ def _resolve_runtime_from_pool_entry( requested_provider: str, model_cfg: Optional[Dict[str, Any]] = None, pool: Optional[CredentialPool] = None, + target_model: Optional[str] = None, ) -> Dict[str, Any]: model_cfg = model_cfg or _get_model_config() + # When the caller is resolving for a specific target model (e.g. a /model + # mid-session switch), prefer that over the persisted model.default. 
This + # prevents api_mode being computed from a stale config default that no + # longer matches the model actually being used — the bug that caused + # opencode-zen /v1 to be stripped for chat_completions requests when + # config.default was still a Claude model. + effective_model = (target_model or model_cfg.get("default") or "") base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/") api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") api_mode = "chat_completions" @@ -207,7 +238,7 @@ def _resolve_runtime_from_pool_entry( api_mode = configured_mode elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode - api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) + api_mode = opencode_model_api_mode(provider, effective_model) else: # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, # Kimi /coding, api.openai.com → codex_responses, api.x.ai → @@ -323,12 +354,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An # Found match by provider key base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or "" if base_url: - return { + result = { "name": entry.get("name", ep_name), "base_url": base_url.strip(), "api_key": resolved_api_key, "model": entry.get("default_model", ""), } + api_mode = _parse_api_mode(entry.get("api_mode")) + if api_mode: + result["api_mode"] = api_mode + return result # Also check the 'name' field if present display_name = entry.get("name", "") if display_name: @@ -337,12 +372,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An # Found match by display name base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or "" if base_url: - return { + result = { "name": display_name, "base_url": base_url.strip(), "api_key": resolved_api_key, "model": entry.get("default_model", ""), } + 
api_mode = _parse_api_mode(entry.get("api_mode")) + if api_mode: + result["api_mode"] = api_mode + return result # Fall back to custom_providers: list (legacy format) custom_providers = config.get("custom_providers") @@ -464,6 +503,7 @@ def _resolve_openrouter_runtime( cfg_provider = cfg_provider.strip().lower() env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip() + env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip() # Use config base_url when available and the provider context matches. # OPENAI_BASE_URL env var is no longer consulted — config.yaml is @@ -473,11 +513,14 @@ def _resolve_openrouter_runtime( if requested_norm == "auto": if not cfg_provider or cfg_provider == "auto": use_config_base_url = True - elif requested_norm == "custom" and cfg_provider == "custom": + elif requested_norm == "custom" and _config_base_url_trustworthy_for_bare_custom( + cfg_base_url, cfg_provider + ): use_config_base_url = True base_url = ( (explicit_base_url or "").strip() + or env_custom_base_url or (cfg_base_url.strip() if use_config_base_url else "") or env_openrouter_base_url or OPENROUTER_BASE_URL @@ -689,8 +732,18 @@ def resolve_runtime_provider( requested: Optional[str] = None, explicit_api_key: Optional[str] = None, explicit_base_url: Optional[str] = None, + target_model: Optional[str] = None, ) -> Dict[str, Any]: - """Resolve runtime provider credentials for agent execution.""" + """Resolve runtime provider credentials for agent execution. + + target_model: Optional override for model_cfg.get("default") when + computing provider-specific api_mode (e.g. OpenCode Zen/Go where different + models route through different API surfaces). Callers performing an + explicit mid-session model switch should pass the new model here so + api_mode is derived from the model they are switching TO, not the stale + persisted default. Other callers can leave it None to preserve existing + behavior (api_mode derived from config). 
+ """ requested_provider = resolve_requested_provider(requested) custom_runtime = _resolve_named_custom_runtime( @@ -772,6 +825,7 @@ def resolve_runtime_provider( requested_provider=requested_provider, model_cfg=model_cfg, pool=pool, + target_model=target_model, ) if provider == "nous": @@ -990,7 +1044,11 @@ def resolve_runtime_provider( api_mode = configured_mode elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode - api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) + # Prefer the target_model from the caller (explicit mid-session + # switch) over the stale model.default; see _resolve_runtime_from_pool_entry + # for the same rationale. + _effective = target_model or model_cfg.get("default", "") + api_mode = opencode_model_api_mode(provider, _effective) else: # Auto-detect Anthropic-compatible endpoints by URL convention # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 362961689..e28acd41b 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -103,7 +103,7 @@ _DEFAULT_PROVIDER_MODELS = { "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], - "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"], + "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"], "huggingface": [ "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507", 
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528", @@ -500,6 +500,15 @@ def _print_setup_summary(config: dict, hermes_home): if get_env_value("HASS_TOKEN"): tool_status.append(("Smart Home (Home Assistant)", True, None)) + # Spotify (OAuth via hermes auth spotify — check auth.json, not env vars) + try: + from hermes_cli.auth import get_provider_auth_state + _spotify_state = get_provider_auth_state("spotify") or {} + if _spotify_state.get("access_token") or _spotify_state.get("refresh_token"): + tool_status.append(("Spotify (PKCE OAuth)", True, None)) + except Exception: + pass + # Skills Hub if get_env_value("GITHUB_TOKEN"): tool_status.append(("Skills Hub (GitHub)", True, None)) diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 8541f0a05..d07e1a822 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -164,19 +164,26 @@ def show_status(args): qwen_status = {} nous_logged_in = bool(nous_status.get("logged_in")) + nous_error = nous_status.get("error") + nous_label = "logged in" if nous_logged_in else "not logged in (run: hermes auth add nous --type oauth)" print( f" {'Nous Portal':<12} {check_mark(nous_logged_in)} " - f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}" + f"{nous_label}" ) - if nous_logged_in: - portal_url = nous_status.get("portal_base_url") or "(unknown)" - access_exp = _format_iso_timestamp(nous_status.get("access_expires_at")) - key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at")) - refresh_label = "yes" if nous_status.get("has_refresh_token") else "no" + portal_url = nous_status.get("portal_base_url") or "(unknown)" + access_exp = _format_iso_timestamp(nous_status.get("access_expires_at")) + key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at")) + refresh_label = "yes" if nous_status.get("has_refresh_token") else "no" + if nous_logged_in or portal_url != "(unknown)" or nous_error: print(f" Portal URL: {portal_url}") + if nous_logged_in or 
nous_status.get("access_expires_at"): print(f" Access exp: {access_exp}") + if nous_logged_in or nous_status.get("agent_key_expires_at"): print(f" Key exp: {key_exp}") + if nous_logged_in or nous_status.get("has_refresh_token"): print(f" Refresh: {refresh_label}") + if nous_error and not nous_logged_in: + print(f" Error: {nous_error}") codex_logged_in = bool(codex_status.get("logged_in")) print( diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index 24acc15f5..db66e1db1 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -127,7 +127,7 @@ TIPS = [ # --- Tools & Capabilities --- "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.", - "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.", + "delegate_task spawns up to 3 concurrent sub-agents by default (delegation.max_concurrent_children) with isolated contexts for parallel work.", "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.", "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.", "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.", @@ -289,6 +289,7 @@ TIPS = [ "When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.", "agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.", "agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.", + "agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.", "The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.", "Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers 
them automatically.", "The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.", diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index e89f96178..32645aea3 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -67,12 +67,13 @@ CONFIGURABLE_TOOLSETS = [ ("messaging", "📨 Cross-Platform Messaging", "send_message"), ("rl", "🧪 RL Training", "Tinker-Atropos training tools"), ("homeassistant", "🏠 Home Assistant", "smart home device control"), + ("spotify", "🎵 Spotify", "playback, search, playlists, library"), ] # Toolsets that are OFF by default for new installs. # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled), # but the setup checklist won't pre-select them for first-time users. -_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl"} +_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"} def _get_effective_configurable_toolsets(): @@ -361,6 +362,18 @@ TOOL_CATEGORIES = { }, ], }, + "spotify": { + "name": "Spotify", + "icon": "🎵", + "providers": [ + { + "name": "Spotify Web API", + "tag": "PKCE OAuth — opens the setup wizard", + "env_vars": [], + "post_setup": "spotify", + }, + ], + }, "rl": { "name": "RL Training", "icon": "🧪", @@ -461,6 +474,35 @@ def _run_post_setup(post_setup_key: str): _print_warning(" kittentts install timed out (>5min)") _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + elif post_setup_key == "spotify": + # Run the full `hermes auth spotify` flow — if the user has no + # client_id yet, this drops them into the interactive wizard + # (opens the Spotify dashboard, prompts for client_id, persists + # to ~/.hermes/.env), then continues straight into PKCE. If they + # already have an app, it skips the wizard and just does OAuth. 
+ from types import SimpleNamespace + try: + from hermes_cli.auth import login_spotify_command + except Exception as exc: + _print_warning(f" Could not load Spotify auth: {exc}") + _print_info(" Run manually: hermes auth spotify") + return + _print_info(" Starting Spotify login...") + try: + login_spotify_command(SimpleNamespace( + client_id=None, redirect_uri=None, scope=None, + no_browser=False, timeout=None, + )) + _print_success(" Spotify authenticated") + except SystemExit as exc: + # User aborted the wizard, or OAuth failed — don't fail the + # toolset enable; they can retry with `hermes auth spotify`. + _print_warning(f" Spotify login did not complete: {exc}") + _print_info(" Run later: hermes auth spotify") + except Exception as exc: + _print_warning(f" Spotify login failed: {exc}") + _print_info(" Run manually: hermes auth spotify") + elif post_setup_key == "rl_training": try: __import__("tinker_atropos") @@ -590,7 +632,10 @@ def _get_platform_tools( default_off.remove(platform) enabled_toolsets -= default_off - # Plugin toolsets: enabled by default unless explicitly disabled. + # Plugin toolsets: enabled by default unless explicitly disabled, or + # unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify — + # shipped as a bundled plugin but user must opt in via `hermes tools` + # so we don't ship 7 Spotify tool schemas to users who don't use it). # A plugin toolset is "known" for a platform once `hermes tools` # has been saved for that platform (tracked via known_plugin_toolsets). # Unknown plugins default to enabled; known-but-absent = disabled. 
@@ -602,6 +647,9 @@ def _get_platform_tools( if pts in toolset_names: # Explicitly listed in config — enabled enabled_toolsets.add(pts) + elif pts in _DEFAULT_OFF_TOOLSETS: + # Opt-in plugin toolset — stay off until user picks it + continue elif pts not in known_for_platform: # New plugin not yet seen by hermes tools — default enabled enabled_toolsets.add(pts) diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py new file mode 100644 index 000000000..0a355ce4f --- /dev/null +++ b/hermes_cli/voice.py @@ -0,0 +1,548 @@ +"""Process-wide voice recording + TTS API for the TUI gateway. + +Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool`` +(text-to-speech) behind idempotent, stateful entry points that the gateway's +``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can +call from a dedicated thread. The gateway imports this module lazily so that +missing optional audio deps (sounddevice, faster-whisper, numpy) surface as +an ``ImportError`` at call time, not at startup. + +Two usage modes are exposed: + +* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single + manually-bounded capture used when the caller drives the start/stop pair + explicitly. +* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors + the classic CLI voice mode: recording auto-stops on silence, transcribes, + hands the result to a callback, and then auto-restarts for the next turn. + Three consecutive no-speech cycles stop the loop and fire + ``on_silent_limit`` so the UI can turn the mode off. +""" + +from __future__ import annotations + +import logging +import os +import sys +import threading +from typing import Any, Callable, Optional + +from tools.voice_mode import ( + create_audio_recorder, + is_whisper_hallucination, + play_audio_file, + transcribe_recording, +) + +logger = logging.getLogger(__name__) + + +def _debug(msg: str) -> None: + """Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1. 
+ + Goes to stderr so the TUI gateway wraps it as a gateway.stderr event, + which createGatewayEventHandler shows as an Activity line — exactly + what we need to diagnose "why didn't the loop auto-restart?" in the + user's real terminal without shipping a separate debug RPC. + + Any OSError / BrokenPipeError is swallowed because this fires from + background threads (silence callback, TTS daemon, beep) where a + broken stderr pipe must not kill the whole gateway — the main + command pipe (stdin+stdout) is what actually matters. + """ + if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1": + return + try: + print(f"[voice] {msg}", file=sys.stderr, flush=True) + except (BrokenPipeError, OSError): + pass + + +def _beeps_enabled() -> bool: + """CLI parity: voice.beep_enabled in config.yaml (default True).""" + try: + from hermes_cli.config import load_config + + voice_cfg = load_config().get("voice", {}) + if isinstance(voice_cfg, dict): + return bool(voice_cfg.get("beep_enabled", True)) + except Exception: + pass + return True + + +def _play_beep(frequency: int, count: int = 1) -> None: + """Audible cue matching cli.py's record/stop beeps. + + 880 Hz single-beep on start (cli.py:_voice_start_recording line 7532), + 660 Hz double-beep on stop (cli.py:_voice_stop_and_transcribe line 7585). + Best-effort — sounddevice failures are silently swallowed so the + voice loop never breaks because a speaker was unavailable. 
+ """ + if not _beeps_enabled(): + return + try: + from tools.voice_mode import play_beep + + play_beep(frequency=frequency, count=count) + except Exception as e: + _debug(f"beep {frequency}Hz failed: {e}") + +# ── Push-to-talk state ─────────────────────────────────────────────── +_recorder = None +_recorder_lock = threading.Lock() + +# ── Continuous (VAD) state ─────────────────────────────────────────── +_continuous_lock = threading.Lock() +_continuous_active = False +_continuous_recorder: Any = None + +# ── TTS-vs-STT feedback guard ──────────────────────────────────────── +# When TTS plays the agent reply over the speakers, the live microphone +# picks it up and transcribes the agent's own voice as user input — an +# infinite loop the agent happily joins ("Ha, looks like we're in a loop"). +# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is +# playing, set while silent. _continuous_on_silence waits on it before +# re-arming the recorder, and speak_text itself cancels any live capture +# before starting playback so the tail of the previous utterance doesn't +# leak into the mic. +_tts_playing = threading.Event() +_tts_playing.set() # initially "not playing" +_continuous_on_transcript: Optional[Callable[[str], None]] = None +_continuous_on_status: Optional[Callable[[str], None]] = None +_continuous_on_silent_limit: Optional[Callable[[], None]] = None +_continuous_no_speech_count = 0 +_CONTINUOUS_NO_SPEECH_LIMIT = 3 + + +# ── Push-to-talk API ───────────────────────────────────────────────── + + +def start_recording() -> None: + """Begin capturing from the default input device (push-to-talk). + + Idempotent — calling again while a recording is in progress is a no-op. 
+ """ + global _recorder + + with _recorder_lock: + if _recorder is not None and getattr(_recorder, "is_recording", False): + return + rec = create_audio_recorder() + rec.start() + _recorder = rec + + +def stop_and_transcribe() -> Optional[str]: + """Stop the active push-to-talk recording, transcribe, return text. + + Returns ``None`` when no recording is active, when the microphone + captured no speech, or when Whisper returned a known hallucination. + """ + global _recorder + + with _recorder_lock: + rec = _recorder + _recorder = None + + if rec is None: + return None + + wav_path = rec.stop() + if not wav_path: + return None + + try: + result = transcribe_recording(wav_path) + except Exception as e: + logger.warning("voice transcription failed: %s", e) + return None + finally: + try: + if os.path.isfile(wav_path): + os.unlink(wav_path) + except Exception: + pass + + # transcribe_recording returns {"success": bool, "transcript": str, ...} + # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript"). + if not result.get("success"): + return None + text = (result.get("transcript") or "").strip() + if not text or is_whisper_hallucination(text): + return None + + return text + + +# ── Continuous (VAD) API ───────────────────────────────────────────── + + +def start_continuous( + on_transcript: Callable[[str], None], + on_status: Optional[Callable[[str], None]] = None, + on_silent_limit: Optional[Callable[[], None]] = None, + silence_threshold: int = 200, + silence_duration: float = 3.0, +) -> None: + """Start a VAD-driven continuous recording loop. + + The loop calls ``on_transcript(text)`` each time speech is detected and + transcribed successfully, then auto-restarts. After + ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech + picked up at all) the loop stops itself and calls ``on_silent_limit`` + so the UI can reflect "voice off". Idempotent — calling while already + active is a no-op. 
+ + ``on_status`` is called with ``"listening"`` / ``"transcribing"`` / + ``"idle"`` so the UI can show a live indicator. + """ + global _continuous_active, _continuous_recorder + global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit + global _continuous_no_speech_count + + with _continuous_lock: + if _continuous_active: + _debug("start_continuous: already active — no-op") + return + _continuous_active = True + _continuous_on_transcript = on_transcript + _continuous_on_status = on_status + _continuous_on_silent_limit = on_silent_limit + _continuous_no_speech_count = 0 + + if _continuous_recorder is None: + _continuous_recorder = create_audio_recorder() + + _continuous_recorder._silence_threshold = silence_threshold + _continuous_recorder._silence_duration = silence_duration + rec = _continuous_recorder + + _debug( + f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)" + ) + + # CLI parity: single 880 Hz beep *before* opening the stream — placing + # the beep after stream.start() on macOS triggers a CoreAudio conflict + # (cli.py:7528 comment). + _play_beep(frequency=880, count=1) + + try: + rec.start(on_silence_stop=_continuous_on_silence) + except Exception as e: + logger.error("failed to start continuous recording: %s", e) + _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}") + with _continuous_lock: + _continuous_active = False + raise + + if on_status: + try: + on_status("listening") + except Exception: + pass + + +def stop_continuous() -> None: + """Stop the active continuous loop and release the microphone. + + Idempotent — calling while not active is a no-op. Any in-flight + transcription completes but its result is discarded (the callback + checks ``_continuous_active`` before firing). 
+ """ + global _continuous_active, _continuous_on_transcript + global _continuous_on_status, _continuous_on_silent_limit + global _continuous_recorder, _continuous_no_speech_count + + with _continuous_lock: + if not _continuous_active: + return + _continuous_active = False + rec = _continuous_recorder + on_status = _continuous_on_status + _continuous_on_transcript = None + _continuous_on_status = None + _continuous_on_silent_limit = None + _continuous_no_speech_count = 0 + + if rec is not None: + try: + # cancel() (not stop()) discards buffered frames — the loop + # is over, we don't want to transcribe a half-captured turn. + rec.cancel() + except Exception as e: + logger.warning("failed to cancel recorder: %s", e) + + # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the + # silence-auto-stop path plays). + _play_beep(frequency=660, count=2) + + if on_status: + try: + on_status("idle") + except Exception: + pass + + +def is_continuous_active() -> bool: + """Whether a continuous voice loop is currently running.""" + with _continuous_lock: + return _continuous_active + + +def _continuous_on_silence() -> None: + """AudioRecorder silence callback — runs in a daemon thread. + + Stops the current capture, transcribes, delivers the text via + ``on_transcript``, and — if the loop is still active — starts the + next capture. Three consecutive silent cycles end the loop. 
+ """ + global _continuous_active, _continuous_no_speech_count + + _debug("_continuous_on_silence: fired") + + with _continuous_lock: + if not _continuous_active: + _debug("_continuous_on_silence: loop inactive — abort") + return + rec = _continuous_recorder + on_transcript = _continuous_on_transcript + on_status = _continuous_on_status + on_silent_limit = _continuous_on_silent_limit + + if rec is None: + _debug("_continuous_on_silence: no recorder — abort") + return + + if on_status: + try: + on_status("transcribing") + except Exception: + pass + + wav_path = rec.stop() + # Peak RMS is the critical diagnostic when stop() returns None despite + # the VAD firing — tells us at a glance whether the mic was too quiet + # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree. + peak_rms = getattr(rec, "_peak_rms", -1) + _debug( + f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})" + ) + + # CLI parity: double 660 Hz beep after the stream stops (safe from the + # CoreAudio conflict that blocks pre-start beeps). + _play_beep(frequency=660, count=2) + + transcript: Optional[str] = None + + if wav_path: + try: + result = transcribe_recording(wav_path) + # transcribe_recording returns {"success": bool, "transcript": str, + # "error": str?} — NOT {"text": str}. Using the wrong key silently + # produced empty transcripts even when Groq/local STT returned fine, + # which masqueraded as "not hearing the user" to the caller. 
+ success = bool(result.get("success")) + text = (result.get("transcript") or "").strip() + err = result.get("error") + _debug( + f"_continuous_on_silence: transcribe -> success={success} " + f"text={text!r} err={err!r}" + ) + if success and text and not is_whisper_hallucination(text): + transcript = text + except Exception as e: + logger.warning("continuous transcription failed: %s", e) + _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}") + finally: + try: + if os.path.isfile(wav_path): + os.unlink(wav_path) + except Exception: + pass + + with _continuous_lock: + if not _continuous_active: + # User stopped us while we were transcribing — discard. + _debug("_continuous_on_silence: stopped during transcribe — no restart") + return + if transcript: + _continuous_no_speech_count = 0 + else: + _continuous_no_speech_count += 1 + should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT + no_speech = _continuous_no_speech_count + + if transcript and on_transcript: + try: + on_transcript(transcript) + except Exception as e: + logger.warning("on_transcript callback raised: %s", e) + + if should_halt: + _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting") + with _continuous_lock: + _continuous_active = False + _continuous_no_speech_count = 0 + if on_silent_limit: + try: + on_silent_limit() + except Exception: + pass + try: + rec.cancel() + except Exception: + pass + if on_status: + try: + on_status("idle") + except Exception: + pass + return + + # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to + # finish before re-arming the mic, then leave a small gap to avoid + # catching the tail of the speaker output. Without this the voice + # loop becomes a feedback loop — the agent's spoken reply lands + # back in the mic and gets re-submitted. 
+ if not _tts_playing.is_set(): + _debug("_continuous_on_silence: waiting for TTS to finish") + _tts_playing.wait(timeout=60) + import time as _time + _time.sleep(0.3) + + # User may have stopped the loop during the wait. + with _continuous_lock: + if not _continuous_active: + _debug("_continuous_on_silence: stopped while waiting for TTS") + return + + # Restart for the next turn. + _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})") + _play_beep(frequency=880, count=1) + try: + rec.start(on_silence_stop=_continuous_on_silence) + except Exception as e: + logger.error("failed to restart continuous recording: %s", e) + _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}") + with _continuous_lock: + _continuous_active = False + return + + if on_status: + try: + on_status("listening") + except Exception: + pass + + +# ── TTS API ────────────────────────────────────────────────────────── + + +def speak_text(text: str) -> None: + """Synthesize ``text`` with the configured TTS provider and play it. + + Mirrors cli.py:_voice_speak_response exactly — same markdown strip + pipeline, same 4000-char cap, same explicit mp3 output path, same + MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup + of both extensions. Keeping these in sync means a voice-mode TTS + session in the TUI sounds identical to one in the classic CLI. + + While playback is in flight the module-level _tts_playing Event is + cleared so the continuous-recording loop knows to wait before + re-arming the mic (otherwise the agent's spoken reply feedback-loops + through the microphone and the agent ends up replying to itself). + """ + if not text or not text.strip(): + return + + import re + import tempfile + import time + + # Cancel any live capture before we open the speakers — otherwise the + # last ~200ms of the user's turn tail + the first syllables of our TTS + # both end up in the next recording window. 
The continuous loop will + # re-arm itself after _tts_playing flips back (see _continuous_on_silence). + paused_recording = False + with _continuous_lock: + if ( + _continuous_active + and _continuous_recorder is not None + and getattr(_continuous_recorder, "is_recording", False) + ): + try: + _continuous_recorder.cancel() + paused_recording = True + except Exception as e: + logger.warning("failed to pause recorder for TTS: %s", e) + + _tts_playing.clear() + _debug(f"speak_text: TTS begin (paused_recording={paused_recording})") + + try: + from tools.tts_tool import text_to_speech_tool + + tts_text = text[:4000] if len(text) > 4000 else text + tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text) # fenced code blocks + tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text) # [text](url) → text + tts_text = re.sub(r'https?://\S+', '', tts_text) # bare URLs + tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text) # bold + tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text) # italic + tts_text = re.sub(r'`(.+?)`', r'\1', tts_text) # inline code + tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE) # headers + tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE) # list bullets + tts_text = re.sub(r'---+', '', tts_text) # horizontal rules + tts_text = re.sub(r'\n{3,}', '\n\n', tts_text) # excess newlines + tts_text = tts_text.strip() + if not tts_text: + return + + # MP3 output path, pre-chosen so we can play the MP3 directly even + # when text_to_speech_tool auto-converts to OGG for messaging + # platforms. afplay's OGG support is flaky, MP3 always works. 
+ os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True) + mp3_path = os.path.join( + tempfile.gettempdir(), + "hermes_voice", + f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3", + ) + + _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}") + text_to_speech_tool(text=tts_text, output_path=mp3_path) + + if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0: + _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)") + play_audio_file(mp3_path) + try: + os.unlink(mp3_path) + ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg" + if os.path.isfile(ogg_path): + os.unlink(ogg_path) + except OSError: + pass + else: + _debug(f"speak_text: TTS tool produced no audio at {mp3_path}") + except Exception as e: + logger.warning("Voice TTS playback failed: %s", e) + _debug(f"speak_text raised {type(e).__name__}: {e}") + finally: + _tts_playing.set() + _debug("speak_text: TTS done") + + # Re-arm the mic so the user can answer without pressing Ctrl+B. + # Small delay lets the OS flush speaker output and afplay fully + # release the audio device before sounddevice re-opens the input. 
+ if paused_recording: + time.sleep(0.3) + with _continuous_lock: + if _continuous_active and _continuous_recorder is not None: + try: + _continuous_recorder.start( + on_silence_stop=_continuous_on_silence + ) + _debug("speak_text: recording resumed after TTS") + except Exception as e: + logger.warning( + "failed to resume recorder after TTS: %s", e + ) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 9cdfdb37d..8c33a383e 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -49,7 +49,7 @@ from hermes_cli.config import ( from gateway.status import get_running_pid, read_runtime_status try: - from fastapi import FastAPI, HTTPException, Request + from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse, HTMLResponse, JSONResponse from fastapi.staticfiles import StaticFiles @@ -71,6 +71,11 @@ app = FastAPI(title="Hermes Agent", version=__version__) # Injected into the SPA HTML so only the legitimate web UI can use it. # --------------------------------------------------------------------------- _SESSION_TOKEN = secrets.token_urlsafe(32) +_SESSION_HEADER_NAME = "X-Hermes-Session-Token" + +# In-browser Chat tab (/chat, /api/pty, …). Off unless ``hermes dashboard --tui`` +# or HERMES_DASHBOARD_TUI=1. Set from :func:`start_server`. +_DASHBOARD_EMBEDDED_CHAT_ENABLED = False # Simple rate limiter for the reveal endpoint _reveal_timestamps: List[float] = [] @@ -104,14 +109,29 @@ _PUBLIC_API_PATHS: frozenset = frozenset({ }) -def _require_token(request: Request) -> None: - """Validate the ephemeral session token. Raises 401 on mismatch. +def _has_valid_session_token(request: Request) -> bool: + """True if the request carries a valid dashboard session token. - Uses ``hmac.compare_digest`` to prevent timing side-channels. 
+ The dedicated session header avoids collisions with reverse proxies that + already use ``Authorization`` (for example Caddy ``basic_auth``). We still + accept the legacy Bearer path for backward compatibility with older + dashboard bundles. """ + session_header = request.headers.get(_SESSION_HEADER_NAME, "") + if session_header and hmac.compare_digest( + session_header.encode(), + _SESSION_TOKEN.encode(), + ): + return True + auth = request.headers.get("authorization", "") expected = f"Bearer {_SESSION_TOKEN}" - if not hmac.compare_digest(auth.encode(), expected.encode()): + return hmac.compare_digest(auth.encode(), expected.encode()) + + +def _require_token(request: Request) -> None: + """Validate the ephemeral session token. Raises 401 on mismatch.""" + if not _has_valid_session_token(request): raise HTTPException(status_code=401, detail="Unauthorized") @@ -205,9 +225,7 @@ async def auth_middleware(request: Request, call_next): """Require the session token on all /api/ routes except the public list.""" path = request.url.path if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"): - auth = request.headers.get("authorization", "") - expected = f"Bearer {_SESSION_TOKEN}" - if not hmac.compare_digest(auth.encode(), expected.encode()): + if not _has_valid_session_token(request): return JSONResponse( status_code=401, content={"detail": "Unauthorized"}, @@ -269,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = { "display.busy_input_mode": { "type": "select", "description": "Input behavior while agent is running", - "options": ["queue", "interrupt", "block"], + "options": ["interrupt", "queue"], }, "memory.provider": { "type": "select", @@ -417,7 +435,14 @@ class EnvVarReveal(BaseModel): _GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL") -_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3")) +try: + _GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3")) +except 
(ValueError, TypeError): + _log.warning( + "Invalid GATEWAY_HEALTH_TIMEOUT value %r — using default 3.0s", + os.getenv("GATEWAY_HEALTH_TIMEOUT"), + ) + _GATEWAY_HEALTH_TIMEOUT = 3.0 def _probe_gateway_health() -> tuple[bool, dict | None]: @@ -1508,26 +1533,30 @@ def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]: with urllib.request.urlopen(req, timeout=20) as resp: result = json.loads(resp.read().decode()) except Exception as e: - sess["status"] = "error" - sess["error_message"] = f"Token exchange failed: {e}" + with _oauth_sessions_lock: + sess["status"] = "error" + sess["error_message"] = f"Token exchange failed: {e}" return {"ok": False, "status": "error", "message": sess["error_message"]} access_token = result.get("access_token", "") refresh_token = result.get("refresh_token", "") expires_in = int(result.get("expires_in") or 3600) if not access_token: - sess["status"] = "error" - sess["error_message"] = "No access token returned" + with _oauth_sessions_lock: + sess["status"] = "error" + sess["error_message"] = "No access token returned" return {"ok": False, "status": "error", "message": sess["error_message"]} expires_at_ms = int(time.time() * 1000) + (expires_in * 1000) try: _save_anthropic_oauth_creds(access_token, refresh_token, expires_at_ms) except Exception as e: - sess["status"] = "error" - sess["error_message"] = f"Save failed: {e}" + with _oauth_sessions_lock: + sess["status"] = "error" + sess["error_message"] = f"Save failed: {e}" return {"ok": False, "status": "error", "message": sess["error_message"]} - sess["status"] = "approved" + with _oauth_sessions_lock: + sess["status"] = "approved" _log.info("oauth/pkce: anthropic login completed (session=%s)", session_id) return {"ok": True, "status": "approved"} @@ -2242,6 +2271,329 @@ async def get_usage_analytics(days: int = 30): db.close() +# --------------------------------------------------------------------------- +# /api/pty — PTY-over-WebSocket bridge for the dashboard 
"Chat" tab. +# +# The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind +# a POSIX pseudo-terminal, and forwards bytes + resize escapes across a +# WebSocket. The browser renders the ANSI through xterm.js (see +# web/src/pages/ChatPage.tsx). +# +# Auth: ``?token=`` query param (browsers can't set +# Authorization on the WS upgrade). Same ephemeral ``_SESSION_TOKEN`` as +# REST. Localhost-only — we defensively reject non-loopback clients even +# though uvicorn binds to 127.0.0.1. +# --------------------------------------------------------------------------- + +import re +import asyncio + +from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError + +_RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]") +_PTY_READ_CHUNK_TIMEOUT = 0.2 +_VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$") +# Starlette's TestClient reports the peer as "testclient"; treat it as +# loopback so tests don't need to rewrite request scope. +_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"}) + +# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard) +# and /api/events (dashboard → browser sidebar). Keyed by an opaque channel id +# the chat tab generates on mount; entries auto-evict when the last subscriber +# drops AND the publisher has disconnected. +_event_channels: dict[str, set] = {} +_event_lock = asyncio.Lock() + + +def _resolve_chat_argv( + resume: Optional[str] = None, + sidecar_url: Optional[str] = None, +) -> tuple[list[str], Optional[str], Optional[dict]]: + """Resolve the argv + cwd + env for the chat PTY. + + Default: whatever ``hermes --tui`` would run. Tests monkeypatch this + function to inject a tiny fake command (``cat``, ``sh -c 'printf …'``) + so nothing has to build Node or the TUI bundle. + + Session resume is propagated via the ``HERMES_TUI_RESUME`` env var — + matching what ``hermes_cli.main._launch_tui`` does for the CLI path. 
+ Appending ``--resume `` to argv doesn't work because ``ui-tui`` does + not parse its argv. + + `sidecar_url` (when set) is forwarded as ``HERMES_TUI_SIDECAR_URL`` so + the spawned ``tui_gateway.entry`` can mirror dispatcher emits to the + dashboard's ``/api/pub`` endpoint (see :func:`pub_ws`). + """ + from hermes_cli.main import PROJECT_ROOT, _make_tui_argv + + argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False) + env: Optional[dict] = None + + if resume or sidecar_url: + env = os.environ.copy() + + if resume: + env["HERMES_TUI_RESUME"] = resume + + if sidecar_url: + env["HERMES_TUI_SIDECAR_URL"] = sidecar_url + + return list(argv), str(cwd) if cwd else None, env + + +def _build_sidecar_url(channel: str) -> Optional[str]: + """ws:// URL the PTY child should publish events to, or None when unbound.""" + host = getattr(app.state, "bound_host", None) + port = getattr(app.state, "bound_port", None) + + if not host or not port: + return None + + netloc = f"[{host}]:{port}" if ":" in host and not host.startswith("[") else f"{host}:{port}" + qs = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel}) + + return f"ws://{netloc}/api/pub?{qs}" + + +async def _broadcast_event(channel: str, payload: str) -> None: + """Fan out one publisher frame to every subscriber on `channel`.""" + async with _event_lock: + subs = list(_event_channels.get(channel, ())) + + for sub in subs: + try: + await sub.send_text(payload) + except Exception: + # Subscriber went away mid-send; the /api/events finally clause + # will remove it from the registry on its next iteration. 
+ pass + + +def _channel_or_close_code(ws: WebSocket) -> Optional[str]: + """Return the channel id from the query string or None if invalid.""" + channel = ws.query_params.get("channel", "") + + return channel if _VALID_CHANNEL_RE.match(channel) else None + + +@app.websocket("/api/pty") +async def pty_ws(ws: WebSocket) -> None: + if not _DASHBOARD_EMBEDDED_CHAT_ENABLED: + await ws.close(code=4403) + return + + # --- auth + loopback check (before accept so we can close cleanly) --- + token = ws.query_params.get("token", "") + expected = _SESSION_TOKEN + if not hmac.compare_digest(token.encode(), expected.encode()): + await ws.close(code=4401) + return + + client_host = ws.client.host if ws.client else "" + if client_host and client_host not in _LOOPBACK_HOSTS: + await ws.close(code=4403) + return + + await ws.accept() + + # --- spawn PTY ------------------------------------------------------ + resume = ws.query_params.get("resume") or None + channel = _channel_or_close_code(ws) + sidecar_url = _build_sidecar_url(channel) if channel else None + + try: + argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url) + except SystemExit as exc: + # _make_tui_argv calls sys.exit(1) when node/npm is missing. 
+ await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n") + await ws.close(code=1011) + return + + + try: + bridge = PtyBridge.spawn(argv, cwd=cwd, env=env) + except PtyUnavailableError as exc: + await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n") + await ws.close(code=1011) + return + except (FileNotFoundError, OSError) as exc: + await ws.send_text(f"\r\n\x1b[31mChat failed to start: {exc}\x1b[0m\r\n") + await ws.close(code=1011) + return + + loop = asyncio.get_running_loop() + + # --- reader task: PTY master → WebSocket ---------------------------- + async def pump_pty_to_ws() -> None: + while True: + chunk = await loop.run_in_executor( + None, bridge.read, _PTY_READ_CHUNK_TIMEOUT + ) + if chunk is None: # EOF + return + if not chunk: # no data this tick; yield control and retry + await asyncio.sleep(0) + continue + try: + await ws.send_bytes(chunk) + except Exception: + return + + reader_task = asyncio.create_task(pump_pty_to_ws()) + + # --- writer loop: WebSocket → PTY master ---------------------------- + try: + while True: + msg = await ws.receive() + msg_type = msg.get("type") + if msg_type == "websocket.disconnect": + break + raw = msg.get("bytes") + if raw is None: + text = msg.get("text") + raw = text.encode("utf-8") if isinstance(text, str) else b"" + if not raw: + continue + + # Resize escape is consumed locally, never written to the PTY. + match = _RESIZE_RE.match(raw) + if match and match.end() == len(raw): + cols = int(match.group(1)) + rows = int(match.group(2)) + bridge.resize(cols=cols, rows=rows) + continue + + bridge.write(raw) + except WebSocketDisconnect: + pass + finally: + reader_task.cancel() + try: + await reader_task + except (asyncio.CancelledError, Exception): + pass + bridge.close() + + +# --------------------------------------------------------------------------- +# /api/ws — JSON-RPC WebSocket sidecar for the dashboard "Chat" tab. 
+# +# Drives the same `tui_gateway.dispatch` surface Ink uses over stdio, so the +# dashboard can render structured metadata (model badge, tool-call sidebar, +# slash launcher, session info) alongside the xterm.js terminal that PTY +# already paints. Both transports bind to the same session id when one is +# active, so a tool.start emitted by the agent fans out to both sinks. +# --------------------------------------------------------------------------- + + +@app.websocket("/api/ws") +async def gateway_ws(ws: WebSocket) -> None: + if not _DASHBOARD_EMBEDDED_CHAT_ENABLED: + await ws.close(code=4403) + return + + token = ws.query_params.get("token", "") + if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()): + await ws.close(code=4401) + return + + client_host = ws.client.host if ws.client else "" + if client_host and client_host not in _LOOPBACK_HOSTS: + await ws.close(code=4403) + return + + from tui_gateway.ws import handle_ws + + await handle_ws(ws) + + +# --------------------------------------------------------------------------- +# /api/pub + /api/events — chat-tab event broadcast. +# +# The PTY-side ``tui_gateway.entry`` opens /api/pub at startup (driven by +# HERMES_TUI_SIDECAR_URL set in /api/pty's PTY env) and writes every +# dispatcher emit through it. The dashboard fans those frames out to any +# subscriber that opened /api/events on the same channel id. This is what +# gives the React sidebar its tool-call feed without breaking the PTY +# child's stdio handshake with Ink. 
+# --------------------------------------------------------------------------- + + +@app.websocket("/api/pub") +async def pub_ws(ws: WebSocket) -> None: + if not _DASHBOARD_EMBEDDED_CHAT_ENABLED: + await ws.close(code=4403) + return + + token = ws.query_params.get("token", "") + if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()): + await ws.close(code=4401) + return + + client_host = ws.client.host if ws.client else "" + if client_host and client_host not in _LOOPBACK_HOSTS: + await ws.close(code=4403) + return + + channel = _channel_or_close_code(ws) + if not channel: + await ws.close(code=4400) + return + + await ws.accept() + + try: + while True: + await _broadcast_event(channel, await ws.receive_text()) + except WebSocketDisconnect: + pass + + +@app.websocket("/api/events") +async def events_ws(ws: WebSocket) -> None: + if not _DASHBOARD_EMBEDDED_CHAT_ENABLED: + await ws.close(code=4403) + return + + token = ws.query_params.get("token", "") + if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()): + await ws.close(code=4401) + return + + client_host = ws.client.host if ws.client else "" + if client_host and client_host not in _LOOPBACK_HOSTS: + await ws.close(code=4403) + return + + channel = _channel_or_close_code(ws) + if not channel: + await ws.close(code=4400) + return + + await ws.accept() + + async with _event_lock: + _event_channels.setdefault(channel, set()).add(ws) + + try: + while True: + # Subscribers don't speak — the receive() just blocks until + # disconnect so the connection stays open as long as the + # browser holds it. + await ws.receive_text() + except WebSocketDisconnect: + pass + finally: + async with _event_lock: + subs = _event_channels.get(channel) + + if subs is not None: + subs.discard(ws) + + if not subs: + _event_channels.pop(channel, None) + + def mount_spa(application: FastAPI): """Mount the built SPA. Falls back to index.html for client-side routing. 
@@ -2263,8 +2615,10 @@ def mount_spa(application: FastAPI): def _serve_index(): """Return index.html with the session token injected.""" html = _index_path.read_text() + chat_js = "true" if _DASHBOARD_EMBEDDED_CHAT_ENABLED else "false" token_script = ( - f'' + f'" ) html = html.replace("", f"{token_script}", 1) return HTMLResponse( @@ -2304,8 +2658,227 @@ _BUILTIN_DASHBOARD_THEMES = [ ] +def _parse_theme_layer(value: Any, default_hex: str, default_alpha: float = 1.0) -> Optional[Dict[str, Any]]: + """Normalise a theme layer spec from YAML into `{hex, alpha}` form. + + Accepts shorthand (a bare hex string) or full dict form. Returns + ``None`` on garbage input so the caller can fall back to a built-in + default rather than blowing up. + """ + if value is None: + return {"hex": default_hex, "alpha": default_alpha} + if isinstance(value, str): + return {"hex": value, "alpha": default_alpha} + if isinstance(value, dict): + hex_val = value.get("hex", default_hex) + alpha_val = value.get("alpha", default_alpha) + if not isinstance(hex_val, str): + return None + try: + alpha_f = float(alpha_val) + except (TypeError, ValueError): + alpha_f = default_alpha + return {"hex": hex_val, "alpha": max(0.0, min(1.0, alpha_f))} + return None + + +_THEME_DEFAULT_TYPOGRAPHY: Dict[str, str] = { + "fontSans": 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif', + "fontMono": 'ui-monospace, "SF Mono", "Cascadia Mono", Menlo, Consolas, monospace', + "baseSize": "15px", + "lineHeight": "1.55", + "letterSpacing": "0", +} + +_THEME_DEFAULT_LAYOUT: Dict[str, str] = { + "radius": "0.5rem", + "density": "comfortable", +} + +_THEME_OVERRIDE_KEYS = { + "card", "cardForeground", "popover", "popoverForeground", + "primary", "primaryForeground", "secondary", "secondaryForeground", + "muted", "mutedForeground", "accent", "accentForeground", + "destructive", "destructiveForeground", "success", "warning", + "border", "input", "ring", +} + +# Well-known named asset slots 
themes can populate. Any other keys under +# ``assets.custom`` are exposed as ``--theme-asset-custom-`` CSS vars +# for plugin/shell use. +_THEME_NAMED_ASSET_KEYS = {"bg", "hero", "logo", "crest", "sidebar", "header"} + +# Component-style buckets themes can override. The value under each bucket +# is a mapping from camelCase property name to CSS string; each pair emits +# ``--component--`` on :root. The frontend's shell +# components (Card, App header, Backdrop, etc.) consume these vars so themes +# can restyle chrome (clip-path, border-image, segmented progress, etc.) +# without shipping their own CSS. +_THEME_COMPONENT_BUCKETS = { + "card", "header", "footer", "sidebar", "tab", + "progress", "badge", "backdrop", "page", +} + +_THEME_LAYOUT_VARIANTS = {"standard", "cockpit", "tiled"} + +# Cap on customCSS length so a malformed/oversized theme YAML can't blow up +# the response payload or the + + + + + +``` + +Key implementation patterns: +- **Seeded randomness**: Always `randomSeed()` + `noiseSeed()` for reproducibility +- **Color mode**: Use `colorMode(HSB, 360, 100, 100, 100)` for intuitive color control +- **State separation**: CONFIG for parameters, PALETTE for colors, globals for mutable state +- **Class-based entities**: Particles, agents, shapes as classes with `update()` + `display()` methods +- **Offscreen buffers**: `createGraphics()` for layered composition, trails, masks + +### Step 4: Preview & Iterate + +- Open HTML file directly in browser — no server needed for basic sketches +- For `loadImage()`/`loadFont()` from local files: use `scripts/serve.sh` or `python3 -m http.server` +- Chrome DevTools Performance tab to verify 60fps +- Test at target export resolution, not just the window size +- Adjust parameters until the visual matches the concept from Step 1 + +### Step 5: Export + +| Format | Method | Command | +|--------|--------|---------| +| **PNG** | `saveCanvas('output', 'png')` in `keyPressed()` | Press 's' to save | +| **High-res PNG** | 
Puppeteer headless capture | `node scripts/export-frames.js sketch.html --width 3840 --height 2160 --frames 1` | +| **GIF** | `saveGif('output', 5)` — captures N seconds | Press 'g' to save | +| **Frame sequence** | `saveFrames('frame', 'png', 10, 30)` — 10s at 30fps | Then `ffmpeg -i frame-%04d.png -c:v libx264 output.mp4` | +| **MP4** | Puppeteer frame capture + ffmpeg | `bash scripts/render.sh sketch.html output.mp4 --duration 30 --fps 30` | +| **SVG** | `createCanvas(w, h, SVG)` with p5.js-svg | `save('output.svg')` | + +### Step 6: Quality Verification + +- **Does it match the vision?** Compare output to the creative concept. If it looks generic, go back to Step 1 +- **Resolution check**: Is it sharp at the target display size? No aliasing artifacts? +- **Performance check**: Does it hold 60fps in browser? (30fps minimum for animations) +- **Color check**: Do the colors work together? Test on both light and dark monitors +- **Edge cases**: What happens at canvas edges? On resize? After running for 10 minutes? + +## Critical Implementation Notes + +### Performance — Disable FES First + +The Friendly Error System (FES) adds up to 10x overhead. Disable it in every production sketch: + +```javascript +p5.disableFriendlyErrors = true; // BEFORE setup() + +function setup() { + pixelDensity(1); // prevent 2x-4x overdraw on retina + createCanvas(1920, 1080); +} +``` + +In hot loops (particles, pixel ops), use `Math.*` instead of p5 wrappers — measurably faster: + +```javascript +// In draw() or update() hot paths: +let a = Math.sin(t); // not sin(t) +let r = Math.sqrt(dx*dx+dy*dy); // not dist() — or better: skip sqrt, compare magSq +let v = Math.random(); // not random() — when seed not needed +let m = Math.min(a, b); // not min(a, b) +``` + +Never `console.log()` inside `draw()`. Never manipulate DOM in `draw()`. See `references/troubleshooting.md` § Performance. + +### Seeded Randomness — Always + +Every generative sketch must be reproducible. 
Same seed, same output. + +```javascript +function setup() { + randomSeed(CONFIG.seed); + noiseSeed(CONFIG.seed); + // All random() and noise() calls now deterministic +} +``` + +Never use `Math.random()` for generative content — only for performance-critical non-visual code. Always `random()` for visual elements. If you need a random seed: `CONFIG.seed = floor(random(99999))`. + +### Generative Art Platform Support (fxhash / Art Blocks) + +For generative art platforms, replace p5's PRNG with the platform's deterministic random: + +```javascript +// fxhash convention +const SEED = $fx.hash; // unique per mint +const rng = $fx.rand; // deterministic PRNG +$fx.features({ palette: 'warm', complexity: 'high' }); + +// In setup(): +randomSeed(SEED); // for p5's noise() +noiseSeed(SEED); + +// Replace random() with rng() for platform determinism +let x = rng() * width; // instead of random(width) +``` + +See `references/export-pipeline.md` § Platform Export. + +### Color Mode — Use HSB + +HSB (Hue, Saturation, Brightness) is dramatically easier to work with than RGB for generative art: + +```javascript +colorMode(HSB, 360, 100, 100, 100); +// Now: fill(hue, sat, bri, alpha) +// Rotate hue: fill((baseHue + offset) % 360, 80, 90) +// Desaturate: fill(hue, sat * 0.3, bri) +// Darken: fill(hue, sat, bri * 0.5) +``` + +Never hardcode raw RGB values. Define a palette object, derive variations procedurally. See `references/color-systems.md`. + +### Noise — Multi-Octave, Not Raw + +Raw `noise(x, y)` looks like smooth blobs. Layer octaves for natural texture: + +```javascript +function fbm(x, y, octaves = 4) { + let val = 0, amp = 1, freq = 1, sum = 0; + for (let i = 0; i < octaves; i++) { + val += noise(x * freq, y * freq) * amp; + sum += amp; + amp *= 0.5; + freq *= 2; + } + return val / sum; +} +``` + +For flowing organic forms, use **domain warping**: feed noise output back as noise input coordinates. See `references/visual-effects.md`. 
+ +### createGraphics() for Layers — Not Optional + +Flat single-pass rendering looks flat. Use offscreen buffers for composition: + +```javascript +let bgLayer, fgLayer, trailLayer; +function setup() { + createCanvas(1920, 1080); + bgLayer = createGraphics(width, height); + fgLayer = createGraphics(width, height); + trailLayer = createGraphics(width, height); +} +function draw() { + renderBackground(bgLayer); + renderTrails(trailLayer); // persistent, fading + renderForeground(fgLayer); // cleared each frame + image(bgLayer, 0, 0); + image(trailLayer, 0, 0); + image(fgLayer, 0, 0); +} +``` + +### Performance — Vectorize Where Possible + +p5.js draw calls are expensive. For thousands of particles: + +```javascript +// SLOW: individual shapes +for (let p of particles) { + ellipse(p.x, p.y, p.size); +} + +// FAST: single shape with beginShape() +beginShape(POINTS); +for (let p of particles) { + vertex(p.x, p.y); +} +endShape(); + +// FASTEST: pixel buffer for massive counts +loadPixels(); +for (let p of particles) { + let idx = 4 * (floor(p.y) * width + floor(p.x)); + pixels[idx] = r; pixels[idx+1] = g; pixels[idx+2] = b; pixels[idx+3] = 255; +} +updatePixels(); +``` + +See `references/troubleshooting.md` § Performance. + +### Instance Mode for Multiple Sketches + +Global mode pollutes `window`. For production, use instance mode: + +```javascript +const sketch = (p) => { + p.setup = function() { + p.createCanvas(800, 800); + }; + p.draw = function() { + p.background(0); + p.ellipse(p.mouseX, p.mouseY, 50); + }; +}; +new p5(sketch, 'canvas-container'); +``` + +Required when embedding multiple sketches on one page or integrating with frameworks. 
+ +### WebGL Mode Gotchas + +- `createCanvas(w, h, WEBGL)` — origin is center, not top-left +- Y-axis is inverted (positive Y goes up in WEBGL, down in P2D) +- `translate(-width/2, -height/2)` to get P2D-like coordinates +- `push()`/`pop()` around every transform — matrix stack overflows silently +- `texture()` before `rect()`/`plane()` — not after +- Custom shaders: `createShader(vert, frag)` — test on multiple browsers + +### Export — Key Bindings Convention + +Every sketch should include these in `keyPressed()`: + +```javascript +function keyPressed() { + if (key === 's' || key === 'S') saveCanvas('output', 'png'); + if (key === 'g' || key === 'G') saveGif('output', 5); + if (key === 'r' || key === 'R') { randomSeed(millis()); noiseSeed(millis()); } + if (key === ' ') CONFIG.paused = !CONFIG.paused; +} +``` + +### Headless Video Export — Use noLoop() + +For headless rendering via Puppeteer, the sketch **must** use `noLoop()` in setup. Without it, p5's draw loop runs freely while screenshots are slow — the sketch races ahead and you get skipped/duplicate frames. + +```javascript +function setup() { + createCanvas(1920, 1080); + pixelDensity(1); + noLoop(); // capture script controls frame advance + window._p5Ready = true; // signal readiness to capture script +} +``` + +The bundled `scripts/export-frames.js` detects `_p5Ready` and calls `redraw()` once per capture for exact 1:1 frame correspondence. See `references/export-pipeline.md` § Deterministic Capture. + +For multi-scene videos, use the per-clip architecture: one HTML per scene, render independently, stitch with `ffmpeg -f concat`. See `references/export-pipeline.md` § Per-Clip Architecture. + +### Agent Workflow + +When building p5.js sketches: + +1. **Write the HTML file** — single self-contained file, all code inline +2. **Open in browser** — `open sketch.html` (macOS) or `xdg-open sketch.html` (Linux) +3. 
**Local assets** (fonts, images) require a server: `python3 -m http.server 8080` in the project directory, then open `http://localhost:8080/sketch.html` +4. **Export PNG/GIF** — add `keyPressed()` shortcuts as shown above, tell the user which key to press +5. **Headless export** — `node scripts/export-frames.js sketch.html --frames 300` for automated frame capture (sketch must use `noLoop()` + `_p5Ready`) +6. **MP4 rendering** — `bash scripts/render.sh sketch.html output.mp4 --duration 30` +7. **Iterative refinement** — edit the HTML file, user refreshes browser to see changes +8. **Load references on demand** — use `skill_view(name="p5js", file_path="references/...")` to load specific reference files as needed during implementation + +## Performance Targets + +| Metric | Target | +|--------|--------| +| Frame rate (interactive) | 60fps sustained | +| Frame rate (animated export) | 30fps minimum | +| Particle count (P2D shapes) | 5,000-10,000 at 60fps | +| Particle count (pixel buffer) | 50,000-100,000 at 60fps | +| Canvas resolution | Up to 3840x2160 (export), 1920x1080 (interactive) | +| File size (HTML) | < 100KB (excluding CDN libraries) | +| Load time | < 2s to first frame | + +## References + +| File | Contents | +|------|----------| +| `references/core-api.md` | Canvas setup, coordinate system, draw loop, `push()`/`pop()`, offscreen buffers, composition patterns, `pixelDensity()`, responsive design | +| `references/shapes-and-geometry.md` | 2D primitives, `beginShape()`/`endShape()`, Bezier/Catmull-Rom curves, `vertex()` systems, custom shapes, `p5.Vector`, signed distance fields, SVG path conversion | +| `references/visual-effects.md` | Noise (Perlin, fractal, domain warp, curl), flow fields, particle systems (physics, flocking, trails), pixel manipulation, texture generation (stipple, hatch, halftone), feedback loops, reaction-diffusion | +| `references/animation.md` | Frame-based animation, easing functions, `lerp()`/`map()`, spring physics, state 
machines, timeline sequencing, `millis()`-based timing, transition patterns | +| `references/typography.md` | `text()`, `loadFont()`, `textToPoints()`, kinetic typography, text masks, font metrics, responsive text sizing | +| `references/color-systems.md` | `colorMode()`, HSB/HSL/RGB, `lerpColor()`, `paletteLerp()`, procedural palettes, color harmony, `blendMode()`, gradient rendering, curated palette library | +| `references/webgl-and-3d.md` | WEBGL renderer, 3D primitives, camera, lighting, materials, custom geometry, GLSL shaders (`createShader()`, `createFilterShader()`), framebuffers, post-processing | +| `references/interaction.md` | Mouse events, keyboard state, touch input, DOM elements, `createSlider()`/`createButton()`, audio input (p5.sound FFT/amplitude), scroll-driven animation, responsive events | +| `references/export-pipeline.md` | `saveCanvas()`, `saveGif()`, `saveFrames()`, deterministic headless capture, ffmpeg frame-to-video, CCapture.js, SVG export, per-clip architecture, platform export (fxhash), video gotchas | +| `references/troubleshooting.md` | Performance profiling, per-pixel budgets, common mistakes, browser compatibility, WebGL debugging, font loading issues, pixel density traps, memory leaks, CORS | +| `templates/viewer.html` | Interactive viewer template: seed navigation (prev/next/random/jump), parameter sliders, download PNG, responsive canvas. Start from this for explorable generative art | + +--- + +## Creative Divergence (use only when user requests experimental/creative/unique output) + +If the user asks for creative, experimental, surprising, or unconventional output, select the strategy that best fits and reason through its steps BEFORE generating code. 
+ +- **Conceptual Blending** — when the user names two things to combine or wants hybrid aesthetics +- **SCAMPER** — when the user wants a twist on a known generative art pattern +- **Distance Association** — when the user gives a single concept and wants exploration ("make something about time") + +### Conceptual Blending +1. Name two distinct visual systems (e.g., particle physics + handwriting) +2. Map correspondences (particles = ink drops, forces = pen pressure, fields = letterforms) +3. Blend selectively — keep mappings that produce interesting emergent visuals +4. Code the blend as a unified system, not two systems side-by-side + +### SCAMPER Transformation +Take a known generative pattern (flow field, particle system, L-system, cellular automata) and systematically transform it: +- **Substitute**: replace circles with text characters, lines with gradients +- **Combine**: merge two patterns (flow field + voronoi) +- **Adapt**: apply a 2D pattern to a 3D projection +- **Modify**: exaggerate scale, warp the coordinate space +- **Purpose**: use a physics sim for typography, a sorting algorithm for color +- **Eliminate**: remove the grid, remove color, remove symmetry +- **Reverse**: run the simulation backward, invert the parameter space + +### Distance Association +1. Anchor on the user's concept (e.g., "loneliness") +2. Generate associations at three distances: + - Close (obvious): empty room, single figure, silence + - Medium (interesting): one fish in a school swimming the wrong way, a phone with no notifications, the gap between subway cars + - Far (abstract): prime numbers, asymptotic curves, the color of 3am +3. 
Develop the medium-distance associations — they're specific enough to visualize but unexpected enough to be interesting
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-pixel-art.md b/website/docs/user-guide/skills/bundled/creative/creative-pixel-art.md
new file mode 100644
index 000000000..beecb38f0
--- /dev/null
+++ b/website/docs/user-guide/skills/bundled/creative/creative-pixel-art.md
@@ -0,0 +1,232 @@
+---
+title: "Pixel Art — Convert images into retro pixel art with hardware-accurate palettes (NES, Game Boy, PICO-8, C64, etc.)"
+sidebar_label: "Pixel Art"
+description: "Convert images into retro pixel art with hardware-accurate palettes (NES, Game Boy, PICO-8, C64, etc.)"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Pixel Art
+
+Convert images into retro pixel art with hardware-accurate palettes (NES, Game Boy, PICO-8, C64, etc.), and animate them into short videos. Presets cover arcade, SNES, and 10+ era-correct looks. Use `clarify` to let the user pick a style before generating.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Bundled (installed by default) |
+| Path | `skills/creative/pixel-art` |
+| Version | `2.0.0` |
+| Author | dodo-reach |
+| License | MIT |
+| Tags | `creative`, `pixel-art`, `arcade`, `snes`, `nes`, `gameboy`, `retro`, `image`, `video` |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Pixel Art
+
+Convert any image into retro pixel art, then optionally animate it into a short
+MP4 or GIF with era-appropriate effects (rain, fireflies, snow, embers).
+ +Two scripts ship with this skill: + +- `scripts/pixel_art.py` — photo → pixel-art PNG (Floyd-Steinberg dithering) +- `scripts/pixel_art_video.py` — pixel-art PNG → animated MP4 (+ optional GIF) + +Each is importable or runnable directly. Presets snap to hardware palettes +when you want era-accurate colors (NES, Game Boy, PICO-8, etc.), or use +adaptive N-color quantization for arcade/SNES-style looks. + +## When to Use + +- User wants retro pixel art from a source image +- User asks for NES / Game Boy / PICO-8 / C64 / arcade / SNES styling +- User wants a short looping animation (rain scene, night sky, snow, etc.) +- Posters, album covers, social posts, sprites, characters, avatars + +## Workflow + +Before generating, confirm the style with the user. Different presets produce +very different outputs and regenerating is costly. + +### Step 1 — Offer a style + +Call `clarify` with 4 representative presets. Pick the set based on what the +user asked for — don't just dump all 14. + +Default menu when the user's intent is unclear: + +```python +clarify( + question="Which pixel-art style do you want?", + choices=[ + "arcade — bold, chunky 80s cabinet feel (16 colors, 8px)", + "nes — Nintendo 8-bit hardware palette (54 colors, 8px)", + "gameboy — 4-shade green Game Boy DMG", + "snes — cleaner 16-bit look (32 colors, 4px)", + ], +) +``` + +When the user already named an era (e.g. "80s arcade", "Gameboy"), skip +`clarify` and use the matching preset directly. + +### Step 2 — Offer animation (optional) + +If the user asked for a video/GIF, or the output might benefit from motion, +ask which scene: + +```python +clarify( + question="Want to animate it? Pick a scene or skip.", + choices=[ + "night — stars + fireflies + leaves", + "urban — rain + neon pulse", + "snow — falling snowflakes", + "skip — just the image", + ], +) +``` + +Do NOT call `clarify` more than twice in a row. One for style, one for scene if +animation is on the table. 
If the user explicitly asked for a specific style +and scene in their message, skip `clarify` entirely. + +### Step 3 — Generate + +Run `pixel_art()` first; if animation was requested, chain into +`pixel_art_video()` on the result. + +## Preset Catalog + +| Preset | Era | Palette | Block | Best for | +|--------|-----|---------|-------|----------| +| `arcade` | 80s arcade | adaptive 16 | 8px | Bold posters, hero art | +| `snes` | 16-bit | adaptive 32 | 4px | Characters, detailed scenes | +| `nes` | 8-bit | NES (54) | 8px | True NES look | +| `gameboy` | DMG handheld | 4 green shades | 8px | Monochrome Game Boy | +| `gameboy_pocket` | Pocket handheld | 4 grey shades | 8px | Mono GB Pocket | +| `pico8` | PICO-8 | 16 fixed | 6px | Fantasy-console look | +| `c64` | Commodore 64 | 16 fixed | 8px | 8-bit home computer | +| `apple2` | Apple II hi-res | 6 fixed | 10px | Extreme retro, 6 colors | +| `teletext` | BBC Teletext | 8 pure | 10px | Chunky primary colors | +| `mspaint` | Windows MS Paint | 24 fixed | 8px | Nostalgic desktop | +| `mono_green` | CRT phosphor | 2 green | 6px | Terminal/CRT aesthetic | +| `mono_amber` | CRT amber | 2 amber | 6px | Amber monitor look | +| `neon` | Cyberpunk | 10 neons | 6px | Vaporwave/cyber | +| `pastel` | Soft pastel | 10 pastels | 6px | Kawaii / gentle | + +Named palettes live in `scripts/palettes.py` (see `references/palettes.md` for +the complete list — 28 named palettes total). 
Any preset can be overridden: + +```python +pixel_art("in.png", "out.png", preset="snes", palette="PICO_8", block=6) +``` + +## Scene Catalog (for video) + +| Scene | Effects | +|-------|---------| +| `night` | Twinkling stars + fireflies + drifting leaves | +| `dusk` | Fireflies + sparkles | +| `tavern` | Dust motes + warm sparkles | +| `indoor` | Dust motes | +| `urban` | Rain + neon pulse | +| `nature` | Leaves + fireflies | +| `magic` | Sparkles + fireflies | +| `storm` | Rain + lightning | +| `underwater` | Bubbles + light sparkles | +| `fire` | Embers + sparkles | +| `snow` | Snowflakes + sparkles | +| `desert` | Heat shimmer + dust | + +## Invocation Patterns + +### Python (import) + +```python +import sys +sys.path.insert(0, "/home/teknium/.hermes/skills/creative/pixel-art/scripts") +from pixel_art import pixel_art +from pixel_art_video import pixel_art_video + +# 1. Convert to pixel art +pixel_art("/path/to/photo.jpg", "/tmp/pixel.png", preset="nes") + +# 2. Animate (optional) +pixel_art_video( + "/tmp/pixel.png", + "/tmp/pixel.mp4", + scene="night", + duration=6, + fps=15, + seed=42, + export_gif=True, +) +``` + +### CLI + +```bash +cd /home/teknium/.hermes/skills/creative/pixel-art/scripts + +python pixel_art.py in.jpg out.png --preset gameboy +python pixel_art.py in.jpg out.png --preset snes --palette PICO_8 --block 6 + +python pixel_art_video.py out.png out.mp4 --scene night --duration 6 --gif +``` + +## Pipeline Rationale + +**Pixel conversion:** +1. Boost contrast/color/sharpness (stronger for smaller palettes) +2. Posterize to simplify tonal regions before quantization +3. Downscale by `block` with `Image.NEAREST` (hard pixels, no interpolation) +4. Quantize with Floyd-Steinberg dithering — against either an adaptive + N-color palette OR a named hardware palette +5. Upscale back with `Image.NEAREST` + +Quantizing AFTER downscale keeps dithering aligned with the final pixel grid. 
+Quantizing before would waste error-diffusion on detail that disappears.
+
+**Video overlay:**
+- Copies the base frame each tick (static background)
+- Overlays stateless-per-frame particle draws (one function per effect)
+- Encodes via ffmpeg `libx264 -pix_fmt yuv420p -crf 18`
+- Optional GIF via `palettegen` + `paletteuse`
+
+## Dependencies
+
+- Python 3.9+
+- Pillow (`pip install Pillow`)
+- ffmpeg on PATH (only needed for video — the Hermes installer packages this)
+
+## Pitfalls
+
+- Palette keys are case-sensitive (`"NES"`, `"PICO_8"`, `"GAMEBOY_ORIGINAL"`).
+- Very small sources (<100px wide) collapse under 8-10px blocks. Upscale the
+  source first if it's tiny.
+- Fractional `block` or `palette` will break quantization — keep them positive ints.
+- Animation particle counts are tuned for ~640x480 canvases. On very large
+  images you may want a second pass with a different seed for density.
+- `mono_green` / `mono_amber` force `color=0.0` (desaturate). If you override
+  and keep chroma, the 2-color palette can produce stripes on smooth regions.
+- `clarify` loop: call it at most twice per turn (style, then scene). Don't
+  pepper the user with more picks.
+
+## Verification
+
+- PNG is created at the output path
+- Clear square pixel blocks visible at the preset's block size
+- Color count matches preset (eyeball the image or run `Image.open(p).getcolors()`)
+- Video is a valid MP4 (`ffprobe` can open it) with non-zero size
+
+## Attribution
+
+Named hardware palettes and the procedural animation loops in `pixel_art_video.py`
+are ported from [pixel-art-studio](https://github.com/Synero/pixel-art-studio)
+(MIT). See `ATTRIBUTION.md` in this skill directory for details.
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-popular-web-designs.md b/website/docs/user-guide/skills/bundled/creative/creative-popular-web-designs.md new file mode 100644 index 000000000..838a1c179 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/creative/creative-popular-web-designs.md @@ -0,0 +1,212 @@ +--- +title: "Popular Web Designs — 54 production-quality design systems extracted from real websites" +sidebar_label: "Popular Web Designs" +description: "54 production-quality design systems extracted from real websites" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Popular Web Designs + +54 production-quality design systems extracted from real websites. Load a template to generate HTML/CSS that matches the visual identity of sites like Stripe, Linear, Vercel, Notion, Airbnb, and more. Each template includes colors, typography, components, layout rules, and ready-to-use CSS values. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/creative/popular-web-designs` | +| Version | `1.0.0` | +| Author | Hermes Agent + Teknium (design systems sourced from VoltAgent/awesome-design-md) | +| License | MIT | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Popular Web Designs + +54 real-world design systems ready for use when generating HTML/CSS. Each template captures a +site's complete visual language: color palette, typography hierarchy, component styles, spacing +system, shadows, responsive behavior, and practical agent prompts with exact CSS values. + +## How to Use + +1. Pick a design from the catalog below +2. Load it: `skill_view(name="popular-web-designs", file_path="templates/.md")` +3. 
Use the design tokens and component specs when generating HTML +4. Pair with the `generative-widgets` skill to serve the result via cloudflared tunnel + +Each template includes a **Hermes Implementation Notes** block at the top with: +- CDN font substitute and Google Fonts `` tag (ready to paste) +- CSS font-family stacks for primary and monospace +- Reminders to use `write_file` for HTML creation and `browser_vision` for verification + +## HTML Generation Pattern + +```html + + + + + + Page Title + + + + + + + + +``` + +Write the file with `write_file`, serve with the `generative-widgets` workflow (cloudflared tunnel), +and verify the result with `browser_vision` to confirm visual accuracy. + +## Font Substitution Reference + +Most sites use proprietary fonts unavailable via CDN. Each template maps to a Google Fonts +substitute that preserves the design's character. Common mappings: + +| Proprietary Font | CDN Substitute | Character | +|---|---|---| +| Geist / Geist Sans | Geist (on Google Fonts) | Geometric, compressed tracking | +| Geist Mono | Geist Mono (on Google Fonts) | Clean monospace, ligatures | +| sohne-var (Stripe) | Source Sans 3 | Light weight elegance | +| Berkeley Mono | JetBrains Mono | Technical monospace | +| Airbnb Cereal VF | DM Sans | Rounded, friendly geometric | +| Circular (Spotify) | DM Sans | Geometric, warm | +| figmaSans | Inter | Clean humanist | +| Pin Sans (Pinterest) | DM Sans | Friendly, rounded | +| NVIDIA-EMEA | Inter (or Arial system) | Industrial, clean | +| CoinbaseDisplay/Sans | DM Sans | Geometric, trustworthy | +| UberMove | DM Sans | Bold, tight | +| HashiCorp Sans | Inter | Enterprise, neutral | +| waldenburgNormal (Sanity) | Space Grotesk | Geometric, slightly condensed | +| IBM Plex Sans/Mono | IBM Plex Sans/Mono | Available on Google Fonts | +| Rubik (Sentry) | Rubik | Available on Google Fonts | + +When a template's CDN font matches the original (Inter, IBM Plex, Rubik, Geist), no +substitution loss occurs. 
When a substitute is used (DM Sans for Circular, Source Sans 3 +for sohne-var), follow the template's weight, size, and letter-spacing values closely — +those carry more visual identity than the specific font face. + +## Design Catalog + +### AI & Machine Learning + +| Template | Site | Style | +|---|---|---| +| `claude.md` | Anthropic Claude | Warm terracotta accent, clean editorial layout | +| `cohere.md` | Cohere | Vibrant gradients, data-rich dashboard aesthetic | +| `elevenlabs.md` | ElevenLabs | Dark cinematic UI, audio-waveform aesthetics | +| `minimax.md` | Minimax | Bold dark interface with neon accents | +| `mistral.ai.md` | Mistral AI | French-engineered minimalism, purple-toned | +| `ollama.md` | Ollama | Terminal-first, monochrome simplicity | +| `opencode.ai.md` | OpenCode AI | Developer-centric dark theme, full monospace | +| `replicate.md` | Replicate | Clean white canvas, code-forward | +| `runwayml.md` | RunwayML | Cinematic dark UI, media-rich layout | +| `together.ai.md` | Together AI | Technical, blueprint-style design | +| `voltagent.md` | VoltAgent | Void-black canvas, emerald accent, terminal-native | +| `x.ai.md` | xAI | Stark monochrome, futuristic minimalism, full monospace | + +### Developer Tools & Platforms + +| Template | Site | Style | +|---|---|---| +| `cursor.md` | Cursor | Sleek dark interface, gradient accents | +| `expo.md` | Expo | Dark theme, tight letter-spacing, code-centric | +| `linear.app.md` | Linear | Ultra-minimal dark-mode, precise, purple accent | +| `lovable.md` | Lovable | Playful gradients, friendly dev aesthetic | +| `mintlify.md` | Mintlify | Clean, green-accented, reading-optimized | +| `posthog.md` | PostHog | Playful branding, developer-friendly dark UI | +| `raycast.md` | Raycast | Sleek dark chrome, vibrant gradient accents | +| `resend.md` | Resend | Minimal dark theme, monospace accents | +| `sentry.md` | Sentry | Dark dashboard, data-dense, pink-purple accent | +| `supabase.md` | Supabase | Dark emerald 
theme, code-first developer tool | +| `superhuman.md` | Superhuman | Premium dark UI, keyboard-first, purple glow | +| `vercel.md` | Vercel | Black and white precision, Geist font system | +| `warp.md` | Warp | Dark IDE-like interface, block-based command UI | +| `zapier.md` | Zapier | Warm orange, friendly illustration-driven | + +### Infrastructure & Cloud + +| Template | Site | Style | +|---|---|---| +| `clickhouse.md` | ClickHouse | Yellow-accented, technical documentation style | +| `composio.md` | Composio | Modern dark with colorful integration icons | +| `hashicorp.md` | HashiCorp | Enterprise-clean, black and white | +| `mongodb.md` | MongoDB | Green leaf branding, developer documentation focus | +| `sanity.md` | Sanity | Red accent, content-first editorial layout | +| `stripe.md` | Stripe | Signature purple gradients, weight-300 elegance | + +### Design & Productivity + +| Template | Site | Style | +|---|---|---| +| `airtable.md` | Airtable | Colorful, friendly, structured data aesthetic | +| `cal.md` | Cal.com | Clean neutral UI, developer-oriented simplicity | +| `clay.md` | Clay | Organic shapes, soft gradients, art-directed layout | +| `figma.md` | Figma | Vibrant multi-color, playful yet professional | +| `framer.md` | Framer | Bold black and blue, motion-first, design-forward | +| `intercom.md` | Intercom | Friendly blue palette, conversational UI patterns | +| `miro.md` | Miro | Bright yellow accent, infinite canvas aesthetic | +| `notion.md` | Notion | Warm minimalism, serif headings, soft surfaces | +| `pinterest.md` | Pinterest | Red accent, masonry grid, image-first layout | +| `webflow.md` | Webflow | Blue-accented, polished marketing site aesthetic | + +### Fintech & Crypto + +| Template | Site | Style | +|---|---|---| +| `coinbase.md` | Coinbase | Clean blue identity, trust-focused, institutional feel | +| `kraken.md` | Kraken | Purple-accented dark UI, data-dense dashboards | +| `revolut.md` | Revolut | Sleek dark interface, gradient cards, 
fintech precision | +| `wise.md` | Wise | Bright green accent, friendly and clear | + +### Enterprise & Consumer + +| Template | Site | Style | +|---|---|---| +| `airbnb.md` | Airbnb | Warm coral accent, photography-driven, rounded UI | +| `apple.md` | Apple | Premium white space, SF Pro, cinematic imagery | +| `bmw.md` | BMW | Dark premium surfaces, precise engineering aesthetic | +| `ibm.md` | IBM | Carbon design system, structured blue palette | +| `nvidia.md` | NVIDIA | Green-black energy, technical power aesthetic | +| `spacex.md` | SpaceX | Stark black and white, full-bleed imagery, futuristic | +| `spotify.md` | Spotify | Vibrant green on dark, bold type, album-art-driven | +| `uber.md` | Uber | Bold black and white, tight type, urban energy | + +## Choosing a Design + +Match the design to the content: + +- **Developer tools / dashboards:** Linear, Vercel, Supabase, Raycast, Sentry +- **Documentation / content sites:** Mintlify, Notion, Sanity, MongoDB +- **Marketing / landing pages:** Stripe, Framer, Apple, SpaceX +- **Dark mode UIs:** Linear, Cursor, ElevenLabs, Warp, Superhuman +- **Light / clean UIs:** Vercel, Stripe, Notion, Cal.com, Replicate +- **Playful / friendly:** PostHog, Figma, Lovable, Zapier, Miro +- **Premium / luxury:** Apple, BMW, Stripe, Superhuman, Revolut +- **Data-dense / dashboards:** Sentry, Kraken, Cohere, ClickHouse +- **Monospace / terminal aesthetic:** Ollama, OpenCode, x.ai, VoltAgent diff --git a/website/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music.md b/website/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music.md new file mode 100644 index 000000000..cd0b7fb14 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music.md @@ -0,0 +1,297 @@ +--- +title: "Songwriting And Ai Music" +sidebar_label: "Songwriting And Ai Music" +description: "Songwriting craft, AI music generation prompts (Suno focus), parody/adaptation techniques, phonetic 
tricks, and lessons learned" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Songwriting And Ai Music + +Songwriting craft, AI music generation prompts (Suno focus), parody/adaptation techniques, phonetic tricks, and lessons learned. These are tools and ideas, not rules. Break any of them when the art calls for it. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/creative/songwriting-and-ai-music` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Songwriting & AI Music Generation + +Everything here is a GUIDELINE, not a rule. Art breaks rules on purpose. +Use what serves the song. Ignore what doesn't. + +--- + +## 1. Song Structure (Pick One or Invent Your Own) + +Common skeletons — mix, modify, or throw out as needed: + +``` +ABABCB Verse/Chorus/Verse/Chorus/Bridge/Chorus (most pop/rock) +AABA Verse/Verse/Bridge/Verse (refrain-based) (jazz standards, ballads) +ABAB Verse/Chorus alternating (simple, direct) +AAA Verse/Verse/Verse (strophic, no chorus) (folk, storytelling) +``` + +The six building blocks: +- Intro — set the mood, pull the listener in +- Verse — the story, the details, the world-building +- Pre-Chorus — optional tension ramp before the payoff +- Chorus — the emotional core, the part people remember +- Bridge — a detour, a shift in perspective or key +- Outro — the farewell, can echo or subvert the rest + +You don't need all of these. Some great songs are just one section +that evolves. Structure serves the emotion, not the other way around. + +--- + +## 2. 
Rhyme, Meter, and Sound + +RHYME TYPES (from tight to loose): +- Perfect: lean/mean +- Family: crate/braid +- Assonance: had/glass (same vowels, different endings) +- Consonance: scene/when (different vowels, similar endings) +- Near/slant: enough to suggest connection without locking it down + +Mix them. All perfect rhymes can sound like a nursery rhyme. +All slant rhymes can sound lazy. The blend is where it lives. + +INTERNAL RHYME: Rhyming within a line, not just at the ends. + "We pruned the lies from bleeding trees / Distilled the storm + from entropy" — "lies/flies," "trees/entropy" create internal echoes. + +METER: The rhythm of stressed vs unstressed syllables. +- Matching syllable counts between parallel lines helps singability +- The STRESSED syllables matter more than total count +- Say it out loud. If you stumble, the meter needs work. +- Intentionally breaking meter can create emphasis or surprise + +--- + +## 3. Emotional Arc and Dynamics + +Think of a song as a journey, not a flat road. + +ENERGY MAPPING (rough idea, not prescription): + Intro: 2-3 | Verse: 5-6 | Pre-Chorus: 7 + Chorus: 8-9 | Bridge: varies | Final Chorus: 9-10 + +The most powerful dynamic trick: CONTRAST. +- Whisper before a scream hits harder than just screaming +- Sparse before dense. Slow before fast. Low before high. +- The drop only works because of the buildup +- Silence is an instrument + +"Whisper to roar to whisper" — start intimate, build to full power, +strip back to vulnerability. Works for ballads, epics, anthems. + +--- + +## 4. 
Writing Lyrics That Work + +SHOW, DON'T TELL (usually): +- "I was sad" = flat +- "Your hoodie's still on the hook by the door" = alive +- But sometimes "I give my life" said plainly IS the power + +THE HOOK: +- The line people remember, hum, repeat +- Usually the title or core phrase +- Works best when melody + lyric + emotion all align +- Place it where it lands hardest (often first/last line of chorus) + +PROSODY — lyrics and music supporting each other: +- Stable feelings (resolution, peace) pair with settled melodies, + perfect rhymes, resolved chords +- Unstable feelings (longing, doubt) pair with wandering melodies, + near-rhymes, unresolved chords +- Verse melody typically sits lower, chorus goes higher +- But flip this if it serves the song + +AVOID (unless you're doing it on purpose): +- Cliches on autopilot ("heart of gold" without earning it) +- Forcing word order to hit a rhyme ("Yoda-speak") +- Same energy in every section (flat dynamics) +- Treating your first draft as sacred — revision is creation + +--- + +## 5. Parody and Adaptation + +When rewriting an existing song with new lyrics: + +THE SKELETON: Map the original's structure first. +- Count syllables per line +- Mark the rhyme scheme (ABAB, AABB, etc.) 
+- Identify which syllables are STRESSED +- Note where held/sustained notes fall + +FITTING NEW WORDS: +- Match stressed syllables to the same beats as the original +- Total syllable count can flex by 1-2 unstressed syllables +- On long held notes, try to match the VOWEL SOUND of the original + (if original holds "LOOOVE" with an "oo" vowel, "FOOOD" fits + better than "LIFE") +- Monosyllabic swaps in key spots keep rhythm intact + (Crime -> Code, Snake -> Noose) +- Sing your new words over the original — if you stumble, revise + +CONCEPT: +- Pick a concept strong enough to sustain the whole song +- Start from the title/hook and build outward +- Generate lots of raw material (puns, phrases, images) FIRST, + then fit the best ones into the structure +- If you need a specific line somewhere, reverse-engineer the + rhyme scheme backward to set it up + +KEEP SOME ORIGINALS: Leaving a few original lines or structures +intact adds recognizability and lets the audience feel the connection. + +--- + +## 6. Suno AI Prompt Engineering + +### Style/Genre Description Field + +FORMULA (adapt as needed): + Genre + Mood + Era + Instruments + Vocal Style + Production + Dynamics + +``` +BAD: "sad rock song" +GOOD: "Cinematic orchestral spy thriller, 1960s Cold War era, smoky + sultry female vocalist, big band jazz, brass section with + trumpets and french horns, sweeping strings, minor key, + vintage analog warmth" +``` + +DESCRIBE THE JOURNEY, not just the genre: +``` +"Begins as a haunting whisper over sparse piano. Gradually layers + in muted brass. Builds through the chorus with full orchestra. + Second verse erupts with raw belting intensity. Outro strips back + to a lone piano and a fragile whisper fading to silence." +``` + +TIPS: +- V4.5+ supports up to 1,000 chars in Style field — use them +- NO artist names or trademarks. Describe the sound instead. 
+ "1960s Cold War spy thriller brass" not "James Bond style" + "90s grunge" not "Nirvana-style" +- Specify BPM and key when you have a preference +- Use Exclude Styles field for what you DON'T want +- Unexpected genre combos can be gold: "bossa nova trap", + "Appalachian gothic", "chiptune jazz" +- Build a vocal PERSONA, not just a gender: + "A weathered torch singer with a smoky alto, slight rasp, + who starts vulnerable and builds to devastating power" + +### Metatags (place in [brackets] inside lyrics field) + +STRUCTURE: + [Intro] [Verse] [Verse 1] [Pre-Chorus] [Chorus] + [Post-Chorus] [Hook] [Bridge] [Interlude] + [Instrumental] [Instrumental Break] [Guitar Solo] + [Breakdown] [Build-up] [Outro] [Silence] [End] + +VOCAL PERFORMANCE: + [Whispered] [Spoken Word] [Belted] [Falsetto] [Powerful] + [Soulful] [Raspy] [Breathy] [Smooth] [Gritty] + [Staccato] [Legato] [Vibrato] [Melismatic] + [Harmonies] [Choir] [Harmonized Chorus] + +DYNAMICS: + [High Energy] [Low Energy] [Building Energy] [Explosive] + [Emotional Climax] [Gradual swell] [Orchestral swell] + [Quiet arrangement] [Falling tension] [Slow Down] + +GENDER: + [Female Vocals] [Male Vocals] + +ATMOSPHERE: + [Melancholic] [Euphoric] [Nostalgic] [Aggressive] + [Dreamy] [Intimate] [Dark Atmosphere] + +SFX: + [Vinyl Crackle] [Rain] [Applause] [Static] [Thunder] + +Put tags in BOTH style field AND lyrics for reinforcement. +Keep to 5-8 tags per section max — too many confuses the AI. +Don't contradict yourself ([Calm] + [Aggressive] in same section). + +### Custom Mode +- Always use Custom Mode for serious work (separate Style + Lyrics) +- Lyrics field limit: ~3,000 chars (~40-60 lines) +- Always add structural tags — without them Suno defaults to + flat verse/chorus/verse with no emotional arc + +--- + +## 7. Phonetic Tricks for AI Singers + +AI vocalists don't read — they pronounce. 
Help them: + +PHONETIC RESPELLING: +- Spell words as they SOUND: "through" -> "thru" +- Proper nouns are highest failure rate — test early +- "Nous" -> "Noose" (forces correct pronunciation) +- Hyphenate to guide syllables: "Re-search", "bio-engineering" + +DELIVERY CONTROL: +- ALL CAPS = louder, more intense +- Vowel extension: "lo-o-o-ove" = sustained/melisma +- Ellipses: "I... need... you" = dramatic pauses +- Hyphenated stretch: "ne-e-ed" = emotional stretch + +ALWAYS: +- Spell out numbers: "24/7" -> "twenty four seven" +- Space acronyms: "AI" -> "A I" or "A-I" +- Test proper nouns/unusual words in a short 30-second clip first +- Once generated, pronunciation is baked in — fix in lyrics BEFORE + +--- + +## 8. Workflow + +1. Write the concept/hook first — what's the emotional core? +2. If adapting, map the original structure (syllables, rhyme, stress) +3. Generate raw material — brainstorm freely before structuring +4. Draft lyrics into the structure +5. Read/sing aloud — catch stumbles, fix meter +6. Build the Suno style description — paint the dynamic journey +7. Add metatags to lyrics for performance direction +8. Generate 3-5 variations minimum — treat them like recording takes +9. Pick the best, use Extend/Continue to build on promising sections +10. If something great happens by accident, keep it + +EXPECT: ~3-5 generations per 1 good result. Revision is normal. +Style can drift in extensions — restate genre/mood when extending. + +--- + +## 9. Lessons Learned + +- Describing the dynamic ARC in the style field matters way more + than just listing genres. "Whisper to roar to whisper" gives + Suno a performance map. +- Keeping some original lines intact in a parody adds recognizability + and emotional weight — the audience feels the ghost of the original. +- The bridge slot in a song is where you can transform imagery. + Swap the original's specific references for your theme's metaphors + while keeping the emotional function (reflection, shift, revelation). 
+- Monosyllabic word swaps in hooks/tags are the cleanest way to + maintain rhythm while changing meaning. +- A strong vocal persona description in the style field makes a + bigger difference than any single metatag. +- Don't be precious about rules. If a line breaks meter but hits + harder, keep it. The feeling is what matters. Craft serves art, + not the other way around. diff --git a/website/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel.md b/website/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel.md new file mode 100644 index 000000000..027156ccd --- /dev/null +++ b/website/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel.md @@ -0,0 +1,183 @@ +--- +title: "Jupyter Live Kernel — Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb" +sidebar_label: "Jupyter Live Kernel" +description: "Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Jupyter Live Kernel + +Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb. Load this skill when the task involves exploration, iteration, or inspecting intermediate results — data science, ML experimentation, API exploration, or building up complex code step-by-step. Uses terminal to run CLI commands against a live Jupyter kernel. No new tools required. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/data-science/jupyter-live-kernel` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `jupyter`, `notebook`, `repl`, `data-science`, `exploration`, `iterative` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. 
This is what the agent sees as instructions when the skill is active. +::: + +# Jupyter Live Kernel (hamelnb) + +Gives you a **stateful Python REPL** via a live Jupyter kernel. Variables persist +across executions. Use this instead of `execute_code` when you need to build up +state incrementally, explore APIs, inspect DataFrames, or iterate on complex code. + +## When to Use This vs Other Tools + +| Tool | Use When | +|------|----------| +| **This skill** | Iterative exploration, state across steps, data science, ML, "let me try this and check" | +| `execute_code` | One-shot scripts needing hermes tool access (web_search, file ops). Stateless. | +| `terminal` | Shell commands, builds, installs, git, process management | + +**Rule of thumb:** If you'd want a Jupyter notebook for the task, use this skill. + +## Prerequisites + +1. **uv** must be installed (check: `which uv`) +2. **JupyterLab** must be installed: `uv tool install jupyterlab` +3. A Jupyter server must be running (see Setup below) + +## Setup + +The hamelnb script location: +``` +SCRIPT="$HOME/.agent-skills/hamelnb/skills/jupyter-live-kernel/scripts/jupyter_live_kernel.py" +``` + +If not cloned yet: +``` +git clone https://github.com/hamelsmu/hamelnb.git ~/.agent-skills/hamelnb +``` + +### Starting JupyterLab + +Check if a server is already running: +``` +uv run "$SCRIPT" servers +``` + +If no servers found, start one: +``` +jupyter-lab --no-browser --port=8888 --notebook-dir=$HOME/notebooks \ + --IdentityProvider.token='' --ServerApp.password='' > /tmp/jupyter.log 2>&1 & +sleep 3 +``` + +Note: Token/password disabled for local agent access. The server runs headless. 
+
+### Creating a Notebook for REPL Use
+
+If you just need a REPL (no existing notebook), create a minimal notebook file:
+```
+mkdir -p ~/notebooks
+```
+Write a minimal .ipynb JSON file with one empty code cell, then start a kernel
+session via the Jupyter REST API:
+```
+curl -s -X POST http://127.0.0.1:8888/api/sessions \
+  -H "Content-Type: application/json" \
+  -d '{"path":"scratch.ipynb","type":"notebook","name":"scratch.ipynb","kernel":{"name":"python3"}}'
+```
+
+## Core Workflow
+
+All commands return structured JSON. Always use `--compact` to save tokens.
+
+### 1. Discover servers and notebooks
+
+```
+uv run "$SCRIPT" servers --compact
+uv run "$SCRIPT" notebooks --compact
+```
+
+### 2. Execute code (primary operation)
+
+```
+uv run "$SCRIPT" execute --path <notebook.ipynb> --code '<python code>' --compact
+```
+
+State persists across execute calls. Variables, imports, objects all survive.
+
+Multi-line code works with $'...' quoting:
+```
+uv run "$SCRIPT" execute --path scratch.ipynb --code $'import os\nfiles = os.listdir(".")\nprint(f"Found {len(files)} files")' --compact
+```
+
+### 3. Inspect live variables
+
+```
+uv run "$SCRIPT" variables --path <notebook.ipynb> list --compact
+uv run "$SCRIPT" variables --path <notebook.ipynb> preview --name <variable> --compact
+```
+
+### 4. Edit notebook cells
+
+```
+# View current cells
+uv run "$SCRIPT" contents --path <notebook.ipynb> --compact
+
+# Insert a new cell
+uv run "$SCRIPT" edit --path <notebook.ipynb> insert \
+  --at-index <index> --cell-type code --source '<source code>' --compact
+
+# Replace cell source (use cell-id from contents output)
+uv run "$SCRIPT" edit --path <notebook.ipynb> replace-source \
+  --cell-id <cell-id> --source '<new source>' --compact
+
+# Delete a cell
+uv run "$SCRIPT" edit --path <notebook.ipynb> delete --cell-id <cell-id> --compact
+```
+
+### 5. Verification (restart + run all)
+
+Only use when the user asks for a clean verification or you need to confirm
+the notebook runs top-to-bottom:
+
+```
+uv run "$SCRIPT" restart-run-all --path <notebook.ipynb> --save-outputs --compact
+```
+
+## Practical Tips from Experience
+
+1. 
**First execution after server start may timeout** — the kernel needs a moment + to initialize. If you get a timeout, just retry. + +2. **The kernel Python is JupyterLab's Python** — packages must be installed in + that environment. If you need additional packages, install them into the + JupyterLab tool environment first. + +3. **--compact flag saves significant tokens** — always use it. JSON output can + be very verbose without it. + +4. **For pure REPL use**, create a scratch.ipynb and don't bother with cell editing. + Just use `execute` repeatedly. + +5. **Argument order matters** — subcommand flags like `--path` go BEFORE the + sub-subcommand. E.g.: `variables --path nb.ipynb list` not `variables list --path nb.ipynb`. + +6. **If a session doesn't exist yet**, you need to start one via the REST API + (see Setup section). The tool can't execute without a live kernel session. + +7. **Errors are returned as JSON** with traceback — read the `ename` and `evalue` + fields to understand what went wrong. + +8. **Occasional websocket timeouts** — some operations may timeout on first try, + especially after a kernel restart. Retry once before escalating. + +## Timeout Defaults + +The script has a 30-second default timeout per execution. For long-running +operations, pass `--timeout 120`. Use generous timeouts (60+) for initial +setup or heavy computation. 
diff --git a/website/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions.md b/website/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions.md new file mode 100644 index 000000000..8b5b8ade8 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions.md @@ -0,0 +1,221 @@ +--- +title: "Webhook Subscriptions" +sidebar_label: "Webhook Subscriptions" +description: "Create and manage webhook subscriptions for event-driven agent activation, or for direct push notifications (zero LLM cost)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Webhook Subscriptions + +Create and manage webhook subscriptions for event-driven agent activation, or for direct push notifications (zero LLM cost). Use when the user wants external services to trigger agent runs OR push notifications to chats. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/devops/webhook-subscriptions` | +| Version | `1.1.0` | +| Tags | `webhook`, `events`, `automation`, `integrations`, `notifications`, `push` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Webhook Subscriptions + +Create dynamic webhook subscriptions so external services (GitHub, GitLab, Stripe, CI/CD, IoT sensors, monitoring tools) can trigger Hermes agent runs by POSTing events to a URL. + +## Setup (Required First) + +The webhook platform must be enabled before subscriptions can be created. Check with: +```bash +hermes webhook list +``` + +If it says "Webhook platform is not enabled", set it up: + +### Option 1: Setup wizard +```bash +hermes gateway setup +``` +Follow the prompts to enable webhooks, set the port, and set a global HMAC secret. 
+ +### Option 2: Manual config +Add to `~/.hermes/config.yaml`: +```yaml +platforms: + webhook: + enabled: true + extra: + host: "0.0.0.0" + port: 8644 + secret: "generate-a-strong-secret-here" +``` + +### Option 3: Environment variables +Add to `~/.hermes/.env`: +```bash +WEBHOOK_ENABLED=true +WEBHOOK_PORT=8644 +WEBHOOK_SECRET=generate-a-strong-secret-here +``` + +After configuration, start (or restart) the gateway: +```bash +hermes gateway run +# Or if using systemd: +systemctl --user restart hermes-gateway +``` + +Verify it's running: +```bash +curl http://localhost:8644/health +``` + +## Commands + +All management is via the `hermes webhook` CLI command: + +### Create a subscription +```bash +hermes webhook subscribe \ + --prompt "Prompt template with {payload.fields}" \ + --events "event1,event2" \ + --description "What this does" \ + --skills "skill1,skill2" \ + --deliver telegram \ + --deliver-chat-id "12345" \ + --secret "optional-custom-secret" +``` + +Returns the webhook URL and HMAC secret. The user configures their service to POST to that URL. + +### List subscriptions +```bash +hermes webhook list +``` + +### Remove a subscription +```bash +hermes webhook remove +``` + +### Test a subscription +```bash +hermes webhook test +hermes webhook test --payload '{"key": "value"}' +``` + +## Prompt Templates + +Prompts support `{dot.notation}` for accessing nested payload fields: + +- `{issue.title}` — GitHub issue title +- `{pull_request.user.login}` — PR author +- `{data.object.amount}` — Stripe payment amount +- `{sensor.temperature}` — IoT sensor reading + +If no prompt is specified, the full JSON payload is dumped into the agent prompt. + +## Common Patterns + +### GitHub: new issues +```bash +hermes webhook subscribe github-issues \ + --events "issues" \ + --prompt "New GitHub issue #{issue.number}: {issue.title}\n\nAction: {action}\nAuthor: {issue.user.login}\nBody:\n{issue.body}\n\nPlease triage this issue." 
\ + --deliver telegram \ + --deliver-chat-id "-100123456789" +``` + +Then in GitHub repo Settings → Webhooks → Add webhook: +- Payload URL: the returned webhook_url +- Content type: application/json +- Secret: the returned secret +- Events: "Issues" + +### GitHub: PR reviews +```bash +hermes webhook subscribe github-prs \ + --events "pull_request" \ + --prompt "PR #{pull_request.number} {action}: {pull_request.title}\nBy: {pull_request.user.login}\nBranch: {pull_request.head.ref}\n\n{pull_request.body}" \ + --skills "github-code-review" \ + --deliver github_comment +``` + +### Stripe: payment events +```bash +hermes webhook subscribe stripe-payments \ + --events "payment_intent.succeeded,payment_intent.payment_failed" \ + --prompt "Payment {data.object.status}: {data.object.amount} cents from {data.object.receipt_email}" \ + --deliver telegram \ + --deliver-chat-id "-100123456789" +``` + +### CI/CD: build notifications +```bash +hermes webhook subscribe ci-builds \ + --events "pipeline" \ + --prompt "Build {object_attributes.status} on {project.name} branch {object_attributes.ref}\nCommit: {commit.message}" \ + --deliver discord \ + --deliver-chat-id "1234567890" +``` + +### Generic monitoring alert +```bash +hermes webhook subscribe alerts \ + --prompt "Alert: {alert.name}\nSeverity: {alert.severity}\nMessage: {alert.message}\n\nPlease investigate and suggest remediation." \ + --deliver origin +``` + +### Direct delivery (no agent, zero LLM cost) + +For use cases where you just want to push a notification through to a user's chat — no reasoning, no agent loop — add `--deliver-only`. The rendered `--prompt` template becomes the literal message body and is dispatched directly to the target adapter. 
+ +Use this for: +- External service push notifications (Supabase/Firebase webhooks → Telegram) +- Monitoring alerts that should forward verbatim +- Inter-agent pings where one agent is telling another agent's user something +- Any webhook where an LLM round trip would be wasted effort + +```bash +hermes webhook subscribe antenna-matches \ + --deliver telegram \ + --deliver-chat-id "123456789" \ + --deliver-only \ + --prompt "🎉 New match: {match.user_name} matched with you!" \ + --description "Antenna match notifications" +``` + +The POST returns `200 OK` on successful delivery, `502` on target failure — so upstream services can retry intelligently. HMAC auth, rate limits, and idempotency still apply. + +Requires `--deliver` to be a real target (telegram, discord, slack, github_comment, etc.) — `--deliver log` is rejected because log-only direct delivery is pointless. + +## Security + +- Each subscription gets an auto-generated HMAC-SHA256 secret (or provide your own with `--secret`) +- The webhook adapter validates signatures on every incoming POST +- Static routes from config.yaml cannot be overwritten by dynamic subscriptions +- Subscriptions persist to `~/.hermes/webhook_subscriptions.json` + +## How It Works + +1. `hermes webhook subscribe` writes to `~/.hermes/webhook_subscriptions.json` +2. The webhook adapter hot-reloads this file on each incoming request (mtime-gated, negligible overhead) +3. When a POST arrives matching a route, the adapter formats the prompt and triggers an agent run +4. The agent's response is delivered to the configured target (Telegram, Discord, GitHub comment, etc.) + +## Troubleshooting + +If webhooks aren't working: + +1. **Is the gateway running?** Check with `systemctl --user status hermes-gateway` or `ps aux | grep gateway` +2. **Is the webhook server listening?** `curl http://localhost:8644/health` should return `{"status": "ok"}` +3. **Check gateway logs:** `grep webhook ~/.hermes/logs/gateway.log | tail -20` +4. 
**Signature mismatch?** Verify the secret in your service matches the one from `hermes webhook list`. GitHub sends `X-Hub-Signature-256`, GitLab sends `X-Gitlab-Token`. +5. **Firewall/NAT?** The webhook URL must be reachable from the service. For local development, use a tunnel (ngrok, cloudflared). +6. **Wrong event type?** Check `--events` filter matches what the service sends. Use `hermes webhook test <name>` to verify the route works. diff --git a/website/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood.md b/website/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood.md new file mode 100644 index 000000000..0ff7e72d9 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood.md @@ -0,0 +1,178 @@ +--- +title: "Dogfood" +sidebar_label: "Dogfood" +description: "Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Dogfood + +Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/dogfood` | +| Version | `1.0.0` | +| Tags | `qa`, `testing`, `browser`, `web`, `dogfood` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Dogfood: Systematic Web Application QA Testing + +## Overview + +This skill guides you through systematic exploratory QA testing of web applications using the browser toolset. You will navigate the application, interact with elements, capture evidence of issues, and produce a structured bug report.
+ +## Prerequisites + +- Browser toolset must be available (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_vision`, `browser_console`, `browser_scroll`, `browser_back`, `browser_press`) +- A target URL and testing scope from the user + +## Inputs + +The user provides: +1. **Target URL** — the entry point for testing +2. **Scope** — what areas/features to focus on (or "full site" for comprehensive testing) +3. **Output directory** (optional) — where to save screenshots and the report (default: `./dogfood-output`) + +## Workflow + +Follow this 5-phase systematic workflow: + +### Phase 1: Plan + +1. Create the output directory structure: + ``` + {output_dir}/ + ├── screenshots/ # Evidence screenshots + └── report.md # Final report (generated in Phase 5) + ``` +2. Identify the testing scope based on user input. +3. Build a rough sitemap by planning which pages and features to test: + - Landing/home page + - Navigation links (header, footer, sidebar) + - Key user flows (sign up, login, search, checkout, etc.) + - Forms and interactive elements + - Edge cases (empty states, error pages, 404s) + +### Phase 2: Explore + +For each page or feature in your plan: + +1. **Navigate** to the page: + ``` + browser_navigate(url="https://example.com/page") + ``` + +2. **Take a snapshot** to understand the DOM structure: + ``` + browser_snapshot() + ``` + +3. **Check the console** for JavaScript errors: + ``` + browser_console(clear=true) + ``` + Do this after every navigation and after every significant interaction. Silent JS errors are high-value findings. + +4. **Take an annotated screenshot** to visually assess the page and identify interactive elements: + ``` + browser_vision(question="Describe the page layout, identify any visual issues, broken elements, or accessibility concerns", annotate=true) + ``` + The `annotate=true` flag overlays numbered `[N]` labels on interactive elements. 
Each `[N]` maps to ref `@eN` for subsequent browser commands. + +5. **Test interactive elements** systematically: + - Click buttons and links: `browser_click(ref="@eN")` + - Fill forms: `browser_type(ref="@eN", text="test input")` + - Test keyboard navigation: `browser_press(key="Tab")`, `browser_press(key="Enter")` + - Scroll through content: `browser_scroll(direction="down")` + - Test form validation with invalid inputs + - Test empty submissions + +6. **After each interaction**, check for: + - Console errors: `browser_console()` + - Visual changes: `browser_vision(question="What changed after the interaction?")` + - Expected vs actual behavior + +### Phase 3: Collect Evidence + +For every issue found: + +1. **Take a screenshot** showing the issue: + ``` + browser_vision(question="Capture and describe the issue visible on this page", annotate=false) + ``` + Save the `screenshot_path` from the response — you will reference it in the report. + +2. **Record the details**: + - URL where the issue occurs + - Steps to reproduce + - Expected behavior + - Actual behavior + - Console errors (if any) + - Screenshot path + +3. **Classify the issue** using the issue taxonomy (see `references/issue-taxonomy.md`): + - Severity: Critical / High / Medium / Low + - Category: Functional / Visual / Accessibility / Console / UX / Content + +### Phase 4: Categorize + +1. Review all collected issues. +2. De-duplicate — merge issues that are the same bug manifesting in different places. +3. Assign final severity and category to each issue. +4. Sort by severity (Critical first, then High, Medium, Low). +5. Count issues by severity and category for the executive summary. + +### Phase 5: Report + +Generate the final report using the template at `templates/dogfood-report-template.md`. + +The report must include: +1. **Executive summary** with total issue count, breakdown by severity, and testing scope +2. 
**Per-issue sections** with: + - Issue number and title + - Severity and category badges + - URL where observed + - Description of the issue + - Steps to reproduce + - Expected vs actual behavior + - Screenshot references (use `MEDIA:` for inline images) + - Console errors if relevant +3. **Summary table** of all issues +4. **Testing notes** — what was tested, what was not, any blockers + +Save the report to `{output_dir}/report.md`. + +## Tools Reference + +| Tool | Purpose | +|------|---------| +| `browser_navigate` | Go to a URL | +| `browser_snapshot` | Get DOM text snapshot (accessibility tree) | +| `browser_click` | Click an element by ref (`@eN`) or text | +| `browser_type` | Type into an input field | +| `browser_scroll` | Scroll up/down on the page | +| `browser_back` | Go back in browser history | +| `browser_press` | Press a keyboard key | +| `browser_vision` | Screenshot + AI analysis; use `annotate=true` for element labels | +| `browser_console` | Get JS console output and errors | + +## Tips + +- **Always check `browser_console()` after navigating and after significant interactions.** Silent JS errors are among the most valuable findings. +- **Use `annotate=true` with `browser_vision`** when you need to reason about interactive element positions or when the snapshot refs are unclear. +- **Test with both valid and invalid inputs** — form validation bugs are common. +- **Scroll through long pages** — content below the fold may have rendering issues. +- **Test navigation flows** — click through multi-step processes end-to-end. +- **Check responsive behavior** by noting any layout issues visible in screenshots. +- **Don't forget edge cases**: empty states, very long text, special characters, rapid clicking. +- When reporting screenshots to the user, include `MEDIA:` so they can see the evidence inline. 
diff --git a/website/docs/user-guide/skills/bundled/email/email-himalaya.md b/website/docs/user-guide/skills/bundled/email/email-himalaya.md new file mode 100644 index 000000000..55178bdc9 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/email/email-himalaya.md @@ -0,0 +1,293 @@ +--- +title: "Himalaya — CLI to manage emails via IMAP/SMTP" +sidebar_label: "Himalaya" +description: "CLI to manage emails via IMAP/SMTP" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Himalaya + +CLI to manage emails via IMAP/SMTP. Use himalaya to list, read, write, reply, forward, search, and organize emails from the terminal. Supports multiple accounts and message composition with MML (MIME Meta Language). + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/email/himalaya` | +| Version | `1.0.0` | +| Author | community | +| License | MIT | +| Tags | `Email`, `IMAP`, `SMTP`, `CLI`, `Communication` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Himalaya Email CLI + +Himalaya is a CLI email client that lets you manage emails from the terminal using IMAP, SMTP, Notmuch, or Sendmail backends. + +## References + +- `references/configuration.md` (config file setup + IMAP/SMTP authentication) +- `references/message-composition.md` (MML syntax for composing emails) + +## Prerequisites + +1. Himalaya CLI installed (`himalaya --version` to verify) +2. A configuration file at `~/.config/himalaya/config.toml` +3. 
IMAP/SMTP credentials configured (password stored securely) + +### Installation + +```bash +# Pre-built binary (Linux/macOS — recommended) +curl -sSL https://raw.githubusercontent.com/pimalaya/himalaya/master/install.sh | PREFIX=~/.local sh + +# macOS via Homebrew +brew install himalaya + +# Or via cargo (any platform with Rust) +cargo install himalaya --locked +``` + +## Configuration Setup + +Run the interactive wizard to set up an account: + +```bash +himalaya account configure +``` + +Or create `~/.config/himalaya/config.toml` manually: + +```toml +[accounts.personal] +email = "you@example.com" +display-name = "Your Name" +default = true + +backend.type = "imap" +backend.host = "imap.example.com" +backend.port = 993 +backend.encryption.type = "tls" +backend.login = "you@example.com" +backend.auth.type = "password" +backend.auth.cmd = "pass show email/imap" # or use keyring + +message.send.backend.type = "smtp" +message.send.backend.host = "smtp.example.com" +message.send.backend.port = 587 +message.send.backend.encryption.type = "start-tls" +message.send.backend.login = "you@example.com" +message.send.backend.auth.type = "password" +message.send.backend.auth.cmd = "pass show email/smtp" +``` + +## Hermes Integration Notes + +- **Reading, listing, searching, moving, deleting** all work directly through the terminal tool +- **Composing/replying/forwarding** — piped input (`cat << EOF | himalaya template send`) is recommended for reliability. 
Interactive `$EDITOR` mode works with `pty=true` + background + process tool, but requires knowing the editor and its commands +- Use `--output json` for structured output that's easier to parse programmatically +- The `himalaya account configure` wizard requires interactive input — use PTY mode: `terminal(command="himalaya account configure", pty=true)` + +## Common Operations + +### List Folders + +```bash +himalaya folder list +``` + +### List Emails + +List emails in INBOX (default): + +```bash +himalaya envelope list +``` + +List emails in a specific folder: + +```bash +himalaya envelope list --folder "Sent" +``` + +List with pagination: + +```bash +himalaya envelope list --page 1 --page-size 20 +``` + +### Search Emails + +```bash +himalaya envelope list from john@example.com subject meeting +``` + +### Read an Email + +Read email by ID (shows plain text): + +```bash +himalaya message read 42 +``` + +Export raw MIME: + +```bash +himalaya message export 42 --full +``` + +### Reply to an Email + +To reply non-interactively from Hermes, read the original message, compose a reply, and pipe it: + +```bash +# Get the reply template, edit it, and send +himalaya template reply 42 | sed 's/^$/\nYour reply text here\n/' | himalaya template send +``` + +Or build the reply manually: + +```bash +cat << 'EOF' | himalaya template send +From: you@example.com +To: sender@example.com +Subject: Re: Original Subject +In-Reply-To: + +Your reply here. 
+EOF +``` + +Reply-all (interactive — needs $EDITOR, use template approach above instead): + +```bash +himalaya message reply 42 --all +``` + +### Forward an Email + +```bash +# Get forward template and pipe with modifications +himalaya template forward 42 | sed 's/^To:.*/To: newrecipient@example.com/' | himalaya template send +``` + +### Write a New Email + +**Non-interactive (use this from Hermes)** — pipe the message via stdin: + +```bash +cat << 'EOF' | himalaya template send +From: you@example.com +To: recipient@example.com +Subject: Test Message + +Hello from Himalaya! +EOF +``` + +Or with headers flag: + +```bash +himalaya message write -H "To:recipient@example.com" -H "Subject:Test" "Message body here" +``` + +Note: `himalaya message write` without piped input opens `$EDITOR`. This works with `pty=true` + background mode, but piping is simpler and more reliable. + +### Move/Copy Emails + +Move to folder: + +```bash +himalaya message move 42 "Archive" +``` + +Copy to folder: + +```bash +himalaya message copy 42 "Important" +``` + +### Delete an Email + +```bash +himalaya message delete 42 +``` + +### Manage Flags + +Add flag: + +```bash +himalaya flag add 42 --flag seen +``` + +Remove flag: + +```bash +himalaya flag remove 42 --flag seen +``` + +## Multiple Accounts + +List accounts: + +```bash +himalaya account list +``` + +Use a specific account: + +```bash +himalaya --account work envelope list +``` + +## Attachments + +Save attachments from a message: + +```bash +himalaya attachment download 42 +``` + +Save to specific directory: + +```bash +himalaya attachment download 42 --dir ~/Downloads +``` + +## Output Formats + +Most commands support `--output` for structured output: + +```bash +himalaya envelope list --output json +himalaya envelope list --output plain +``` + +## Debugging + +Enable debug logging: + +```bash +RUST_LOG=debug himalaya envelope list +``` + +Full trace with backtrace: + +```bash +RUST_LOG=trace RUST_BACKTRACE=1 himalaya envelope list 
+``` + +## Tips + +- Use `himalaya --help` or `himalaya <command> --help` for detailed usage. +- Message IDs are relative to the current folder; re-list after folder changes. +- For composing rich emails with attachments, use MML syntax (see `references/message-composition.md`). +- Store passwords securely using `pass`, system keyring, or a command that outputs the password. diff --git a/website/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server.md b/website/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server.md new file mode 100644 index 000000000..d85495a18 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server.md @@ -0,0 +1,205 @@ +--- +title: "Minecraft Modpack Server — Set up a modded Minecraft server from a CurseForge/Modrinth server pack zip" +sidebar_label: "Minecraft Modpack Server" +description: "Set up a modded Minecraft server from a CurseForge/Modrinth server pack zip" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Minecraft Modpack Server + +Set up a modded Minecraft server from a CurseForge/Modrinth server pack zip. Covers NeoForge/Forge install, Java version, JVM tuning, firewall, LAN config, backups, and launch scripts. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/gaming/minecraft-modpack-server` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+::: + +# Minecraft Modpack Server Setup + +## When to use +- User wants to set up a modded Minecraft server from a server pack zip +- User needs help with NeoForge/Forge server configuration +- User asks about Minecraft server performance tuning or backups + +## Gather User Preferences First +Before starting setup, ask the user for: +- **Server name / MOTD** — what should it say in the server list? +- **Seed** — specific seed or random? +- **Difficulty** — peaceful / easy / normal / hard? +- **Gamemode** — survival / creative / adventure? +- **Online mode** — true (Mojang auth, legit accounts) or false (LAN/cracked friendly)? +- **Player count** — how many players expected? (affects RAM & view distance tuning) +- **RAM allocation** — or let agent decide based on mod count & available RAM? +- **View distance / simulation distance** — or let agent pick based on player count & hardware? +- **PvP** — on or off? +- **Whitelist** — open server or whitelist only? +- **Backups** — want automated backups? How often? + +Use sensible defaults if the user doesn't care, but always ask before generating the config. + +## Steps + +### 1. Download & Inspect the Pack +```bash +mkdir -p ~/minecraft-server +cd ~/minecraft-server +wget -O serverpack.zip "<SERVER_PACK_URL>" +unzip -o serverpack.zip -d server +ls server/ +``` +Look for: `startserver.sh`, installer jar (neoforge/forge), `user_jvm_args.txt`, `mods/` folder. +Check the script to determine: mod loader type, version, and required Java version. + +### 2. Install Java +- Minecraft 1.21+ → Java 21: `sudo apt install openjdk-21-jre-headless` +- Minecraft 1.18-1.20 → Java 17: `sudo apt install openjdk-17-jre-headless` +- Minecraft 1.16 and below → Java 8: `sudo apt install openjdk-8-jre-headless` +- Verify: `java -version` + +### 3. Install the Mod Loader +Most server packs include an install script.
Use the INSTALL_ONLY env var to install without launching: +```bash +cd ~/minecraft-server/server +ATM10_INSTALL_ONLY=true bash startserver.sh +# Or for generic Forge packs: +# java -jar forge-*-installer.jar --installServer +``` +This downloads libraries, patches the server jar, etc. + +### 4. Accept EULA +```bash +echo "eula=true" > ~/minecraft-server/server/eula.txt +``` + +### 5. Configure server.properties +Key settings for modded/LAN: +```properties +motd=\u00a7b\u00a7lServer Name \u00a7r\u00a78| \u00a7aModpack Name +server-port=25565 +online-mode=true # false for LAN without Mojang auth +enforce-secure-profile=true # match online-mode +difficulty=hard # most modpacks balance around hard +allow-flight=true # REQUIRED for modded (flying mounts/items) +spawn-protection=0 # let everyone build at spawn +max-tick-time=180000 # modded needs longer tick timeout +enable-command-block=true +``` + +Performance settings (scale to hardware): +```properties +# 2 players, beefy machine: +view-distance=16 +simulation-distance=10 + +# 4-6 players, moderate machine: +view-distance=10 +simulation-distance=6 + +# 8+ players or weaker hardware: +view-distance=8 +simulation-distance=4 +``` + +### 6. Tune JVM Args (user_jvm_args.txt) +Scale RAM to player count and mod count. Rule of thumb for modded: +- 100-200 mods: 6-12GB +- 200-350+ mods: 12-24GB +- Leave at least 8GB free for the OS/other tasks + +``` +-Xms12G +-Xmx24G +-XX:+UseG1GC +-XX:+ParallelRefProcEnabled +-XX:MaxGCPauseMillis=200 +-XX:+UnlockExperimentalVMOptions +-XX:+DisableExplicitGC +-XX:+AlwaysPreTouch +-XX:G1NewSizePercent=30 +-XX:G1MaxNewSizePercent=40 +-XX:G1HeapRegionSize=8M +-XX:G1ReservePercent=20 +-XX:G1HeapWastePercent=5 +-XX:G1MixedGCCountTarget=4 +-XX:InitiatingHeapOccupancyPercent=15 +-XX:G1MixedGCLiveThresholdPercent=90 +-XX:G1RSetUpdatingPauseTimePercent=5 +-XX:SurvivorRatio=32 +-XX:+PerfDisableSharedMem +-XX:MaxTenuringThreshold=1 +``` + +### 7. 
Open Firewall +```bash +sudo ufw allow 25565/tcp comment "Minecraft Server" +``` +Check with: `sudo ufw status | grep 25565` + +### 8. Create Launch Script +```bash +cat > ~/start-minecraft.sh << 'EOF' +#!/bin/bash +cd ~/minecraft-server/server +java @user_jvm_args.txt @libraries/net/neoforged/neoforge/<version>/unix_args.txt nogui +EOF +chmod +x ~/start-minecraft.sh +``` +Note: Replace `<version>` with the installed NeoForge version (check `libraries/net/neoforged/neoforge/`). For Forge (not NeoForge), the args file path differs. Check `startserver.sh` for the exact path. + +### 9. Set Up Automated Backups +Create backup script: +```bash +cat > ~/minecraft-server/backup.sh << 'SCRIPT' +#!/bin/bash +SERVER_DIR="$HOME/minecraft-server/server" +BACKUP_DIR="$HOME/minecraft-server/backups" +WORLD_DIR="$SERVER_DIR/world" +MAX_BACKUPS=24 +mkdir -p "$BACKUP_DIR" +[ ! -d "$WORLD_DIR" ] && echo "[BACKUP] No world folder" && exit 0 +TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) +BACKUP_FILE="$BACKUP_DIR/world_${TIMESTAMP}.tar.gz" +echo "[BACKUP] Starting at $(date)" +tar -czf "$BACKUP_FILE" -C "$SERVER_DIR" world +SIZE=$(du -h "$BACKUP_FILE" | cut -f1) +echo "[BACKUP] Saved: $BACKUP_FILE ($SIZE)" +BACKUP_COUNT=$(ls -1t "$BACKUP_DIR"/world_*.tar.gz 2>/dev/null | wc -l) +if [ "$BACKUP_COUNT" -gt "$MAX_BACKUPS" ]; then + REMOVE=$((BACKUP_COUNT - MAX_BACKUPS)) + ls -1t "$BACKUP_DIR"/world_*.tar.gz | tail -n "$REMOVE" | xargs rm -f + echo "[BACKUP] Pruned $REMOVE old backup(s)" +fi +echo "[BACKUP] Done at $(date)" +SCRIPT +chmod +x ~/minecraft-server/backup.sh +``` + +Add hourly cron: +```bash +(crontab -l 2>/dev/null | grep -v "minecraft-server/backup.sh"; echo "0 * * * * $HOME/minecraft-server/backup.sh >> $HOME/minecraft-server/backups/backup.log 2>&1") | crontab - +``` + +## Pitfalls +- ALWAYS set `allow-flight=true` for modded — mods with jetpacks/flight will kick players otherwise +- `max-tick-time=180000` or higher — modded servers often have long ticks during worldgen +- First startup is SLOW (several minutes for big packs) — don't panic +- "Can't keep up!"
warnings on first launch are normal, settles after initial chunk gen +- If online-mode=false, set enforce-secure-profile=false too or clients get rejected +- The pack's startserver.sh often has an auto-restart loop — make a clean launch script without it +- Delete the world/ folder to regenerate with a new seed +- Some packs have env vars to control behavior (e.g., ATM10 uses ATM10_JAVA, ATM10_RESTART, ATM10_INSTALL_ONLY) + +## Verification +- `pgrep -fa neoforge` or `pgrep -fa minecraft` to check if running +- Check logs: `tail -f ~/minecraft-server/server/logs/latest.log` +- Look for "Done (Xs)!" in the log = server is ready +- Test connection: player adds server IP in Multiplayer diff --git a/website/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player.md b/website/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player.md new file mode 100644 index 000000000..ab070f867 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player.md @@ -0,0 +1,235 @@ +--- +title: "Pokemon Player — Play Pokemon games autonomously via headless emulation" +sidebar_label: "Pokemon Player" +description: "Play Pokemon games autonomously via headless emulation" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Pokemon Player + +Play Pokemon games autonomously via headless emulation. Starts a game server, reads structured game state from RAM, makes strategic decisions, and sends button inputs — all from the terminal. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/gaming/pokemon-player` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# Pokemon Player + +Play Pokemon games via headless emulation using the `pokemon-agent` package. + +## When to Use +- User says "play pokemon", "start pokemon", "pokemon game" +- User asks about Pokemon Red, Blue, Yellow, FireRed, etc. +- User wants to watch an AI play Pokemon +- User references a ROM file (.gb, .gbc, .gba) + +## Startup Procedure + +### 1. First-time setup (clone, venv, install) +The repo is NousResearch/pokemon-agent on GitHub. Clone it, then +set up a Python 3.10+ virtual environment. Use uv (preferred for speed) +to create the venv and install the package in editable mode with the +pyboy extra. If uv is not available, fall back to python3 -m venv + pip. + +On this machine it is already set up at /home/teknium/pokemon-agent +with a venv ready — just cd there and source .venv/bin/activate. + +You also need a ROM file. Ask the user for theirs. On this machine +one exists at roms/pokemon_red.gb inside that directory. +NEVER download or provide ROM files — always ask the user. + +### 2. Start the game server +From inside the pokemon-agent directory with the venv activated, run +pokemon-agent serve with --rom pointing to the ROM and --port 9876. +Run it in the background with &. +To resume from a saved game, add --load-state with the save name. +Wait 4 seconds for startup, then verify with GET /health. + +### 3. Set up live dashboard for user to watch +Use an SSH reverse tunnel via localhost.run so the user can view +the dashboard in their browser. Connect with ssh, forwarding local +port 9876 to remote port 80 on nokey@localhost.run. Redirect output +to a log file, wait 10 seconds, then grep the log for the .lhr.life +URL. Give the user the URL with /dashboard/ appended. +The tunnel URL changes each time — give the user the new one if restarted. 
+ +## Save and Load + +### When to save +- Every 15-20 turns of gameplay +- ALWAYS before gym battles, rival encounters, or risky fights +- Before entering a new town or dungeon +- Before any action you are unsure about + +### How to save +POST /save with a descriptive name. Good examples: +before_brock, route1_start, mt_moon_entrance, got_cut + +### How to load +POST /load with the save name. + +### List available saves +GET /saves returns all saved states. + +### Loading on server startup +Use --load-state flag when starting the server to auto-load a save. +This is faster than loading via the API after startup. + +## The Gameplay Loop + +### Step 1: OBSERVE — check state AND take a screenshot +GET /state for position, HP, battle, dialog. +GET /screenshot and save to /tmp/pokemon.png, then use vision_analyze. +Always do BOTH — RAM state gives numbers, vision gives spatial awareness. + +### Step 2: ORIENT +- Dialog/text on screen → advance it +- In battle → fight or run +- Party hurt → head to Pokemon Center +- Near objective → navigate carefully + +### Step 3: DECIDE +Priority: dialog > battle > heal > story objective > training > explore + +### Step 4: ACT — move 2-4 steps max, then re-check +POST /action with a SHORT action list (2-4 actions, not 10-15). + +### Step 5: VERIFY — screenshot after every move sequence +Take a screenshot and use vision_analyze to confirm you moved where +intended. This is the MOST IMPORTANT step. Without vision you WILL get lost. 
+ +### Step 6: RECORD progress to memory with PKM: prefix + +### Step 7: SAVE periodically + +## Action Reference +- press_a — confirm, talk, select +- press_b — cancel, close menu +- press_start — open game menu +- walk_up/down/left/right — move one tile +- hold_b_N — hold B for N frames (use for speeding through text) +- wait_60 — wait about 1 second (60 frames) +- a_until_dialog_end — press A repeatedly until dialog clears + +## Critical Tips from Experience + +### USE VISION CONSTANTLY +- Take a screenshot every 2-4 movement steps +- The RAM state tells you position and HP but NOT what is around you +- Ledges, fences, signs, building doors, NPCs — only visible via screenshot +- Ask the vision model specific questions: "what is one tile north of me?" +- When stuck, always screenshot before trying random directions + +### Warp Transitions Need Extra Wait Time +When walking through a door or stairs, the screen fades to black during +the map transition. You MUST wait for it to complete. Add 2-3 wait_60 +actions after any door/stair warp. Without waiting, the position reads +as stale and you will think you are still in the old map. + +### Building Exit Trap +When you exit a building, you appear directly IN FRONT of the door. +If you walk north, you go right back inside. ALWAYS sidestep first +by walking left or right 2 tiles, then proceed in your intended direction. + +### Dialog Handling +Gen 1 text scrolls slowly letter-by-letter. To speed through dialog, +hold B for 120 frames then press A. Repeat as needed. Holding B makes +text display at max speed. Then press A to advance to the next line. +The a_until_dialog_end action checks the RAM dialog flag, but this flag +does not catch ALL text states. If dialog seems stuck, use the manual +hold_b + press_a pattern instead and verify via screenshot. + +### Ledges Are One-Way +Ledges (small cliff edges) can only be jumped DOWN (south), never climbed +UP (north). 
If blocked by a ledge going north, you must go left or right +to find the gap around it. Use vision to identify which direction the +gap is. Ask the vision model explicitly. + +### Navigation Strategy +- Move 2-4 steps at a time, then screenshot to check position +- When entering a new area, screenshot immediately to orient +- Ask the vision model "which direction to [destination]?" +- If stuck for 3+ attempts, screenshot and re-evaluate completely +- Do not spam 10-15 movements — you will overshoot or get stuck + +### Running from Wild Battles +On the battle menu, RUN is bottom-right. To reach it from the default +cursor position (FIGHT, top-left): press down then right to move cursor +to RUN, then press A. Wrap with hold_b to speed through text/animations. + +### Battling (FIGHT) +On the battle menu FIGHT is top-left (default cursor position). +Press A to enter move selection, A again to use the first move. +Then hold B to speed through attack animations and text. + +## Battle Strategy + +### Decision Tree +1. Want to catch? → Weaken then throw Poke Ball +2. Wild you don't need? → RUN +3. Type advantage? → Use super-effective move +4. No advantage? → Use strongest STAB move +5. Low HP? → Switch or use Potion + +### Gen 1 Type Chart (key matchups) +- Water beats Fire, Ground, Rock +- Fire beats Grass, Bug, Ice +- Grass beats Water, Ground, Rock +- Electric beats Water, Flying +- Ground beats Fire, Electric, Rock, Poison +- Psychic beats Fighting, Poison (dominant in Gen 1!) 
+ +### Gen 1 Quirks +- Special stat = both offense AND defense for special moves +- Psychic type is overpowered (Ghost moves bugged) +- Critical hits based on Speed stat +- Wrap/Bind prevent opponent from acting +- Focus Energy bug: REDUCES crit rate instead of raising it + +## Memory Conventions +| Prefix | Purpose | Example | +|--------|---------|---------| +| PKM:OBJECTIVE | Current goal | Get Parcel from Viridian Mart | +| PKM:MAP | Navigation knowledge | Viridian: mart is northeast | +| PKM:STRATEGY | Battle/team plans | Need Grass type before Misty | +| PKM:PROGRESS | Milestone tracker | Beat rival, heading to Viridian | +| PKM:STUCK | Stuck situations | Ledge at y=28 go right to bypass | +| PKM:TEAM | Team notes | Squirtle Lv6, Tackle + Tail Whip | + +## Progression Milestones +- Choose starter +- Deliver Parcel from Viridian Mart, receive Pokedex +- Boulder Badge — Brock (Rock) → use Water/Grass +- Cascade Badge — Misty (Water) → use Grass/Electric +- Thunder Badge — Lt. Surge (Electric) → use Ground +- Rainbow Badge — Erika (Grass) → use Fire/Ice/Flying +- Soul Badge — Koga (Poison) → use Ground/Psychic +- Marsh Badge — Sabrina (Psychic) → hardest gym +- Volcano Badge — Blaine (Fire) → use Water/Ground +- Earth Badge — Giovanni (Ground) → use Water/Grass/Ice +- Elite Four → Champion! + +## Stopping Play +1. Save the game with a descriptive name via POST /save +2. Update memory with PKM:PROGRESS +3. Tell user: "Game saved as [name]! Say 'play pokemon' to resume." +4. 
Kill the server and tunnel background processes + +## Pitfalls +- NEVER download or provide ROM files +- Do NOT send more than 4-5 actions without checking vision +- Always sidestep after exiting buildings before going north +- Always add wait_60 x2-3 after door/stair warps +- Dialog detection via RAM is unreliable — verify with screenshots +- Save BEFORE risky encounters +- The tunnel URL changes each time you restart it diff --git a/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md b/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md new file mode 100644 index 000000000..13c3fe442 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md @@ -0,0 +1,131 @@ +--- +title: "Codebase Inspection" +sidebar_label: "Codebase Inspection" +description: "Inspect and analyze codebases using pygount for LOC counting, language breakdown, and code-vs-comment ratios" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Codebase Inspection + +Inspect and analyze codebases using pygount for LOC counting, language breakdown, and code-vs-comment ratios. Use when asked to check lines of code, repo size, language composition, or codebase stats. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/codebase-inspection` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `LOC`, `Code Analysis`, `pygount`, `Codebase`, `Metrics`, `Repository` | +| Related skills | [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# Codebase Inspection with pygount + +Analyze repositories for lines of code, language breakdown, file counts, and code-vs-comment ratios using `pygount`. + +## When to Use + +- User asks for LOC (lines of code) count +- User wants a language breakdown of a repo +- User asks about codebase size or composition +- User wants code-vs-comment ratios +- General "how big is this repo" questions + +## Prerequisites + +```bash +pip install --break-system-packages pygount 2>/dev/null || pip install pygount +``` + +## 1. Basic Summary (Most Common) + +Get a full language breakdown with file counts, code lines, and comment lines: + +```bash +cd /path/to/repo +pygount --format=summary \ + --folders-to-skip=".git,node_modules,venv,.venv,__pycache__,.cache,dist,build,.next,.tox,.eggs,*.egg-info" \ + . +``` + +**IMPORTANT:** Always use `--folders-to-skip` to exclude dependency/build directories, otherwise pygount will crawl them and take a very long time or hang. + +## 2. Common Folder Exclusions + +Adjust based on the project type: + +```bash +# Python projects +--folders-to-skip=".git,venv,.venv,__pycache__,.cache,dist,build,.tox,.eggs,.mypy_cache" + +# JavaScript/TypeScript projects +--folders-to-skip=".git,node_modules,dist,build,.next,.cache,.turbo,coverage" + +# General catch-all +--folders-to-skip=".git,node_modules,venv,.venv,__pycache__,.cache,dist,build,.next,.tox,vendor,third_party" +``` + +## 3. Filter by Specific Language + +```bash +# Only count Python files +pygount --suffix=py --format=summary . + +# Only count Python and YAML +pygount --suffix=py,yaml,yml --format=summary . +``` + +## 4. Detailed File-by-File Output + +```bash +# Default format shows per-file breakdown +pygount --folders-to-skip=".git,node_modules,venv" . + +# Sort by code lines (pipe through sort) +pygount --folders-to-skip=".git,node_modules,venv" . | sort -t$'\t' -k1 -nr | head -20 +``` + +## 5. 
Output Formats
+
+```bash
+# Summary table (default recommendation)
+pygount --format=summary .
+
+# JSON output for programmatic use
+pygount --format=json .
+
+# Pipe-friendly: suppress pygount's stderr warnings before piping the summary
+pygount --format=summary . 2>/dev/null
+```
+
+## 6. Interpreting Results
+
+The summary table columns:
+- **Language** — detected programming language
+- **Files** — number of files of that language
+- **Code** — lines of actual code (executable/declarative)
+- **Comment** — lines that are comments or documentation
+- **%** — percentage of total
+
+Special pseudo-languages:
+- `__empty__` — empty files
+- `__binary__` — binary files (images, compiled, etc.)
+- `__generated__` — auto-generated files (detected heuristically)
+- `__duplicate__` — files with identical content
+- `__unknown__` — unrecognized file types
+
+## Pitfalls
+
+1. **Always exclude .git, node_modules, venv** — without `--folders-to-skip`, pygount will crawl everything and may take minutes or hang on large dependency trees.
+2. **Markdown shows 0 code lines** — pygount classifies all Markdown content as comments, not code. This is expected behavior.
+3. **JSON files show low code counts** — pygount may count JSON lines conservatively. For accurate JSON line counts, use `wc -l` directly.
+4. **Large monorepos** — for very large repos, consider using `--suffix` to target specific languages rather than scanning everything.
diff --git a/website/docs/user-guide/skills/bundled/github/github-github-auth.md b/website/docs/user-guide/skills/bundled/github/github-github-auth.md new file mode 100644 index 000000000..4f7360c43 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-auth.md @@ -0,0 +1,264 @@ +--- +title: "Github Auth — Set up GitHub authentication for the agent using git (universally available) or the gh CLI" +sidebar_label: "Github Auth" +description: "Set up GitHub authentication for the agent using git (universally available) or the gh CLI" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Auth + +Set up GitHub authentication for the agent using git (universally available) or the gh CLI. Covers HTTPS tokens, SSH keys, credential helpers, and gh auth — with a detection flow to pick the right method automatically. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-auth` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Authentication`, `Git`, `gh-cli`, `SSH`, `Setup` | +| Related skills | [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review), [`github-issues`](/docs/user-guide/skills/bundled/github/github-github-issues), [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Authentication Setup + +This skill sets up authentication so the agent can work with GitHub repositories, PRs, issues, and CI. 
It covers two paths: + +- **`git` (always available)** — uses HTTPS personal access tokens or SSH keys +- **`gh` CLI (if installed)** — richer GitHub API access with a simpler auth flow + +## Detection Flow + +When a user asks you to work with GitHub, run this check first: + +```bash +# Check what's available +git --version +gh --version 2>/dev/null || echo "gh not installed" + +# Check if already authenticated +gh auth status 2>/dev/null || echo "gh not authenticated" +git config --global credential.helper 2>/dev/null || echo "no git credential helper" +``` + +**Decision tree:** +1. If `gh auth status` shows authenticated → you're good, use `gh` for everything +2. If `gh` is installed but not authenticated → use "gh auth" method below +3. If `gh` is not installed → use "git-only" method below (no sudo needed) + +--- + +## Method 1: Git-Only Authentication (No gh, No sudo) + +This works on any machine with `git` installed. No root access needed. + +### Option A: HTTPS with Personal Access Token (Recommended) + +This is the most portable method — works everywhere, no SSH config needed. 
+
+**Step 1: Create a personal access token**
+
+Tell the user to go to: **https://github.com/settings/tokens**
+
+- Click "Generate new token (classic)"
+- Give it a name like "hermes-agent"
+- Select scopes:
+  - `repo` (full repository access — read, write, push, PRs)
+  - `workflow` (trigger and manage GitHub Actions)
+  - `read:org` (if working with organization repos)
+- Set expiration (90 days is a good default)
+- Copy the token — it won't be shown again
+
+**Step 2: Configure git to store the token**
+
+```bash
+# Set up the credential helper to cache credentials
+# "store" saves to ~/.git-credentials in plaintext (simple, persistent)
+git config --global credential.helper store
+
+# Now do a test operation that triggers auth — git will prompt for credentials
+# Username: YOUR_GITHUB_USERNAME
+# Password: YOUR_PERSONAL_ACCESS_TOKEN
+git ls-remote https://github.com/OWNER/REPO.git
+```
+
+After entering credentials once, they're saved and reused for all future operations.
+
+**Alternative: cache helper (credentials expire from memory)**
+
+```bash
+# Cache in memory for 8 hours (28800 seconds) instead of saving to disk
+git config --global credential.helper 'cache --timeout=28800'
+```
+
+**Alternative: set the token directly in the remote URL (per-repo)**
+
+```bash
+# Embed token in the remote URL (avoids credential prompts entirely)
+git remote set-url origin https://YOUR_GITHUB_USERNAME:YOUR_PERSONAL_ACCESS_TOKEN@github.com/OWNER/REPO.git
+```
+
+**Step 3: Configure git identity**
+
+```bash
+# Required for commits — set name and email
+git config --global user.name "Their Name"
+git config --global user.email "their-email@example.com"
+```
+
+**Step 4: Verify**
+
+```bash
+# Test push access (this should work without any prompts now)
+git ls-remote https://github.com/OWNER/REPO.git
+
+# Verify identity
+git config --global user.name
+git config --global user.email
+```
+
+### Option B: SSH Key Authentication
+
+Good for users who prefer SSH or already have keys set up.
+
+**Step 1: Check for existing SSH keys**
+
+```bash
+ls -la ~/.ssh/id_*.pub 2>/dev/null || echo "No SSH keys found"
+```
+
+**Step 2: Generate a key if needed**
+
+```bash
+# Generate an ed25519 key (modern, secure, fast)
+ssh-keygen -t ed25519 -C "their-email@example.com" -f ~/.ssh/id_ed25519 -N ""
+
+# Display the public key for them to add to GitHub
+cat ~/.ssh/id_ed25519.pub
+```
+
+Tell the user to add the public key at: **https://github.com/settings/keys**
+- Click "New SSH key"
+- Paste the public key content
+- Give it a title like "hermes-agent-MACHINE_NAME"
+
+**Step 3: Test the connection**
+
+```bash
+ssh -T git@github.com
+# Expected: "Hi YOUR_GITHUB_USERNAME! You've successfully authenticated..."
+```
+
+**Step 4: Configure git to use SSH for GitHub**
+
+```bash
+# Rewrite HTTPS GitHub URLs to SSH automatically
+git config --global url."git@github.com:".insteadOf "https://github.com/"
+```
+
+**Step 5: Configure git identity**
+
+```bash
+git config --global user.name "Their Name"
+git config --global user.email "their-email@example.com"
+```
+
+---
+
+## Method 2: gh CLI Authentication
+
+If `gh` is installed, it handles both API access and git credentials in one step.
+
+### Interactive Browser Login (Desktop)
+
+```bash
+gh auth login
+# Select: GitHub.com
+# Select: HTTPS
+# Authenticate via browser
+```
+
+### Token-Based Login (Headless / SSH Servers)
+
+```bash
+echo "YOUR_PERSONAL_ACCESS_TOKEN" | gh auth login --with-token
+
+# Set up git credentials through gh
+gh auth setup-git
+```
+
+### Verify
+
+```bash
+gh auth status
+```
+
+---
+
+## Using the GitHub API Without gh
+
+When `gh` is not available, you can still access the full GitHub API using `curl` with a personal access token. This is how the other GitHub skills implement their fallbacks.
+
+### Setting the Token for API Calls
+
+```bash
+# Option 1: Export as env var (preferred — keeps it out of commands)
+export GITHUB_TOKEN="YOUR_PERSONAL_ACCESS_TOKEN"
+
+# Then use in curl calls:
+curl -s -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/user
+```
+
+### Extracting the Token from Git Credentials
+
+If git credentials are already configured (via credential.helper store), the token can be extracted:
+
+```bash
+# Read from git credential store
+grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|'
+```
+
+### Helper: Detect Auth Method
+
+Use this pattern at the start of any GitHub workflow:
+
+```bash
+# Try gh first, fall back to git + curl
+if command -v gh &>/dev/null && gh auth status &>/dev/null; then
+  echo "AUTH_METHOD=gh"
+elif [ -n "$GITHUB_TOKEN" ]; then
+  echo "AUTH_METHOD=curl"
+elif [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then
+  export GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r')
+  echo "AUTH_METHOD=curl"
+elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then
+  export GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
+  echo "AUTH_METHOD=curl"
+else
+  echo "AUTH_METHOD=none"
+  echo "Need to set up authentication first"
+fi
+```
+
+---
+
+## Troubleshooting
+
+| Problem | Solution |
+|---------|----------|
+| `git push` asks for password | GitHub disabled password auth. Use a personal access token as the password, or switch to SSH |
+| `remote: Permission to X denied` | Token may lack `repo` scope — regenerate with correct scopes |
+| `fatal: Authentication failed` | Cached credentials may be stale — delete the stale `github.com` line from `~/.git-credentials` (or feed the URL to `git credential reject` on stdin), then re-authenticate |
+| `ssh: connect to host github.com port 22: Connection refused` | Try SSH over HTTPS port: add `Host github.com` with `Port 443` and `Hostname ssh.github.com` to `~/.ssh/config` |
+| Credentials not persisting | Check `git config --global credential.helper` — must be `store` or `cache` |
+| Multiple GitHub accounts | Use SSH with different keys per host alias in `~/.ssh/config`, or per-repo credential URLs |
+| `gh: command not found` + no sudo | Use git-only Method 1 above — no installation needed |
diff --git a/website/docs/user-guide/skills/bundled/github/github-github-code-review.md b/website/docs/user-guide/skills/bundled/github/github-github-code-review.md
new file mode 100644
index 000000000..9a18c45e1
--- /dev/null
+++ b/website/docs/user-guide/skills/bundled/github/github-github-code-review.md
@@ -0,0 +1,498 @@
+---
+title: "Github Code Review"
+sidebar_label: "Github Code Review"
+description: "Review code changes by analyzing git diffs, leaving inline comments on PRs, and performing thorough pre-push review"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Github Code Review
+
+Review code changes by analyzing git diffs, leaving inline comments on PRs, and performing thorough pre-push review. Works with gh CLI or falls back to git + GitHub REST API via curl.
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-code-review` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Code-Review`, `Pull-Requests`, `Git`, `Quality` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Code Review + +Perform code reviews on local changes before pushing, or review open PRs on GitHub. Most of this skill uses plain `git` — the `gh`/`curl` split only matters for PR-level interactions. + +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) +- Inside a git repository + +### Setup (for PR interactions) + +```bash +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi + +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +``` + +--- + +## 1. Reviewing Local Changes (Pre-Push) + +This is pure `git` — works everywhere, no API needed. 
+ +### Get the Diff + +```bash +# Staged changes (what would be committed) +git diff --staged + +# All changes vs main (what a PR would contain) +git diff main...HEAD + +# File names only +git diff main...HEAD --name-only + +# Stat summary (insertions/deletions per file) +git diff main...HEAD --stat +``` + +### Review Strategy + +1. **Get the big picture first:** + +```bash +git diff main...HEAD --stat +git log main..HEAD --oneline +``` + +2. **Review file by file** — use `read_file` on changed files for full context, and the diff to see what changed: + +```bash +git diff main...HEAD -- src/auth/login.py +``` + +3. **Check for common issues:** + +```bash +# Debug statements, TODOs, console.logs left behind +git diff main...HEAD | grep -n "print(\|console\.log\|TODO\|FIXME\|HACK\|XXX\|debugger" + +# Large files accidentally staged +git diff main...HEAD --stat | sort -t'|' -k2 -rn | head -10 + +# Secrets or credential patterns +git diff main...HEAD | grep -in "password\|secret\|api_key\|token.*=\|private_key" + +# Merge conflict markers +git diff main...HEAD | grep -n "<<<<<<\|>>>>>>\|=======" +``` + +4. **Present structured feedback** to the user. + +### Review Output Format + +When reviewing local changes, present findings in this structure: + +``` +## Code Review Summary + +### Critical +- **src/auth.py:45** — SQL injection: user input passed directly to query. + Suggestion: Use parameterized queries. + +### Warnings +- **src/models/user.py:23** — Password stored in plaintext. Use bcrypt or argon2. +- **src/api/routes.py:112** — No rate limiting on login endpoint. + +### Suggestions +- **src/utils/helpers.py:8** — Duplicates logic in `src/core/utils.py:34`. Consolidate. +- **tests/test_auth.py** — Missing edge case: expired token test. + +### Looks Good +- Clean separation of concerns in the middleware layer +- Good test coverage for the happy path +``` + +--- + +## 2. 
Reviewing a Pull Request on GitHub + +### View PR Details + +**With gh:** + +```bash +gh pr view 123 +gh pr diff 123 +gh pr diff 123 --name-only +``` + +**With git + curl:** + +```bash +PR_NUMBER=123 + +# Get PR details +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c " +import sys, json +pr = json.load(sys.stdin) +print(f\"Title: {pr['title']}\") +print(f\"Author: {pr['user']['login']}\") +print(f\"Branch: {pr['head']['ref']} -> {pr['base']['ref']}\") +print(f\"State: {pr['state']}\") +print(f\"Body:\n{pr['body']}\")" + +# List changed files +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/files \ + | python3 -c " +import sys, json +for f in json.load(sys.stdin): + print(f\"{f['status']:10} +{f['additions']:-4} -{f['deletions']:-4} {f['filename']}\")" +``` + +### Check Out PR Locally for Full Review + +This works with plain `git` — no `gh` needed: + +```bash +# Fetch the PR branch and check it out +git fetch origin pull/123/head:pr-123 +git checkout pr-123 + +# Now you can use read_file, search_files, run tests, etc. + +# View diff against the base branch +git diff main...pr-123 +``` + +**With gh (shortcut):** + +```bash +gh pr checkout 123 +``` + +### Leave Comments on a PR + +**General PR comment — with gh:** + +```bash +gh pr comment 123 --body "Overall looks good, a few suggestions below." 
+``` + +**General PR comment — with curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/$PR_NUMBER/comments \ + -d '{"body": "Overall looks good, a few suggestions below."}' +``` + +### Leave Inline Review Comments + +**Single inline comment — with gh (via API):** + +```bash +HEAD_SHA=$(gh pr view 123 --json headRefOid --jq '.headRefOid') + +gh api repos/$OWNER/$REPO/pulls/123/comments \ + --method POST \ + -f body="This could be simplified with a list comprehension." \ + -f path="src/auth/login.py" \ + -f commit_id="$HEAD_SHA" \ + -f line=45 \ + -f side="RIGHT" +``` + +**Single inline comment — with curl:** + +```bash +# Get the head commit SHA +HEAD_SHA=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])") + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/comments \ + -d "{ + \"body\": \"This could be simplified with a list comprehension.\", + \"path\": \"src/auth/login.py\", + \"commit_id\": \"$HEAD_SHA\", + \"line\": 45, + \"side\": \"RIGHT\" + }" +``` + +### Submit a Formal Review (Approve / Request Changes) + +**With gh:** + +```bash +gh pr review 123 --approve --body "LGTM!" +gh pr review 123 --request-changes --body "See inline comments." +gh pr review 123 --comment --body "Some suggestions, nothing blocking." 
+``` + +**With curl — multi-comment review submitted atomically:** + +```bash +HEAD_SHA=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])") + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/reviews \ + -d "{ + \"commit_id\": \"$HEAD_SHA\", + \"event\": \"COMMENT\", + \"body\": \"Code review from Hermes Agent\", + \"comments\": [ + {\"path\": \"src/auth.py\", \"line\": 45, \"body\": \"Use parameterized queries to prevent SQL injection.\"}, + {\"path\": \"src/models/user.py\", \"line\": 23, \"body\": \"Hash passwords with bcrypt before storing.\"}, + {\"path\": \"tests/test_auth.py\", \"line\": 1, \"body\": \"Add test for expired token edge case.\"} + ] + }" +``` + +Event values: `"APPROVE"`, `"REQUEST_CHANGES"`, `"COMMENT"` + +The `line` field refers to the line number in the *new* version of the file. For deleted lines, use `"side": "LEFT"`. + +--- + +## 3. Review Checklist + +When performing a code review (local or PR), systematically check: + +### Correctness +- Does the code do what it claims? +- Edge cases handled (empty inputs, nulls, large data, concurrent access)? +- Error paths handled gracefully? + +### Security +- No hardcoded secrets, credentials, or API keys +- Input validation on user-facing inputs +- No SQL injection, XSS, or path traversal +- Auth/authz checks where needed + +### Code Quality +- Clear naming (variables, functions, classes) +- No unnecessary complexity or premature abstraction +- DRY — no duplicated logic that should be extracted +- Functions are focused (single responsibility) + +### Testing +- New code paths tested? +- Happy path and error cases covered? +- Tests readable and maintainable? 
+ +### Performance +- No N+1 queries or unnecessary loops +- Appropriate caching where beneficial +- No blocking operations in async code paths + +### Documentation +- Public APIs documented +- Non-obvious logic has comments explaining "why" +- README updated if behavior changed + +--- + +## 4. Pre-Push Review Workflow + +When the user asks you to "review the code" or "check before pushing": + +1. `git diff main...HEAD --stat` — see scope of changes +2. `git diff main...HEAD` — read the full diff +3. For each changed file, use `read_file` if you need more context +4. Apply the checklist above +5. Present findings in the structured format (Critical / Warnings / Suggestions / Looks Good) +6. If critical issues found, offer to fix them before the user pushes + +--- + +## 5. PR Review Workflow (End-to-End) + +When the user asks you to "review PR #N", "look at this PR", or gives you a PR URL, follow this recipe: + +### Step 1: Set up environment + +```bash +source "${HERMES_HOME:-$HOME/.hermes}/skills/github/github-auth/scripts/gh-env.sh" +# Or run the inline setup block from the top of this skill +``` + +### Step 2: Gather PR context + +Get the PR metadata, description, and list of changed files to understand scope before diving into code. + +**With gh:** +```bash +gh pr view 123 +gh pr diff 123 --name-only +gh pr checks 123 +``` + +**With curl:** +```bash +PR_NUMBER=123 + +# PR details (title, author, description, branch) +curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER + +# Changed files with line counts +curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER/files +``` + +### Step 3: Check out the PR locally + +This gives you full access to `read_file`, `search_files`, and the ability to run tests. 
+ +```bash +git fetch origin pull/$PR_NUMBER/head:pr-$PR_NUMBER +git checkout pr-$PR_NUMBER +``` + +### Step 4: Read the diff and understand changes + +```bash +# Full diff against the base branch +git diff main...HEAD + +# Or file-by-file for large PRs +git diff main...HEAD --name-only +# Then for each file: +git diff main...HEAD -- path/to/file.py +``` + +For each changed file, use `read_file` to see full context around the changes — diffs alone can miss issues visible only with surrounding code. + +### Step 5: Run automated checks locally (if applicable) + +```bash +# Run tests if there's a test suite +python -m pytest 2>&1 | tail -20 +# or: npm test, cargo test, go test ./..., etc. + +# Run linter if configured +ruff check . 2>&1 | head -30 +# or: eslint, clippy, etc. +``` + +### Step 6: Apply the review checklist (Section 3) + +Go through each category: Correctness, Security, Code Quality, Testing, Performance, Documentation. + +### Step 7: Post the review to GitHub + +Collect your findings and submit them as a formal review with inline comments. + +**With gh:** +```bash +# If no issues — approve +gh pr review $PR_NUMBER --approve --body "Reviewed by Hermes Agent. Code looks clean — good test coverage, no security concerns." + +# If issues found — request changes with inline comments +gh pr review $PR_NUMBER --request-changes --body "Found a few issues — see inline comments." 
+``` + +**With curl — atomic review with multiple inline comments:** +```bash +HEAD_SHA=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])") + +# Build the review JSON — event is APPROVE, REQUEST_CHANGES, or COMMENT +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER/reviews \ + -d "{ + \"commit_id\": \"$HEAD_SHA\", + \"event\": \"REQUEST_CHANGES\", + \"body\": \"## Hermes Agent Review\n\nFound 2 issues, 1 suggestion. See inline comments.\", + \"comments\": [ + {\"path\": \"src/auth.py\", \"line\": 45, \"body\": \"🔴 **Critical:** User input passed directly to SQL query — use parameterized queries.\"}, + {\"path\": \"src/models.py\", \"line\": 23, \"body\": \"⚠️ **Warning:** Password stored without hashing.\"}, + {\"path\": \"src/utils.py\", \"line\": 8, \"body\": \"💡 **Suggestion:** This duplicates logic in core/utils.py:34.\"} + ] + }" +``` + +### Step 8: Also post a summary comment + +In addition to inline comments, leave a top-level summary so the PR author gets the full picture at a glance. Use the review output format from `references/review-output-template.md`. 
+ +**With gh:** +```bash +gh pr comment $PR_NUMBER --body "$(cat <<'EOF' +## Code Review Summary + +**Verdict: Changes Requested** (2 issues, 1 suggestion) + +### 🔴 Critical +- **src/auth.py:45** — SQL injection vulnerability + +### ⚠️ Warnings +- **src/models.py:23** — Plaintext password storage + +### 💡 Suggestions +- **src/utils.py:8** — Duplicated logic, consider consolidating + +### ✅ Looks Good +- Clean API design +- Good error handling in the middleware layer + +--- +*Reviewed by Hermes Agent* +EOF +)" +``` + +### Step 9: Clean up + +```bash +git checkout main +git branch -D pr-$PR_NUMBER +``` + +### Decision: Approve vs Request Changes vs Comment + +- **Approve** — no critical or warning-level issues, only minor suggestions or all clear +- **Request Changes** — any critical or warning-level issue that should be fixed before merge +- **Comment** — observations and suggestions, but nothing blocking (use when you're unsure or the PR is a draft) diff --git a/website/docs/user-guide/skills/bundled/github/github-github-issues.md b/website/docs/user-guide/skills/bundled/github/github-github-issues.md new file mode 100644 index 000000000..8493663cd --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-issues.md @@ -0,0 +1,387 @@ +--- +title: "Github Issues — Create, manage, triage, and close GitHub issues" +sidebar_label: "Github Issues" +description: "Create, manage, triage, and close GitHub issues" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Issues + +Create, manage, triage, and close GitHub issues. Search existing issues, add labels, assign people, and link to PRs. Works with gh CLI or falls back to git + GitHub REST API via curl. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-issues` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Issues`, `Project-Management`, `Bug-Tracking`, `Triage` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Issues Management + +Create, search, triage, and manage GitHub issues. Each section shows `gh` first, then the `curl` fallback. + +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) +- Inside a git repo with a GitHub remote, or specify the repo explicitly + +### Setup + +```bash +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi + +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +``` + +--- + +## 1. 
Viewing Issues + +**With gh:** + +```bash +gh issue list +gh issue list --state open --label "bug" +gh issue list --assignee @me +gh issue list --search "authentication error" --state all +gh issue view 42 +``` + +**With curl:** + +```bash +# List open issues +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?state=open&per_page=20" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin): + if 'pull_request' not in i: # GitHub API returns PRs in /issues too + labels = ', '.join(l['name'] for l in i['labels']) + print(f\"#{i['number']:5} {i['state']:6} {labels:30} {i['title']}\")" + +# Filter by label +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?state=open&labels=bug&per_page=20" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin): + if 'pull_request' not in i: + print(f\"#{i['number']} {i['title']}\")" + +# View a specific issue +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42 \ + | python3 -c " +import sys, json +i = json.load(sys.stdin) +labels = ', '.join(l['name'] for l in i['labels']) +assignees = ', '.join(a['login'] for a in i['assignees']) +print(f\"#{i['number']}: {i['title']}\") +print(f\"State: {i['state']} Labels: {labels} Assignees: {assignees}\") +print(f\"Author: {i['user']['login']} Created: {i['created_at']}\") +print(f\"\n{i['body']}\")" + +# Search issues +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/search/issues?q=authentication+error+repo:$OWNER/$REPO" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin)['items']: + print(f\"#{i['number']} {i['state']:6} {i['title']}\")" +``` + +## 2. Creating Issues + +**With gh:** + +```bash +gh issue create \ + --title "Login redirect ignores ?next= parameter" \ + --body "## Description +After logging in, users always land on /dashboard. 
+ +## Steps to Reproduce +1. Navigate to /settings while logged out +2. Get redirected to /login?next=/settings +3. Log in +4. Actual: redirected to /dashboard (should go to /settings) + +## Expected Behavior +Respect the ?next= query parameter." \ + --label "bug,backend" \ + --assignee "username" +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues \ + -d '{ + "title": "Login redirect ignores ?next= parameter", + "body": "## Description\nAfter logging in, users always land on /dashboard.\n\n## Steps to Reproduce\n1. Navigate to /settings while logged out\n2. Get redirected to /login?next=/settings\n3. Log in\n4. Actual: redirected to /dashboard\n\n## Expected Behavior\nRespect the ?next= query parameter.", + "labels": ["bug", "backend"], + "assignees": ["username"] + }' +``` + +### Bug Report Template + +``` +## Bug Description + + +## Steps to Reproduce +1. +2. + +## Expected Behavior + + +## Actual Behavior + + +## Environment +- OS: +- Version: +``` + +### Feature Request Template + +``` +## Feature Description + + +## Motivation + + +## Proposed Solution + + +## Alternatives Considered + +``` + +## 3. 
Managing Issues + +### Add/Remove Labels + +**With gh:** + +```bash +gh issue edit 42 --add-label "priority:high,bug" +gh issue edit 42 --remove-label "needs-triage" +``` + +**With curl:** + +```bash +# Add labels +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/labels \ + -d '{"labels": ["priority:high", "bug"]}' + +# Remove a label +curl -s -X DELETE \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/labels/needs-triage + +# List available labels in the repo +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/labels \ + | python3 -c " +import sys, json +for l in json.load(sys.stdin): + print(f\" {l['name']:30} {l.get('description', '')}\")" +``` + +### Assignment + +**With gh:** + +```bash +gh issue edit 42 --add-assignee username +gh issue edit 42 --add-assignee @me +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/assignees \ + -d '{"assignees": ["username"]}' +``` + +### Commenting + +**With gh:** + +```bash +gh issue comment 42 --body "Investigated — root cause is in auth middleware. Working on a fix." +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/comments \ + -d '{"body": "Investigated — root cause is in auth middleware. 
Working on a fix."}' +``` + +### Closing and Reopening + +**With gh:** + +```bash +gh issue close 42 +gh issue close 42 --reason "not planned" +gh issue reopen 42 +``` + +**With curl:** + +```bash +# Close +curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42 \ + -d '{"state": "closed", "state_reason": "completed"}' + +# Reopen +curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42 \ + -d '{"state": "open"}' +``` + +### Linking Issues to PRs + +Issues are automatically closed when a PR merges with the right keywords in the body: + +``` +Closes #42 +Fixes #42 +Resolves #42 +``` + +To create a branch from an issue: + +**With gh:** + +```bash +gh issue develop 42 --checkout +``` + +**With git (manual equivalent):** + +```bash +git checkout main && git pull origin main +git checkout -b fix/issue-42-login-redirect +``` + +## 4. Issue Triage Workflow + +When asked to triage issues: + +1. **List untriaged issues:** + +```bash +# With gh +gh issue list --label "needs-triage" --state open + +# With curl +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?labels=needs-triage&state=open" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin): + if 'pull_request' not in i: + print(f\"#{i['number']} {i['title']}\")" +``` + +2. **Read and categorize** each issue (view details, understand the bug/feature) + +3. **Apply labels and priority** (see Managing Issues above) + +4. **Assign** if the owner is clear + +5. **Comment with triage notes** if needed + +## 5. 
Bulk Operations + +For batch operations, combine API calls with shell scripting: + +**With gh:** + +```bash +# Close all issues with a specific label +gh issue list --label "wontfix" --json number --jq '.[].number' | \ + xargs -I {} gh issue close {} --reason "not planned" +``` + +**With curl:** + +```bash +# List issue numbers with a label, then close each +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?labels=wontfix&state=open" \ + | python3 -c "import sys,json; [print(i['number']) for i in json.load(sys.stdin)]" \ + | while read num; do + curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/$num \ + -d '{"state": "closed", "state_reason": "not_planned"}' + echo "Closed #$num" + done +``` + +## Quick Reference Table + +| Action | gh | curl endpoint | +|--------|-----|--------------| +| List issues | `gh issue list` | `GET /repos/{o}/{r}/issues` | +| View issue | `gh issue view N` | `GET /repos/{o}/{r}/issues/N` | +| Create issue | `gh issue create ...` | `POST /repos/{o}/{r}/issues` | +| Add labels | `gh issue edit N --add-label ...` | `POST /repos/{o}/{r}/issues/N/labels` | +| Assign | `gh issue edit N --add-assignee ...` | `POST /repos/{o}/{r}/issues/N/assignees` | +| Comment | `gh issue comment N --body ...` | `POST /repos/{o}/{r}/issues/N/comments` | +| Close | `gh issue close N` | `PATCH /repos/{o}/{r}/issues/N` | +| Search | `gh issue list --search "..."` | `GET /search/issues?q=...` | diff --git a/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md b/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md new file mode 100644 index 000000000..f1a31e157 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md @@ -0,0 +1,384 @@ +--- +title: "Github Pr Workflow" +sidebar_label: "Github Pr Workflow" +description: "Full pull request lifecycle — create branches, 
commit changes, open PRs, monitor CI status, auto-fix failures, and merge" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Pr Workflow + +Full pull request lifecycle — create branches, commit changes, open PRs, monitor CI status, auto-fix failures, and merge. Works with gh CLI or falls back to git + GitHub REST API via curl. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-pr-workflow` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Pull-Requests`, `CI/CD`, `Git`, `Automation`, `Merge` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Pull Request Workflow + +Complete guide for managing the PR lifecycle. Each section shows the `gh` way first, then the `git` + `curl` fallback for machines without `gh`. 
+ +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) +- Inside a git repository with a GitHub remote + +### Quick Auth Detection + +```bash +# Determine which method to use throughout this workflow +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + # Ensure we have a token for API calls + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi +echo "Using: $AUTH" +``` + +### Extracting Owner/Repo from the Git Remote + +Many `curl` commands need `owner/repo`. Extract it from the git remote: + +```bash +# Works for both HTTPS and SSH remote URLs +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +echo "Owner: $OWNER, Repo: $REPO" +``` + +--- + +## 1. Branch Creation + +This part is pure `git` — identical either way: + +```bash +# Make sure you're up to date +git fetch origin +git checkout main && git pull origin main + +# Create and switch to a new branch +git checkout -b feat/add-user-authentication +``` + +Branch naming conventions: +- `feat/description` — new features +- `fix/description` — bug fixes +- `refactor/description` — code restructuring +- `docs/description` — documentation +- `ci/description` — CI/CD changes + +## 2. 
Making Commits + +Use the agent's file tools (`write_file`, `patch`) to make changes, then commit: + +```bash +# Stage specific files +git add src/auth.py src/models/user.py tests/test_auth.py + +# Commit with a conventional commit message +git commit -m "feat: add JWT-based user authentication + +- Add login/register endpoints +- Add User model with password hashing +- Add auth middleware for protected routes +- Add unit tests for auth flow" +``` + +Commit message format (Conventional Commits): +``` +type(scope): short description + +Longer explanation if needed. Wrap at 72 characters. +``` + +Types: `feat`, `fix`, `refactor`, `docs`, `test`, `ci`, `chore`, `perf` + +## 3. Pushing and Creating a PR + +### Push the Branch (same either way) + +```bash +git push -u origin HEAD +``` + +### Create the PR + +**With gh:** + +```bash +gh pr create \ + --title "feat: add JWT-based user authentication" \ + --body "## Summary +- Adds login and register API endpoints +- JWT token generation and validation + +## Test Plan +- [ ] Unit tests pass + +Closes #42" +``` + +Options: `--draft`, `--reviewer user1,user2`, `--label "enhancement"`, `--base develop` + +**With git + curl:** + +```bash +BRANCH=$(git branch --show-current) + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/$OWNER/$REPO/pulls \ + -d "{ + \"title\": \"feat: add JWT-based user authentication\", + \"body\": \"## Summary\nAdds login and register API endpoints.\n\nCloses #42\", + \"head\": \"$BRANCH\", + \"base\": \"main\" + }" +``` + +The response JSON includes the PR `number` — save it for later commands. + +To create as a draft, add `"draft": true` to the JSON body. + +## 4. 
Monitoring CI Status + +### Check CI Status + +**With gh:** + +```bash +# One-shot check +gh pr checks + +# Watch until all checks finish (polls every 10s) +gh pr checks --watch +``` + +**With git + curl:** + +```bash +# Get the latest commit SHA on the current branch +SHA=$(git rev-parse HEAD) + +# Query the combined status +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/commits/$SHA/status \ + | python3 -c " +import sys, json +data = json.load(sys.stdin) +print(f\"Overall: {data['state']}\") +for s in data.get('statuses', []): + print(f\" {s['context']}: {s['state']} - {s.get('description', '')}\")" + +# Also check GitHub Actions check runs (separate endpoint) +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/commits/$SHA/check-runs \ + | python3 -c " +import sys, json +data = json.load(sys.stdin) +for cr in data.get('check_runs', []): + print(f\" {cr['name']}: {cr['status']} / {cr['conclusion'] or 'pending'}\")" +``` + +### Poll Until Complete (git + curl) + +```bash +# Simple polling loop — check every 30 seconds, up to 10 minutes +SHA=$(git rev-parse HEAD) +for i in $(seq 1 20); do + STATUS=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/commits/$SHA/status \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['state'])") + echo "Check $i: $STATUS" + if [ "$STATUS" = "success" ] || [ "$STATUS" = "failure" ] || [ "$STATUS" = "error" ]; then + break + fi + sleep 30 +done +``` + +## 5. Auto-Fixing CI Failures + +When CI fails, diagnose and fix. This loop works with either auth method. 
+ +### Step 1: Get Failure Details + +**With gh:** + +```bash +# List recent workflow runs on this branch +gh run list --branch $(git branch --show-current) --limit 5 + +# View failed logs +gh run view --log-failed +``` + +**With git + curl:** + +```bash +BRANCH=$(git branch --show-current) + +# List workflow runs on this branch +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/actions/runs?branch=$BRANCH&per_page=5" \ + | python3 -c " +import sys, json +runs = json.load(sys.stdin)['workflow_runs'] +for r in runs: + print(f\"Run {r['id']}: {r['name']} - {r['conclusion'] or r['status']}\")" + +# Get failed job logs (download as zip, extract, read) +RUN_ID= +curl -s -L \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/logs \ + -o /tmp/ci-logs.zip +cd /tmp && unzip -o ci-logs.zip -d ci-logs && cat ci-logs/*.txt +``` + +### Step 2: Fix and Push + +After identifying the issue, use file tools (`patch`, `write_file`) to fix it: + +```bash +git add +git commit -m "fix: resolve CI failure in " +git push +``` + +### Step 3: Verify + +Re-check CI status using the commands from Section 4 above. + +### Auto-Fix Loop Pattern + +When asked to auto-fix CI, follow this loop: + +1. Check CI status → identify failures +2. Read failure logs → understand the error +3. Use `read_file` + `patch`/`write_file` → fix the code +4. `git add . && git commit -m "fix: ..." && git push` +5. Wait for CI → re-check status +6. Repeat if still failing (up to 3 attempts, then ask the user) + +## 6. 
Merging + +**With gh:** + +```bash +# Squash merge + delete branch (cleanest for feature branches) +gh pr merge --squash --delete-branch + +# Enable auto-merge (merges when all checks pass) +gh pr merge --auto --squash --delete-branch +``` + +**With git + curl:** + +```bash +PR_NUMBER= + +# Merge the PR via API (squash) +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/merge \ + -d "{ + \"merge_method\": \"squash\", + \"commit_title\": \"feat: add user authentication (#$PR_NUMBER)\" + }" + +# Delete the remote branch after merge +BRANCH=$(git branch --show-current) +git push origin --delete $BRANCH + +# Switch back to main locally +git checkout main && git pull origin main +git branch -d $BRANCH +``` + +Merge methods: `"merge"` (merge commit), `"squash"`, `"rebase"` + +### Enable Auto-Merge (curl) + +```bash +# Auto-merge requires the repo to have it enabled in settings. +# This uses the GraphQL API since REST doesn't support auto-merge. +PR_NODE_ID=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['node_id'])") + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/graphql \ + -d "{\"query\": \"mutation { enablePullRequestAutoMerge(input: {pullRequestId: \\\"$PR_NODE_ID\\\", mergeMethod: SQUASH}) { clientMutationId } }\"}" +``` + +## 7. Complete Workflow Example + +```bash +# 1. Start from clean main +git checkout main && git pull origin main + +# 2. Branch +git checkout -b fix/login-redirect-bug + +# 3. (Agent makes code changes with file tools) + +# 4. Commit +git add src/auth/login.py tests/test_login.py +git commit -m "fix: correct redirect URL after login + +Preserves the ?next= parameter instead of always redirecting to /dashboard." + +# 5. Push +git push -u origin HEAD + +# 6. 
Create PR (picks gh or curl based on what's available) +# ... (see Section 3) + +# 7. Monitor CI (see Section 4) + +# 8. Merge when green (see Section 6) +``` + +## Useful PR Commands Reference + +| Action | gh | git + curl | +|--------|-----|-----------| +| List my PRs | `gh pr list --author @me` | `curl -s -H "Authorization: token $GITHUB_TOKEN" "https://api.github.com/repos/$OWNER/$REPO/pulls?state=open"` | +| View PR diff | `gh pr diff` | `git diff main...HEAD` (local) or `curl -H "Accept: application/vnd.github.diff" ...` | +| Add comment | `gh pr comment N --body "..."` | `curl -X POST .../issues/N/comments -d '{"body":"..."}'` | +| Request review | `gh pr edit N --add-reviewer user` | `curl -X POST .../pulls/N/requested_reviewers -d '{"reviewers":["user"]}'` | +| Close PR | `gh pr close N` | `curl -X PATCH .../pulls/N -d '{"state":"closed"}'` | +| Check out someone's PR | `gh pr checkout N` | `git fetch origin pull/N/head:pr-N && git checkout pr-N` | diff --git a/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md b/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md new file mode 100644 index 000000000..839225034 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md @@ -0,0 +1,533 @@ +--- +title: "Github Repo Management — Clone, create, fork, configure, and manage GitHub repositories" +sidebar_label: "Github Repo Management" +description: "Clone, create, fork, configure, and manage GitHub repositories" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Repo Management + +Clone, create, fork, configure, and manage GitHub repositories. Manage remotes, secrets, releases, and workflows. Works with gh CLI or falls back to git + GitHub REST API via curl. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-repo-management` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Repositories`, `Git`, `Releases`, `Secrets`, `Configuration` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-issues`](/docs/user-guide/skills/bundled/github/github-github-issues) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Repository Management + +Create, clone, fork, configure, and manage GitHub repositories. Each section shows `gh` first, then the `git` + `curl` fallback. + +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) + +### Setup + +```bash +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi + +# Get your GitHub username (needed for several operations) +if [ "$AUTH" = "gh" ]; then + GH_USER=$(gh api user --jq '.login') +else + GH_USER=$(curl -s -H "Authorization: token $GITHUB_TOKEN" https://api.github.com/user | python3 -c "import sys,json; print(json.load(sys.stdin)['login'])") +fi +``` + +If you're inside a repo already: + +```bash +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; 
s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +``` + +--- + +## 1. Cloning Repositories + +Cloning is pure `git` — works identically either way: + +```bash +# Clone via HTTPS (works with credential helper or token-embedded URL) +git clone https://github.com/owner/repo-name.git + +# Clone into a specific directory +git clone https://github.com/owner/repo-name.git ./my-local-dir + +# Shallow clone (faster for large repos) +git clone --depth 1 https://github.com/owner/repo-name.git + +# Clone a specific branch +git clone --branch develop https://github.com/owner/repo-name.git + +# Clone via SSH (if SSH is configured) +git clone git@github.com:owner/repo-name.git +``` + +**With gh (shorthand):** + +```bash +gh repo clone owner/repo-name +gh repo clone owner/repo-name -- --depth 1 +``` + +## 2. Creating Repositories + +**With gh:** + +```bash +# Create a public repo and clone it +gh repo create my-new-project --public --clone + +# Private, with description and license +gh repo create my-new-project --private --description "A useful tool" --license MIT --clone + +# Under an organization +gh repo create my-org/my-new-project --public --clone + +# From existing local directory +cd /path/to/existing/project +gh repo create my-project --source . --public --push +``` + +**With git + curl:** + +```bash +# Create the remote repo via API +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/user/repos \ + -d '{ + "name": "my-new-project", + "description": "A useful tool", + "private": false, + "auto_init": true, + "license_template": "mit" + }' + +# Clone it +git clone https://github.com/$GH_USER/my-new-project.git +cd my-new-project + +# -- OR -- push an existing local directory to the new repo +cd /path/to/existing/project +git init +git add . 
+git commit -m "Initial commit" +git remote add origin https://github.com/$GH_USER/my-new-project.git +git push -u origin main +``` + +To create under an organization: + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/orgs/my-org/repos \ + -d '{"name": "my-new-project", "private": false}' +``` + +### From a Template + +**With gh:** + +```bash +gh repo create my-new-app --template owner/template-repo --public --clone +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/owner/template-repo/generate \ + -d '{"owner": "'"$GH_USER"'", "name": "my-new-app", "private": false}' +``` + +## 3. Forking Repositories + +**With gh:** + +```bash +gh repo fork owner/repo-name --clone +``` + +**With git + curl:** + +```bash +# Create the fork via API +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/owner/repo-name/forks + +# Wait a moment for GitHub to create it, then clone +sleep 3 +git clone https://github.com/$GH_USER/repo-name.git +cd repo-name + +# Add the original repo as "upstream" remote +git remote add upstream https://github.com/owner/repo-name.git +``` + +### Keeping a Fork in Sync + +```bash +# Pure git — works everywhere +git fetch upstream +git checkout main +git merge upstream/main +git push origin main +``` + +**With gh (shortcut):** + +```bash +gh repo sync $GH_USER/repo-name +``` + +## 4. 
Repository Information
+
+**With gh:**
+
+```bash
+gh repo view owner/repo-name
+gh repo list --limit 20
+gh search repos "machine learning" --language python --sort stars
+```
+
+**With curl:**
+
+```bash
+# View repo details
+curl -s \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO \
+  | python3 -c "
+import sys, json
+r = json.load(sys.stdin)
+print(f\"Name: {r['full_name']}\")
+print(f\"Description: {r['description']}\")
+print(f\"Stars: {r['stargazers_count']} Forks: {r['forks_count']}\")
+print(f\"Default branch: {r['default_branch']}\")
+print(f\"Language: {r['language']}\")"
+
+# List your repos
+curl -s \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  "https://api.github.com/user/repos?per_page=20&sort=updated" \
+  | python3 -c "
+import sys, json
+for r in json.load(sys.stdin):
+    vis = 'private' if r['private'] else 'public'
+    print(f\" {r['full_name']:40} {vis:8} {(r.get('language') or ''):10} ★{r['stargazers_count']}\")"
+
+# Search repos
+curl -s \
+  "https://api.github.com/search/repositories?q=machine+learning+language:python&sort=stars&per_page=10" \
+  | python3 -c "
+import sys, json
+for r in json.load(sys.stdin)['items']:
+    print(f\" {r['full_name']:40} ★{r['stargazers_count']:6} {r['description'][:60] if r['description'] else ''}\")"
+```
+
+## 5. 
Repository Settings + +**With gh:** + +```bash +gh repo edit --description "Updated description" --visibility public +gh repo edit --enable-wiki=false --enable-issues=true +gh repo edit --default-branch main +gh repo edit --add-topic "machine-learning,python" +gh repo edit --enable-auto-merge +``` + +**With curl:** + +```bash +curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO \ + -d '{ + "description": "Updated description", + "has_wiki": false, + "has_issues": true, + "allow_auto_merge": true + }' + +# Update topics +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.mercy-preview+json" \ + https://api.github.com/repos/$OWNER/$REPO/topics \ + -d '{"names": ["machine-learning", "python", "automation"]}' +``` + +## 6. Branch Protection + +```bash +# View current protection +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/branches/main/protection + +# Set up branch protection +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/branches/main/protection \ + -d '{ + "required_status_checks": { + "strict": true, + "contexts": ["ci/test", "ci/lint"] + }, + "enforce_admins": false, + "required_pull_request_reviews": { + "required_approving_review_count": 1 + }, + "restrictions": null + }' +``` + +## 7. 
Secrets Management (GitHub Actions) + +**With gh:** + +```bash +gh secret set API_KEY --body "your-secret-value" +gh secret set SSH_KEY < ~/.ssh/id_rsa +gh secret list +gh secret delete API_KEY +``` + +**With curl:** + +Secrets require encryption with the repo's public key — more involved via API: + +```bash +# Get the repo's public key for encrypting secrets +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/secrets/public-key + +# Encrypt and set (requires Python with PyNaCl) +python3 -c " +from base64 import b64encode +from nacl import encoding, public +import json, sys + +# Get the public key +key_id = '' +public_key = '' + +# Encrypt +sealed = public.SealedBox( + public.PublicKey(public_key.encode('utf-8'), encoding.Base64Encoder) +).encrypt('your-secret-value'.encode('utf-8')) +print(json.dumps({ + 'encrypted_value': b64encode(sealed).decode('utf-8'), + 'key_id': key_id +}))" + +# Then PUT the encrypted secret +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/secrets/API_KEY \ + -d '' + +# List secrets (names only, values hidden) +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/secrets \ + | python3 -c " +import sys, json +for s in json.load(sys.stdin)['secrets']: + print(f\" {s['name']:30} updated: {s['updated_at']}\")" +``` + +Note: For secrets, `gh secret set` is dramatically simpler. If setting secrets is needed and `gh` isn't available, recommend installing it for just that operation. + +## 8. 
Releases
+
+**With gh:**
+
+```bash
+gh release create v1.0.0 --title "v1.0.0" --generate-notes
+gh release create v2.0.0-rc1 --draft --prerelease --generate-notes
+gh release create v1.0.0 ./dist/binary --title "v1.0.0" --notes "Release notes"
+gh release list
+gh release download v1.0.0 --dir ./downloads
+```
+
+**With curl:**
+
+```bash
+# Create a release
+curl -s -X POST \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/releases \
+  -d '{
+    "tag_name": "v1.0.0",
+    "name": "v1.0.0",
+    "body": "## Changelog\n- Feature A\n- Bug fix B",
+    "draft": false,
+    "prerelease": false,
+    "generate_release_notes": true
+  }'
+
+# List releases
+curl -s \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/releases \
+  | python3 -c "
+import sys, json
+for r in json.load(sys.stdin):
+    tag = r.get('tag_name', 'no tag')
+    print(f\" {tag:15} {(r['name'] or ''):30} {'draft' if r['draft'] else 'published'}\")"
+
+# Upload a release asset (binary file)
+RELEASE_ID=
+curl -s -X POST \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  -H "Content-Type: application/octet-stream" \
+  "https://uploads.github.com/repos/$OWNER/$REPO/releases/$RELEASE_ID/assets?name=binary-amd64" \
+  --data-binary @./dist/binary-amd64
+```
+
+## 9. 
GitHub Actions Workflows + +**With gh:** + +```bash +gh workflow list +gh run list --limit 10 +gh run view +gh run view --log-failed +gh run rerun +gh run rerun --failed +gh workflow run ci.yml --ref main +gh workflow run deploy.yml -f environment=staging +``` + +**With curl:** + +```bash +# List workflows +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/workflows \ + | python3 -c " +import sys, json +for w in json.load(sys.stdin)['workflows']: + print(f\" {w['id']:10} {w['name']:30} {w['state']}\")" + +# List recent runs +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/actions/runs?per_page=10" \ + | python3 -c " +import sys, json +for r in json.load(sys.stdin)['workflow_runs']: + print(f\" Run {r['id']} {r['name']:30} {r['conclusion'] or r['status']}\")" + +# Download failed run logs +RUN_ID= +curl -s -L \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/logs \ + -o /tmp/ci-logs.zip +cd /tmp && unzip -o ci-logs.zip -d ci-logs + +# Re-run a failed workflow +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/rerun + +# Re-run only failed jobs +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/rerun-failed-jobs + +# Trigger a workflow manually (workflow_dispatch) +WORKFLOW_ID= +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/workflows/$WORKFLOW_ID/dispatches \ + -d '{"ref": "main", "inputs": {"environment": "staging"}}' +``` + +## 10. 
Gists + +**With gh:** + +```bash +gh gist create script.py --public --desc "Useful script" +gh gist list +``` + +**With curl:** + +```bash +# Create a gist +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/gists \ + -d '{ + "description": "Useful script", + "public": true, + "files": { + "script.py": {"content": "print(\"hello\")"} + } + }' + +# List your gists +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/gists \ + | python3 -c " +import sys, json +for g in json.load(sys.stdin): + files = ', '.join(g['files'].keys()) + print(f\" {g['id']} {g['description'] or '(no desc)':40} {files}\")" +``` + +## Quick Reference Table + +| Action | gh | git + curl | +|--------|-----|-----------| +| Clone | `gh repo clone o/r` | `git clone https://github.com/o/r.git` | +| Create repo | `gh repo create name --public` | `curl POST /user/repos` | +| Fork | `gh repo fork o/r --clone` | `curl POST /repos/o/r/forks` + `git clone` | +| Repo info | `gh repo view o/r` | `curl GET /repos/o/r` | +| Edit settings | `gh repo edit --...` | `curl PATCH /repos/o/r` | +| Create release | `gh release create v1.0` | `curl POST /repos/o/r/releases` | +| List workflows | `gh workflow list` | `curl GET /repos/o/r/actions/workflows` | +| Rerun CI | `gh run rerun ID` | `curl POST /repos/o/r/actions/runs/ID/rerun` | +| Set secret | `gh secret set KEY` | `curl PUT /repos/o/r/actions/secrets/KEY` (+ encryption) | diff --git a/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md b/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md new file mode 100644 index 000000000..267c8c064 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md @@ -0,0 +1,374 @@ +--- +title: "Native Mcp" +sidebar_label: "Native Mcp" +description: "Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools" +--- + +{/* This page 
is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Native Mcp + +Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools. Supports stdio and HTTP transports with automatic reconnection, security filtering, and zero-config tool injection. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mcp/native-mcp` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `MCP`, `Tools`, `Integrations` | +| Related skills | [`mcporter`](/docs/user-guide/skills/optional/mcp/mcp-mcporter) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Native MCP Client + +Hermes Agent has a built-in MCP client that connects to MCP servers at startup, discovers their tools, and makes them available as first-class tools the agent can call directly. No bridge CLI needed -- tools from MCP servers appear alongside built-in tools like `terminal`, `read_file`, etc. + +## When to Use + +Use this whenever you want to: +- Connect to MCP servers and use their tools from within Hermes Agent +- Add external capabilities (filesystem access, GitHub, databases, APIs) via MCP +- Run local stdio-based MCP servers (npx, uvx, or any command) +- Connect to remote HTTP/StreamableHTTP MCP servers +- Have MCP tools auto-discovered and available in every conversation + +For ad-hoc, one-off MCP tool calls from the terminal without configuring anything, see the `mcporter` skill instead. + +## Prerequisites + +- **mcp Python package** -- optional dependency; install with `pip install mcp`. If not installed, MCP support is silently disabled. 
+- **Node.js** -- required for `npx`-based MCP servers (most community servers) +- **uv** -- required for `uvx`-based MCP servers (Python-based servers) + +Install the MCP SDK: + +```bash +pip install mcp +# or, if using uv: +uv pip install mcp +``` + +## Quick Start + +Add MCP servers to `~/.hermes/config.yaml` under the `mcp_servers` key: + +```yaml +mcp_servers: + time: + command: "uvx" + args: ["mcp-server-time"] +``` + +Restart Hermes Agent. On startup it will: +1. Connect to the server +2. Discover available tools +3. Register them with the prefix `mcp_time_*` +4. Inject them into all platform toolsets + +You can then use the tools naturally -- just ask the agent to get the current time. + +## Configuration Reference + +Each entry under `mcp_servers` is a server name mapped to its config. There are two transport types: **stdio** (command-based) and **HTTP** (url-based). + +### Stdio Transport (command + args) + +```yaml +mcp_servers: + server_name: + command: "npx" # (required) executable to run + args: ["-y", "pkg-name"] # (optional) command arguments, default: [] + env: # (optional) environment variables for the subprocess + SOME_API_KEY: "value" + timeout: 120 # (optional) per-tool-call timeout in seconds, default: 120 + connect_timeout: 60 # (optional) initial connection timeout in seconds, default: 60 +``` + +### HTTP Transport (url) + +```yaml +mcp_servers: + server_name: + url: "https://my-server.example.com/mcp" # (required) server URL + headers: # (optional) HTTP headers + Authorization: "Bearer sk-..." 
+ timeout: 180 # (optional) per-tool-call timeout in seconds, default: 120 + connect_timeout: 60 # (optional) initial connection timeout in seconds, default: 60 +``` + +### All Config Options + +| Option | Type | Default | Description | +|-------------------|--------|---------|---------------------------------------------------| +| `command` | string | -- | Executable to run (stdio transport, required) | +| `args` | list | `[]` | Arguments passed to the command | +| `env` | dict | `{}` | Extra environment variables for the subprocess | +| `url` | string | -- | Server URL (HTTP transport, required) | +| `headers` | dict | `{}` | HTTP headers sent with every request | +| `timeout` | int | `120` | Per-tool-call timeout in seconds | +| `connect_timeout` | int | `60` | Timeout for initial connection and discovery | + +Note: A server config must have either `command` (stdio) or `url` (HTTP), not both. + +## How It Works + +### Startup Discovery + +When Hermes Agent starts, `discover_mcp_tools()` is called during tool initialization: + +1. Reads `mcp_servers` from `~/.hermes/config.yaml` +2. For each server, spawns a connection in a dedicated background event loop +3. Initializes the MCP session and calls `list_tools()` to discover available tools +4. Registers each tool in the Hermes tool registry + +### Tool Naming Convention + +MCP tools are registered with the naming pattern: + +``` +mcp_{server_name}_{tool_name} +``` + +Hyphens and dots in names are replaced with underscores for LLM API compatibility. + +Examples: +- Server `filesystem`, tool `read_file` → `mcp_filesystem_read_file` +- Server `github`, tool `list-issues` → `mcp_github_list_issues` +- Server `my-api`, tool `fetch.data` → `mcp_my_api_fetch_data` + +### Auto-Injection + +After discovery, MCP tools are automatically injected into all `hermes-*` platform toolsets (CLI, Discord, Telegram, etc.). This means MCP tools are available in every conversation without any additional configuration. 
+ +### Connection Lifecycle + +- Each server runs as a long-lived asyncio Task in a background daemon thread +- Connections persist for the lifetime of the agent process +- If a connection drops, automatic reconnection with exponential backoff kicks in (up to 5 retries, max 60s backoff) +- On agent shutdown, all connections are gracefully closed + +### Idempotency + +`discover_mcp_tools()` is idempotent -- calling it multiple times only connects to servers that aren't already connected. Failed servers are retried on subsequent calls. + +## Transport Types + +### Stdio Transport + +The most common transport. Hermes launches the MCP server as a subprocess and communicates over stdin/stdout. + +```yaml +mcp_servers: + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user/projects"] +``` + +The subprocess inherits a **filtered** environment (see Security section below) plus any variables you specify in `env`. + +### HTTP / StreamableHTTP Transport + +For remote or shared MCP servers. Requires the `mcp` package to include HTTP client support (`mcp.client.streamable_http`). + +```yaml +mcp_servers: + remote_api: + url: "https://mcp.example.com/mcp" + headers: + Authorization: "Bearer sk-..." +``` + +If HTTP support is not available in your installed `mcp` version, the server will fail with an ImportError and other servers will continue normally. + +## Security + +### Environment Variable Filtering + +For stdio servers, Hermes does NOT pass your full shell environment to MCP subprocesses. Only safe baseline variables are inherited: + +- `PATH`, `HOME`, `USER`, `LANG`, `LC_ALL`, `TERM`, `SHELL`, `TMPDIR` +- Any `XDG_*` variables + +All other environment variables (API keys, tokens, secrets) are excluded unless you explicitly add them via the `env` config key. This prevents accidental credential leakage to untrusted MCP servers. 
+ +```yaml +mcp_servers: + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + # Only this token is passed to the subprocess + GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..." +``` + +### Credential Stripping in Error Messages + +If an MCP tool call fails, any credential-like patterns in the error message are automatically redacted before being shown to the LLM. This covers: + +- GitHub PATs (`ghp_...`) +- OpenAI-style keys (`sk-...`) +- Bearer tokens +- Generic `token=`, `key=`, `API_KEY=`, `password=`, `secret=` patterns + +## Troubleshooting + +### "MCP SDK not available -- skipping MCP tool discovery" + +The `mcp` Python package is not installed. Install it: + +```bash +pip install mcp +``` + +### "No MCP servers configured" + +No `mcp_servers` key in `~/.hermes/config.yaml`, or it's empty. Add at least one server. + +### "Failed to connect to MCP server 'X'" + +Common causes: +- **Command not found**: The `command` binary isn't on PATH. Ensure `npx`, `uvx`, or the relevant command is installed. +- **Package not found**: For npx servers, the npm package may not exist or may need `-y` in args to auto-install. +- **Timeout**: The server took too long to start. Increase `connect_timeout`. +- **Port conflict**: For HTTP servers, the URL may be unreachable. + +### "MCP server 'X' requires HTTP transport but mcp.client.streamable_http is not available" + +Your `mcp` package version doesn't include HTTP client support. Upgrade: + +```bash +pip install --upgrade mcp +``` + +### Tools not appearing + +- Check that the server is listed under `mcp_servers` (not `mcp` or `servers`) +- Ensure the YAML indentation is correct +- Look at Hermes Agent startup logs for connection messages +- Tool names are prefixed with `mcp_{server}_{tool}` -- look for that pattern + +### Connection keeps dropping + +The client retries up to 5 times with exponential backoff (1s, 2s, 4s, 8s, 16s, capped at 60s). 
If the server is fundamentally unreachable, it gives up after 5 attempts. Check the server process and network connectivity. + +## Examples + +### Time Server (uvx) + +```yaml +mcp_servers: + time: + command: "uvx" + args: ["mcp-server-time"] +``` + +Registers tools like `mcp_time_get_current_time`. + +### Filesystem Server (npx) + +```yaml +mcp_servers: + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user/documents"] + timeout: 30 +``` + +Registers tools like `mcp_filesystem_read_file`, `mcp_filesystem_write_file`, `mcp_filesystem_list_directory`. + +### GitHub Server with Authentication + +```yaml +mcp_servers: + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_xxxxxxxxxxxxxxxxxxxx" + timeout: 60 +``` + +Registers tools like `mcp_github_list_issues`, `mcp_github_create_pull_request`, etc. + +### Remote HTTP Server + +```yaml +mcp_servers: + company_api: + url: "https://mcp.mycompany.com/v1/mcp" + headers: + Authorization: "Bearer sk-xxxxxxxxxxxxxxxxxxxx" + X-Team-Id: "engineering" + timeout: 180 + connect_timeout: 30 +``` + +### Multiple Servers + +```yaml +mcp_servers: + time: + command: "uvx" + args: ["mcp-server-time"] + + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"] + + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_xxxxxxxxxxxxxxxxxxxx" + + company_api: + url: "https://mcp.internal.company.com/mcp" + headers: + Authorization: "Bearer sk-xxxxxxxxxxxxxxxxxxxx" + timeout: 300 +``` + +All tools from all servers are registered and available simultaneously. Each server's tools are prefixed with its name to avoid collisions. + +## Sampling (Server-Initiated LLM Requests) + +Hermes supports MCP's `sampling/createMessage` capability — MCP servers can request LLM completions through the agent during tool execution. 
This enables agent-in-the-loop workflows (data analysis, content generation, decision-making). + +Sampling is **enabled by default**. Configure per server: + +```yaml +mcp_servers: + my_server: + command: "npx" + args: ["-y", "my-mcp-server"] + sampling: + enabled: true # default: true + model: "gemini-3-flash" # model override (optional) + max_tokens_cap: 4096 # max tokens per request + timeout: 30 # LLM call timeout (seconds) + max_rpm: 10 # max requests per minute + allowed_models: [] # model whitelist (empty = all) + max_tool_rounds: 5 # tool loop limit (0 = disable) + log_level: "info" # audit verbosity +``` + +Servers can also include `tools` in sampling requests for multi-turn tool-augmented workflows. The `max_tool_rounds` config prevents infinite tool loops. Per-server audit metrics (requests, errors, tokens, tool use count) are tracked via `get_mcp_status()`. + +Disable sampling for untrusted servers with `sampling: { enabled: false }`. + +## Notes + +- MCP tools are called synchronously from the agent's perspective but run asynchronously on a dedicated background event loop +- Tool results are returned as JSON with either `{"result": "..."}` or `{"error": "..."}` +- The native MCP client is independent of `mcporter` -- you can use both simultaneously +- Server connections are persistent and shared across all conversations in the same agent process +- Adding or removing servers requires restarting the agent (no hot-reload currently) diff --git a/website/docs/user-guide/skills/bundled/media/media-gif-search.md b/website/docs/user-guide/skills/bundled/media/media-gif-search.md new file mode 100644 index 000000000..67b56645d --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-gif-search.md @@ -0,0 +1,101 @@ +--- +title: "Gif Search — Search and download GIFs from Tenor using curl" +sidebar_label: "Gif Search" +description: "Search and download GIFs from Tenor using curl" +--- + +{/* This page is auto-generated from the skill's SKILL.md by 
website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Gif Search + +Search and download GIFs from Tenor using curl. No dependencies beyond curl and jq. Useful for finding reaction GIFs, creating visual content, and sending GIFs in chat. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/gif-search` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GIF`, `Media`, `Search`, `Tenor`, `API` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GIF Search (Tenor API) + +Search and download GIFs directly via the Tenor API using curl. No extra tools needed. + +## Setup + +Set your Tenor API key in your environment (add to `~/.hermes/.env`): + +```bash +TENOR_API_KEY=your_key_here +``` + +Get a free API key at https://developers.google.com/tenor/guides/quickstart — the Google Cloud Console Tenor API key is free and has generous rate limits. 
+ +## Prerequisites + +- `curl` and `jq` (both standard on macOS/Linux) +- `TENOR_API_KEY` environment variable + +## Search for GIFs + +```bash +# Search and get GIF URLs +curl -s "https://tenor.googleapis.com/v2/search?q=thumbs+up&limit=5&key=${TENOR_API_KEY}" | jq -r '.results[].media_formats.gif.url' + +# Get smaller/preview versions +curl -s "https://tenor.googleapis.com/v2/search?q=nice+work&limit=3&key=${TENOR_API_KEY}" | jq -r '.results[].media_formats.tinygif.url' +``` + +## Download a GIF + +```bash +# Search and download the top result +URL=$(curl -s "https://tenor.googleapis.com/v2/search?q=celebration&limit=1&key=${TENOR_API_KEY}" | jq -r '.results[0].media_formats.gif.url') +curl -sL "$URL" -o celebration.gif +``` + +## Get Full Metadata + +```bash +curl -s "https://tenor.googleapis.com/v2/search?q=cat&limit=3&key=${TENOR_API_KEY}" | jq '.results[] | {title: .title, url: .media_formats.gif.url, preview: .media_formats.tinygif.url, dimensions: .media_formats.gif.dims}' +``` + +## API Parameters + +| Parameter | Description | +|-----------|-------------| +| `q` | Search query (URL-encode spaces as `+`) | +| `limit` | Max results (1-50, default 20) | +| `key` | API key (from `$TENOR_API_KEY` env var) | +| `media_filter` | Filter formats: `gif`, `tinygif`, `mp4`, `tinymp4`, `webm` | +| `contentfilter` | Safety: `off`, `low`, `medium`, `high` | +| `locale` | Language: `en_US`, `es`, `fr`, etc. 
| + +## Available Media Formats + +Each result has multiple formats under `.media_formats`: + +| Format | Use case | +|--------|----------| +| `gif` | Full quality GIF | +| `tinygif` | Small preview GIF | +| `mp4` | Video version (smaller file size) | +| `tinymp4` | Small preview video | +| `webm` | WebM video | +| `nanogif` | Tiny thumbnail | + +## Notes + +- URL-encode the query: spaces as `+`, special chars as `%XX` +- For sending in chat, `tinygif` URLs are lighter weight +- GIF URLs can be used directly in markdown: `![alt](https://github.com/NousResearch/hermes-agent/blob/main/skills/media/gif-search/url)` diff --git a/website/docs/user-guide/skills/bundled/media/media-heartmula.md b/website/docs/user-guide/skills/bundled/media/media-heartmula.md new file mode 100644 index 000000000..85dae5e86 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-heartmula.md @@ -0,0 +1,188 @@ +--- +title: "Heartmula — Set up and run HeartMuLa, the open-source music generation model family (Suno-like)" +sidebar_label: "Heartmula" +description: "Set up and run HeartMuLa, the open-source music generation model family (Suno-like)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Heartmula + +Set up and run HeartMuLa, the open-source music generation model family (Suno-like). Generates full songs from lyrics + tags with multilingual support. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/heartmula` | +| Version | `1.0.0` | +| Tags | `music`, `audio`, `generation`, `ai`, `heartmula`, `heartcodec`, `lyrics`, `songs` | +| Related skills | `audiocraft` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# HeartMuLa - Open-Source Music Generation + +## Overview +HeartMuLa is a family of open-source music foundation models (Apache-2.0) that generates music conditioned on lyrics and tags. Comparable to Suno for open-source. Includes: +- **HeartMuLa** - Music language model (3B/7B) for generation from lyrics + tags +- **HeartCodec** - 12.5Hz music codec for high-fidelity audio reconstruction +- **HeartTranscriptor** - Whisper-based lyrics transcription +- **HeartCLAP** - Audio-text alignment model + +## When to Use +- User wants to generate music/songs from text descriptions +- User wants an open-source Suno alternative +- User wants local/offline music generation +- User asks about HeartMuLa, heartlib, or AI music generation + +## Hardware Requirements +- **Minimum**: 8GB VRAM with `--lazy_load true` (loads/unloads models sequentially) +- **Recommended**: 16GB+ VRAM for comfortable single-GPU usage +- **Multi-GPU**: Use `--mula_device cuda:0 --codec_device cuda:1` to split across GPUs +- 3B model with lazy_load peaks at ~6.2GB VRAM + +## Installation Steps + +### 1. Clone Repository +```bash +cd ~/ # or desired directory +git clone https://github.com/HeartMuLa/heartlib.git +cd heartlib +``` + +### 2. Create Virtual Environment (Python 3.10 required) +```bash +uv venv --python 3.10 .venv +. .venv/bin/activate +uv pip install -e . +``` + +### 3. Fix Dependency Compatibility Issues + +**IMPORTANT**: As of Feb 2026, the pinned dependencies have conflicts with newer packages. Apply these fixes: + +```bash +# Upgrade datasets (old version incompatible with current pyarrow) +uv pip install --upgrade datasets + +# Upgrade transformers (needed for huggingface-hub 1.x compatibility) +uv pip install --upgrade transformers +``` + +### 4. 
Patch Source Code (Required for transformers 5.x) + +**Patch 1 - RoPE cache fix** in `src/heartlib/heartmula/modeling_heartmula.py`: + +In the `setup_caches` method of the `HeartMuLa` class, add RoPE reinitialization after the `reset_caches` try/except block and before the `with device:` block: + +```python +# Re-initialize RoPE caches that were skipped during meta-device loading +from torchtune.models.llama3_1._position_embeddings import Llama3ScaledRoPE +for module in self.modules(): + if isinstance(module, Llama3ScaledRoPE) and not module.is_cache_built: + module.rope_init() + module.to(device) +``` + +**Why**: `from_pretrained` creates model on meta device first; `Llama3ScaledRoPE.rope_init()` skips cache building on meta tensors, then never rebuilds after weights are loaded to real device. + +**Patch 2 - HeartCodec loading fix** in `src/heartlib/pipelines/music_generation.py`: + +Add `ignore_mismatched_sizes=True` to ALL `HeartCodec.from_pretrained()` calls (there are 2: the eager load in `__init__` and the lazy load in the `codec` property). + +**Why**: VQ codebook `initted` buffers have shape `[1]` in checkpoint vs `[]` in model. Same data, just scalar vs 0-d tensor. Safe to ignore. + +### 5. Download Model Checkpoints +```bash +cd heartlib # project root +hf download --local-dir './ckpt' 'HeartMuLa/HeartMuLaGen' +hf download --local-dir './ckpt/HeartMuLa-oss-3B' 'HeartMuLa/HeartMuLa-oss-3B-happy-new-year' +hf download --local-dir './ckpt/HeartCodec-oss' 'HeartMuLa/HeartCodec-oss-20260123' +``` + +All 3 can be downloaded in parallel. Total size is several GB. + +## GPU / CUDA + +HeartMuLa uses CUDA by default (`--mula_device cuda --codec_device cuda`). No extra setup needed if the user has an NVIDIA GPU with PyTorch CUDA support installed. 
+ +- The installed `torch==2.4.1` includes CUDA 12.1 support out of the box +- `torchtune` may report version `0.4.0+cpu` — this is just package metadata, it still uses CUDA via PyTorch +- To verify GPU is being used, look for "CUDA memory" lines in the output (e.g. "CUDA memory before unloading: 6.20 GB") +- **No GPU?** You can run on CPU with `--mula_device cpu --codec_device cpu`, but expect generation to be **extremely slow** (potentially 30-60+ minutes for a single song vs ~4 minutes on GPU). CPU mode also requires significant RAM (~12GB+ free). If the user has no NVIDIA GPU, recommend using a cloud GPU service (Google Colab free tier with T4, Lambda Labs, etc.) or the online demo at https://heartmula.github.io/ instead. + +## Usage + +### Basic Generation +```bash +cd heartlib +. .venv/bin/activate +python ./examples/run_music_generation.py \ + --model_path=./ckpt \ + --version="3B" \ + --lyrics="./assets/lyrics.txt" \ + --tags="./assets/tags.txt" \ + --save_path="./assets/output.mp3" \ + --lazy_load true +``` + +### Input Formatting + +**Tags** (comma-separated, no spaces): +``` +piano,happy,wedding,synthesizer,romantic +``` +or +``` +rock,energetic,guitar,drums,male-vocal +``` + +**Lyrics** (use bracketed structural tags): +``` +[Intro] + +[Verse] +Your lyrics here... + +[Chorus] +Chorus lyrics... + +[Bridge] +Bridge lyrics... 
+ +[Outro] +``` + +### Key Parameters +| Parameter | Default | Description | +|-----------|---------|-------------| +| `--max_audio_length_ms` | 240000 | Max length in ms (240s = 4 min) | +| `--topk` | 50 | Top-k sampling | +| `--temperature` | 1.0 | Sampling temperature | +| `--cfg_scale` | 1.5 | Classifier-free guidance scale | +| `--lazy_load` | false | Load/unload models on demand (saves VRAM) | +| `--mula_dtype` | bfloat16 | Dtype for HeartMuLa (bf16 recommended) | +| `--codec_dtype` | float32 | Dtype for HeartCodec (fp32 recommended for quality) | + +### Performance +- RTF (Real-Time Factor) ≈ 1.0 — a 4-minute song takes ~4 minutes to generate +- Output: MP3, 48kHz stereo, 128kbps + +## Pitfalls +1. **Do NOT use bf16 for HeartCodec** — degrades audio quality. Use fp32 (default). +2. **Tags may be ignored** — known issue (#90). Lyrics tend to dominate; experiment with tag ordering. +3. **Triton not available on macOS** — Linux/CUDA only for GPU acceleration. +4. **RTX 5080 incompatibility** reported in upstream issues. +5. The dependency pin conflicts require the manual upgrades and patches described above. + +## Links +- Repo: https://github.com/HeartMuLa/heartlib +- Models: https://huggingface.co/HeartMuLa +- Paper: https://arxiv.org/abs/2601.10547 +- License: Apache-2.0 diff --git a/website/docs/user-guide/skills/bundled/media/media-songsee.md b/website/docs/user-guide/skills/bundled/media/media-songsee.md new file mode 100644 index 000000000..231b87ea3 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-songsee.md @@ -0,0 +1,97 @@ +--- +title: "Songsee — Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc" +sidebar_label: "Songsee" +description: "Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. 
*/} + +# Songsee + +Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc.) from audio files via CLI. Useful for audio analysis, music production debugging, and visual documentation. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/songsee` | +| Version | `1.0.0` | +| Author | community | +| License | MIT | +| Tags | `Audio`, `Visualization`, `Spectrogram`, `Music`, `Analysis` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# songsee + +Generate spectrograms and multi-panel audio feature visualizations from audio files. + +## Prerequisites + +Requires [Go](https://go.dev/doc/install): +```bash +go install github.com/steipete/songsee/cmd/songsee@latest +``` + +Optional: `ffmpeg` for formats beyond WAV/MP3. + +## Quick Start + +```bash +# Basic spectrogram +songsee track.mp3 + +# Save to specific file +songsee track.mp3 -o spectrogram.png + +# Multi-panel visualization grid +songsee track.mp3 --viz spectrogram,mel,chroma,hpss,selfsim,loudness,tempogram,mfcc,flux + +# Time slice (start at 12.5s, 8s duration) +songsee track.mp3 --start 12.5 --duration 8 -o slice.jpg + +# From stdin +cat track.mp3 | songsee - --format png -o out.png +``` + +## Visualization Types + +Use `--viz` with comma-separated values: + +| Type | Description | +|------|-------------| +| `spectrogram` | Standard frequency spectrogram | +| `mel` | Mel-scaled spectrogram | +| `chroma` | Pitch class distribution | +| `hpss` | Harmonic/percussive separation | +| `selfsim` | Self-similarity matrix | +| `loudness` | Loudness over time | +| `tempogram` | Tempo estimation | +| `mfcc` | Mel-frequency cepstral coefficients | +| `flux` | Spectral flux (onset detection) | + +Multiple `--viz` types render as a grid in a single image. 
+ +## Common Flags + +| Flag | Description | +|------|-------------| +| `--viz` | Visualization types (comma-separated) | +| `--style` | Color palette: `classic`, `magma`, `inferno`, `viridis`, `gray` | +| `--width` / `--height` | Output image dimensions | +| `--window` / `--hop` | FFT window and hop size | +| `--min-freq` / `--max-freq` | Frequency range filter | +| `--start` / `--duration` | Time slice of the audio | +| `--format` | Output format: `jpg` or `png` | +| `-o` | Output file path | + +## Notes + +- WAV and MP3 are decoded natively; other formats require `ffmpeg` +- Output images can be inspected with `vision_analyze` for automated audio analysis +- Useful for comparing audio outputs, debugging synthesis, or documenting audio processing pipelines diff --git a/website/docs/user-guide/skills/bundled/media/media-spotify.md b/website/docs/user-guide/skills/bundled/media/media-spotify.md new file mode 100644 index 000000000..4fbda8439 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-spotify.md @@ -0,0 +1,150 @@ +--- +title: "Spotify" +sidebar_label: "Spotify" +description: "Control Spotify — play music, search the catalog, manage playlists and library, inspect devices and playback state" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Spotify + +Control Spotify — play music, search the catalog, manage playlists and library, inspect devices and playback state. Loads when the user asks to play/pause/queue music, search tracks/albums/artists, manage playlists, or check what's playing. Assumes the Hermes Spotify toolset is enabled and `hermes auth spotify` has been run. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/spotify` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `spotify`, `music`, `playback`, `playlists`, `media` | +| Related skills | [`gif-search`](/docs/user-guide/skills/bundled/media/media-gif-search) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Spotify + +Control the user's Spotify account via the Hermes Spotify toolset (7 tools). Setup guide: https://hermes-agent.nousresearch.com/docs/user-guide/features/spotify + +## When to use this skill + +The user says something like "play X", "pause", "skip", "queue up X", "what's playing", "search for X", "add to my X playlist", "make a playlist", "save this to my library", etc. + +## The 7 tools + +- `spotify_playback` — play, pause, next, previous, seek, set_repeat, set_shuffle, set_volume, get_state, get_currently_playing, recently_played +- `spotify_devices` — list, transfer +- `spotify_queue` — get, add +- `spotify_search` — search the catalog +- `spotify_playlists` — list, get, create, add_items, remove_items, update_details +- `spotify_albums` — get, tracks +- `spotify_library` — list/save/remove with `kind: "tracks"|"albums"` + +Playback-mutating actions require Spotify Premium; search/library/playlist ops work on Free. + +## Canonical patterns (minimize tool calls) + +### "Play <artist/track/album>" +One search, then play by URI. Do NOT loop through search results describing them unless the user asked for options. 
+
+```
+spotify_search({"query": "miles davis kind of blue", "types": ["album"], "limit": 1})
+→ got album URI spotify:album:1weenld61qoidwYuZ1GESA
+spotify_playback({"action": "play", "context_uri": "spotify:album:1weenld61qoidwYuZ1GESA"})
+```
+
+For "play some <artist>" (no specific song), prefer `types: ["artist"]` and play the artist context URI — Spotify handles smart shuffle. If the user says "the song" or "that track", search `types: ["track"]` and pass `uris: [track_uri]` to play.
+
+### "What's playing?" / "What am I listening to?"
+Single call — don't chain get_state after get_currently_playing.
+
+```
+spotify_playback({"action": "get_currently_playing"})
+```
+
+If it returns 204/empty (`is_playing: false`), tell the user nothing is playing. Don't retry.
+
+### "Pause" / "Skip" / "Volume 50"
+Direct action, no preflight inspection needed.
+
+```
+spotify_playback({"action": "pause"})
+spotify_playback({"action": "next"})
+spotify_playback({"action": "set_volume", "volume_percent": 50})
+```
+
+### "Add to my <playlist name> playlist"
+1. `spotify_playlists list` to find the playlist ID by name
+2. Get the track URI (from currently playing, or search)
+3. `spotify_playlists add_items` with the playlist_id and URIs
+
+```
+spotify_playlists({"action": "list"})
+→ found "Late Night Jazz" = 37i9dQZF1DX4wta20PHgwo
+spotify_playback({"action": "get_currently_playing"})
+→ current track uri = spotify:track:0DiWol3AO6WpXZgp0goxAV
+spotify_playlists({"action": "add_items",
+ "playlist_id": "37i9dQZF1DX4wta20PHgwo",
+ "uris": ["spotify:track:0DiWol3AO6WpXZgp0goxAV"]})
+```
+
+### "Create a playlist called X and add the last 3 songs I played"
+```
+spotify_playback({"action": "recently_played", "limit": 3})
+spotify_playlists({"action": "create", "name": "Focus 2026"})
+→ got playlist_id back in response
+spotify_playlists({"action": "add_items", "playlist_id": "<playlist_id>", "uris": [<3 uris>]})
+```
+
+### "Save / unsave / is this saved?"
+Use `spotify_library` with the right `kind`. + +``` +spotify_library({"kind": "tracks", "action": "save", "uris": ["spotify:track:..."]}) +spotify_library({"kind": "albums", "action": "list", "limit": 50}) +``` + +### "Transfer playback to my <device>" +``` +spotify_devices({"action": "list"}) +→ pick the device_id by matching name/type +spotify_devices({"action": "transfer", "device_id": "", "play": true}) +``` + +## Critical failure modes + +**`403 Forbidden — No active device found`** on any playback action means Spotify isn't running anywhere. Tell the user: "Open Spotify on your phone/desktop/web player first, start any track for a second, then retry." Don't retry the tool call blindly — it will fail the same way. You can call `spotify_devices list` to confirm; an empty list means no active device. + +**`403 Forbidden — Premium required`** means the user is on Free and tried to mutate playback. Don't retry; tell them this action needs Premium. Reads still work (search, playlists, library, get_state). + +**`204 No Content` on `get_currently_playing`** is NOT an error — it means nothing is playing. The tool returns `is_playing: false`. Just report that to the user. + +**`429 Too Many Requests`** = rate limit. Wait and retry once. If it keeps happening, you're looping — stop. + +**`401 Unauthorized` after a retry** — refresh token revoked. Tell the user to run `hermes auth spotify` again. + +## URI and ID formats + +Spotify uses three interchangeable ID formats. The tools accept all three and normalize: + +- URI: `spotify:track:0DiWol3AO6WpXZgp0goxAV` (preferred) +- URL: `https://open.spotify.com/track/0DiWol3AO6WpXZgp0goxAV` +- Bare ID: `0DiWol3AO6WpXZgp0goxAV` + +When in doubt, use full URIs. Search results return URIs in the `uri` field — pass those directly. + +Entity types: `track`, `album`, `artist`, `playlist`, `show`, `episode`. 
Use the right type for the action — `spotify_playback.play` with a `context_uri` expects album/playlist/artist; `uris` expects an array of track URIs. + +## What NOT to do + +- **Don't call `get_state` before every action.** Spotify accepts play/pause/skip without preflight. Only inspect state when the user asked "what's playing" or you need to reason about device/track. +- **Don't describe search results unless asked.** If the user said "play X", search, grab the top URI, play it. They'll hear it's wrong if it's wrong. +- **Don't retry on `403 Premium required` or `403 No active device`.** Those are permanent until user action. +- **Don't use `spotify_search` to find a playlist by name** — that searches the public Spotify catalog. User playlists come from `spotify_playlists list`. +- **Don't mix `kind: "tracks"` with album URIs** in `spotify_library` (or vice versa). The tool normalizes IDs but the API endpoint differs. diff --git a/website/docs/user-guide/skills/bundled/media/media-youtube-content.md b/website/docs/user-guide/skills/bundled/media/media-youtube-content.md new file mode 100644 index 000000000..e94c755c9 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-youtube-content.md @@ -0,0 +1,88 @@ +--- +title: "Youtube Content" +sidebar_label: "Youtube Content" +description: "Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Youtube Content + +Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts). Use when the user shares a YouTube URL or video link, asks to summarize a video, requests a transcript, or wants to extract and reformat content from any YouTube video. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/youtube-content` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# YouTube Content Tool + +Extract transcripts from YouTube videos and convert them into useful formats. + +## Setup + +```bash +pip install youtube-transcript-api +``` + +## Helper Script + +`SKILL_DIR` is the directory containing this SKILL.md file. The script accepts any standard YouTube URL format, short links (youtu.be), shorts, embeds, live links, or a raw 11-character video ID. + +```bash +# JSON output with metadata +python3 SKILL_DIR/scripts/fetch_transcript.py "https://youtube.com/watch?v=VIDEO_ID" + +# Plain text (good for piping into further processing) +python3 SKILL_DIR/scripts/fetch_transcript.py "URL" --text-only + +# With timestamps +python3 SKILL_DIR/scripts/fetch_transcript.py "URL" --timestamps + +# Specific language with fallback chain +python3 SKILL_DIR/scripts/fetch_transcript.py "URL" --language tr,en +``` + +## Output Formats + +After fetching the transcript, format it based on what the user asks for: + +- **Chapters**: Group by topic shifts, output timestamped chapter list +- **Summary**: Concise 5-10 sentence overview of the entire video +- **Chapter summaries**: Chapters with a short paragraph summary for each +- **Thread**: Twitter/X thread format — numbered posts, each under 280 chars +- **Blog post**: Full article with title, sections, and key takeaways +- **Quotes**: Notable quotes with timestamps + +### Example — Chapters Output + +``` +00:00 Introduction — host opens with the problem statement +03:45 Background — prior work and why existing solutions fall short +12:20 Core method — walkthrough of the proposed approach +24:10 Results — benchmark comparisons and key takeaways +31:55 Q&A — 
audience questions on scalability and next steps +``` + +## Workflow + +1. **Fetch** the transcript using the helper script with `--text-only --timestamps`. +2. **Validate**: confirm the output is non-empty and in the expected language. If empty, retry without `--language` to get any available transcript. If still empty, tell the user the video likely has transcripts disabled. +3. **Chunk if needed**: if the transcript exceeds ~50K characters, split into overlapping chunks (~40K with 2K overlap) and summarize each chunk before merging. +4. **Transform** into the requested output format. If the user did not specify a format, default to a summary. +5. **Verify**: re-read the transformed output to check for coherence, correct timestamps, and completeness before presenting. + +## Error Handling + +- **Transcript disabled**: tell the user; suggest they check if subtitles are available on the video page. +- **Private/unavailable video**: relay the error and ask the user to verify the URL. +- **No matching language**: retry without `--language` to fetch any available transcript, then note the actual language to the user. +- **Dependency missing**: run `pip install youtube-transcript-api` and retry. diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness.md b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness.md new file mode 100644 index 000000000..0112f747a --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness.md @@ -0,0 +1,507 @@ +--- +title: "Evaluating Llms Harness — Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag)" +sidebar_label: "Evaluating Llms Harness" +description: "Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. 
Edit the source SKILL.md, not this page. */} + +# Evaluating Llms Harness + +Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag). Use when benchmarking model quality, comparing models, reporting academic results, or tracking training progress. Industry standard used by EleutherAI, HuggingFace, and major labs. Supports HuggingFace, vLLM, APIs. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/evaluation/lm-evaluation-harness` | +| Version | `1.0.0` | +| Author | Orchestra Research | +| License | MIT | +| Dependencies | `lm-eval`, `transformers`, `vllm` | +| Tags | `Evaluation`, `LM Evaluation Harness`, `Benchmarking`, `MMLU`, `HumanEval`, `GSM8K`, `EleutherAI`, `Model Quality`, `Academic Benchmarks`, `Industry Standard` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# lm-evaluation-harness - LLM Benchmarking + +## Quick start + +lm-evaluation-harness evaluates LLMs across 60+ academic benchmarks using standardized prompts and metrics. + +**Installation**: +```bash +pip install lm-eval +``` + +**Evaluate any HuggingFace model**: +```bash +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf \ + --tasks mmlu,gsm8k,hellaswag \ + --device cuda:0 \ + --batch_size 8 +``` + +**View available tasks**: +```bash +lm_eval --tasks list +``` + +## Common workflows + +### Workflow 1: Standard benchmark evaluation + +Evaluate model on core benchmarks (MMLU, GSM8K, HumanEval). 
+
+Copy this checklist:
+
+```
+Benchmark Evaluation:
+- [ ] Step 1: Choose benchmark suite
+- [ ] Step 2: Configure model
+- [ ] Step 3: Run evaluation
+- [ ] Step 4: Analyze results
+```
+
+**Step 1: Choose benchmark suite**
+
+**Core reasoning benchmarks**:
+- **MMLU** (Massive Multitask Language Understanding) - 57 subjects, multiple choice
+- **GSM8K** - Grade school math word problems
+- **HellaSwag** - Common sense reasoning
+- **TruthfulQA** - Truthfulness and factuality
+- **ARC** (AI2 Reasoning Challenge) - Science questions
+
+**Code benchmarks**:
+- **HumanEval** - Python code generation (164 problems)
+- **MBPP** (Mostly Basic Python Problems) - Python coding
+
+**Standard suite** (recommended for model releases):
+```bash
+--tasks mmlu,gsm8k,hellaswag,truthfulqa,arc_challenge
+```
+
+**Step 2: Configure model**
+
+**HuggingFace model**:
+```bash
+lm_eval --model hf \
+  --model_args pretrained=meta-llama/Llama-2-7b-hf,dtype=bfloat16 \
+  --tasks mmlu \
+  --device cuda:0 \
+  --batch_size auto # Auto-detect optimal batch size
+```
+
+**Quantized model (4-bit/8-bit)**:
+```bash
+lm_eval --model hf \
+  --model_args pretrained=meta-llama/Llama-2-7b-hf,load_in_4bit=True \
+  --tasks mmlu \
+  --device cuda:0
+```
+
+**Custom checkpoint**:
+```bash
+lm_eval --model hf \
+  --model_args pretrained=/path/to/my-model,tokenizer=/path/to/tokenizer \
+  --tasks mmlu \
+  --device cuda:0
+```
+
+**Step 3: Run evaluation**
+
+```bash
+# Full MMLU evaluation (57 subjects), 5-shot (the standard setting)
+lm_eval --model hf \
+  --model_args pretrained=meta-llama/Llama-2-7b-hf \
+  --tasks mmlu \
+  --num_fewshot 5 \
+  --batch_size 8 \
+  --output_path results/ \
+  --log_samples # Save individual predictions
+
+# Multiple benchmarks at once
+lm_eval --model hf \
+  --model_args pretrained=meta-llama/Llama-2-7b-hf \
+  --tasks mmlu,gsm8k,hellaswag,truthfulqa,arc_challenge \
+  --num_fewshot 5 \
+  --batch_size 8 \
+  --output_path results/llama2-7b-eval.json
+```
+
+**Step 4: Analyze
results**
+
+Results saved to `results/llama2-7b-eval.json`:
+
+```json
+{
+  "results": {
+    "mmlu": {
+      "acc": 0.459,
+      "acc_stderr": 0.004
+    },
+    "gsm8k": {
+      "exact_match": 0.142,
+      "exact_match_stderr": 0.006
+    },
+    "hellaswag": {
+      "acc_norm": 0.765,
+      "acc_norm_stderr": 0.004
+    }
+  },
+  "config": {
+    "model": "hf",
+    "model_args": "pretrained=meta-llama/Llama-2-7b-hf",
+    "num_fewshot": 5
+  }
+}
+```
+
+### Workflow 2: Track training progress
+
+Evaluate checkpoints during training.
+
+```
+Training Progress Tracking:
+- [ ] Step 1: Set up periodic evaluation
+- [ ] Step 2: Choose quick benchmarks
+- [ ] Step 3: Automate evaluation
+- [ ] Step 4: Plot learning curves
+```
+
+**Step 1: Set up periodic evaluation**
+
+Evaluate every N training steps:
+
+```bash
+#!/bin/bash
+# eval_checkpoint.sh — quick 0-shot eval for speed
+
+CHECKPOINT_DIR=$1
+STEP=$2
+
+lm_eval --model hf \
+  --model_args pretrained=$CHECKPOINT_DIR/checkpoint-$STEP \
+  --tasks gsm8k,hellaswag \
+  --num_fewshot 0 \
+  --batch_size 16 \
+  --output_path results/step-$STEP.json
+```
+
+**Step 2: Choose quick benchmarks**
+
+Fast benchmarks for frequent evaluation:
+- **HellaSwag**: ~10 minutes on 1 GPU
+- **GSM8K**: ~5 minutes
+- **PIQA**: ~2 minutes
+
+Avoid for frequent eval (too slow):
+- **MMLU**: ~2 hours (57 subjects)
+- **HumanEval**: Requires code execution
+
+**Step 3: Automate evaluation**
+
+Integrate with training script:
+
+```python
+# In training loop
+if step % eval_interval == 0:
+    model.save_pretrained(f"checkpoints/step-{step}")
+
+    # Run evaluation
+    os.system(f"./eval_checkpoint.sh checkpoints step-{step}")
+```
+
+Or use PyTorch Lightning callbacks:
+
+```python
+from pytorch_lightning import Callback
+
+class EvalHarnessCallback(Callback):
+    def on_validation_epoch_end(self, trainer, pl_module):
+        step = trainer.global_step
+        checkpoint_path = f"checkpoints/step-{step}"
+
+        # Save checkpoint
+        trainer.save_checkpoint(checkpoint_path)
+
+        # Run lm-eval
+        os.system(f"lm_eval --model hf
--model_args pretrained={checkpoint_path} ...")
+```
+
+**Step 4: Plot learning curves**
+
+```python
+import glob, json
+import matplotlib.pyplot as plt
+
+# Load all results
+steps = []
+hellaswag_scores = []
+
+for file in sorted(glob.glob("results/step-*.json")):
+    with open(file) as f:
+        data = json.load(f)
+    step = int(file.split("-")[1].split(".")[0])
+    steps.append(step)
+    hellaswag_scores.append(data["results"]["hellaswag"]["acc_norm"])
+
+# Plot
+plt.plot(steps, hellaswag_scores)
+plt.xlabel("Training Step")
+plt.ylabel("HellaSwag Accuracy")
+plt.title("Training Progress")
+plt.savefig("training_curve.png")
+```
+
+### Workflow 3: Compare multiple models
+
+Benchmark suite for model comparison.
+
+```
+Model Comparison:
+- [ ] Step 1: Define model list
+- [ ] Step 2: Run evaluations
+- [ ] Step 3: Generate comparison table
+```
+
+**Step 1: Define model list**
+
+```bash
+# models.txt
+meta-llama/Llama-2-7b-hf
+meta-llama/Llama-2-13b-hf
+mistralai/Mistral-7B-v0.1
+microsoft/phi-2
+```
+
+**Step 2: Run evaluations**
+
+```bash
+#!/bin/bash
+# eval_all_models.sh
+
+TASKS="mmlu,gsm8k,hellaswag,truthfulqa"
+
+while read model; do
+  echo "Evaluating $model"
+
+  # Extract model name for output file
+  model_name=$(echo $model | sed 's/\//-/g')
+
+  lm_eval --model hf \
+    --model_args pretrained=$model,dtype=bfloat16 \
+    --tasks $TASKS \
+    --num_fewshot 5 \
+    --batch_size auto \
+    --output_path results/$model_name.json
+
+done < models.txt
+```
+
+**Step 3: Generate comparison table**
+
+```python
+import json
+import pandas as pd
+
+models = [
+    "meta-llama-Llama-2-7b-hf",
+    "meta-llama-Llama-2-13b-hf",
+    "mistralai-Mistral-7B-v0.1",
+    "microsoft-phi-2"
+]
+
+tasks = ["mmlu", "gsm8k", "hellaswag", "truthfulqa"]
+
+results = []
+for model in models:
+    with open(f"results/{model}.json") as f:
+        data = json.load(f)
+    row = {"Model": model.replace("-", "/")}
+    for task in tasks:
+        # Get primary metric for each task
+        metrics = data["results"][task]
+        if "acc" in metrics:
+            row[task.upper()] =
f"{metrics['acc']:.3f}" + elif "exact_match" in metrics: + row[task.upper()] = f"{metrics['exact_match']:.3f}" + results.append(row) + +df = pd.DataFrame(results) +print(df.to_markdown(index=False)) +``` + +Output: +``` +| Model | MMLU | GSM8K | HELLASWAG | TRUTHFULQA | +|------------------------|-------|-------|-----------|------------| +| meta-llama/Llama-2-7b | 0.459 | 0.142 | 0.765 | 0.391 | +| meta-llama/Llama-2-13b | 0.549 | 0.287 | 0.801 | 0.430 | +| mistralai/Mistral-7B | 0.626 | 0.395 | 0.812 | 0.428 | +| microsoft/phi-2 | 0.560 | 0.613 | 0.682 | 0.447 | +``` + +### Workflow 4: Evaluate with vLLM (faster inference) + +Use vLLM backend for 5-10x faster evaluation. + +``` +vLLM Evaluation: +- [ ] Step 1: Install vLLM +- [ ] Step 2: Configure vLLM backend +- [ ] Step 3: Run evaluation +``` + +**Step 1: Install vLLM** + +```bash +pip install vllm +``` + +**Step 2: Configure vLLM backend** + +```bash +lm_eval --model vllm \ + --model_args pretrained=meta-llama/Llama-2-7b-hf,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8 \ + --tasks mmlu \ + --batch_size auto +``` + +**Step 3: Run evaluation** + +vLLM is 5-10× faster than standard HuggingFace: + +```bash +# Standard HF: ~2 hours for MMLU on 7B model +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf \ + --tasks mmlu \ + --batch_size 8 + +# vLLM: ~15-20 minutes for MMLU on 7B model +lm_eval --model vllm \ + --model_args pretrained=meta-llama/Llama-2-7b-hf,tensor_parallel_size=2 \ + --tasks mmlu \ + --batch_size auto +``` + +## When to use vs alternatives + +**Use lm-evaluation-harness when:** +- Benchmarking models for academic papers +- Comparing model quality across standard tasks +- Tracking training progress +- Reporting standardized metrics (everyone uses same prompts) +- Need reproducible evaluation + +**Use alternatives instead:** +- **HELM** (Stanford): Broader evaluation (fairness, efficiency, calibration) +- **AlpacaEval**: Instruction-following evaluation with LLM 
judges +- **MT-Bench**: Conversational multi-turn evaluation +- **Custom scripts**: Domain-specific evaluation + +## Common issues + +**Issue: Evaluation too slow** + +Use vLLM backend: +```bash +lm_eval --model vllm \ + --model_args pretrained=model-name,tensor_parallel_size=2 +``` + +Or reduce fewshot examples: +```bash +--num_fewshot 0 # Instead of 5 +``` + +Or evaluate subset of MMLU: +```bash +--tasks mmlu_stem # Only STEM subjects +``` + +**Issue: Out of memory** + +Reduce batch size: +```bash +--batch_size 1 # Or --batch_size auto +``` + +Use quantization: +```bash +--model_args pretrained=model-name,load_in_8bit=True +``` + +Enable CPU offloading: +```bash +--model_args pretrained=model-name,device_map=auto,offload_folder=offload +``` + +**Issue: Different results than reported** + +Check fewshot count: +```bash +--num_fewshot 5 # Most papers use 5-shot +``` + +Check exact task name: +```bash +--tasks mmlu # Not mmlu_direct or mmlu_fewshot +``` + +Verify model and tokenizer match: +```bash +--model_args pretrained=model-name,tokenizer=same-model-name +``` + +**Issue: HumanEval not executing code** + +Install execution dependencies: +```bash +pip install human-eval +``` + +Enable code execution: +```bash +lm_eval --model hf \ + --model_args pretrained=model-name \ + --tasks humaneval \ + --allow_code_execution # Required for HumanEval +``` + +## Advanced topics + +**Benchmark descriptions**: See [references/benchmark-guide.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/benchmark-guide.md) for detailed description of all 60+ tasks, what they measure, and interpretation. + +**Custom tasks**: See [references/custom-tasks.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/custom-tasks.md) for creating domain-specific evaluation tasks. 
+ +**API evaluation**: See [references/api-evaluation.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/api-evaluation.md) for evaluating OpenAI, Anthropic, and other API models. + +**Multi-GPU strategies**: See [references/distributed-eval.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/distributed-eval.md) for data parallel and tensor parallel evaluation. + +## Hardware requirements + +- **GPU**: NVIDIA (CUDA 11.8+), works on CPU (very slow) +- **VRAM**: + - 7B model: 16GB (bf16) or 8GB (8-bit) + - 13B model: 28GB (bf16) or 14GB (8-bit) + - 70B model: Requires multi-GPU or quantization +- **Time** (7B model, single A100): + - HellaSwag: 10 minutes + - GSM8K: 5 minutes + - MMLU (full): 2 hours + - HumanEval: 20 minutes + +## Resources + +- GitHub: https://github.com/EleutherAI/lm-evaluation-harness +- Docs: https://github.com/EleutherAI/lm-evaluation-harness/tree/main/docs +- Task library: 60+ tasks including MMLU, GSM8K, HumanEval, TruthfulQA, HellaSwag, ARC, WinoGrande, etc. +- Leaderboard: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard (uses this harness) diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases.md b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases.md new file mode 100644 index 000000000..db8c4d4d7 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases.md @@ -0,0 +1,608 @@ +--- +title: "Weights And Biases" +sidebar_label: "Weights And Biases" +description: "Track ML experiments with automatic logging, visualize training in real-time, optimize hyperparameters with sweeps, and manage model registry with W&B - coll..." +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. 
*/} + +# Weights And Biases + +Track ML experiments with automatic logging, visualize training in real-time, optimize hyperparameters with sweeps, and manage model registry with W&B - collaborative MLOps platform + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/evaluation/weights-and-biases` | +| Version | `1.0.0` | +| Author | Orchestra Research | +| License | MIT | +| Dependencies | `wandb` | +| Tags | `MLOps`, `Weights And Biases`, `WandB`, `Experiment Tracking`, `Hyperparameter Tuning`, `Model Registry`, `Collaboration`, `Real-Time Visualization`, `PyTorch`, `TensorFlow`, `HuggingFace` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Weights & Biases: ML Experiment Tracking & MLOps + +## When to Use This Skill + +Use Weights & Biases (W&B) when you need to: +- **Track ML experiments** with automatic metric logging +- **Visualize training** in real-time dashboards +- **Compare runs** across hyperparameters and configurations +- **Optimize hyperparameters** with automated sweeps +- **Manage model registry** with versioning and lineage +- **Collaborate on ML projects** with team workspaces +- **Track artifacts** (datasets, models, code) with lineage + +**Users**: 200,000+ ML practitioners | **GitHub Stars**: 10.5k+ | **Integrations**: 100+ + +## Installation + +```bash +# Install W&B +pip install wandb + +# Login (creates API key) +wandb login + +# Or set API key programmatically +export WANDB_API_KEY=your_api_key_here +``` + +## Quick Start + +### Basic Experiment Tracking + +```python +import wandb + +# Initialize a run +run = wandb.init( + project="my-project", + config={ + "learning_rate": 0.001, + "epochs": 10, + "batch_size": 32, + "architecture": "ResNet50" + } +) + +# Training loop +for epoch in range(run.config.epochs): + # 
Your training code + train_loss = train_epoch() + val_loss = validate() + + # Log metrics + wandb.log({ + "epoch": epoch, + "train/loss": train_loss, + "val/loss": val_loss, + "train/accuracy": train_acc, + "val/accuracy": val_acc + }) + +# Finish the run +wandb.finish() +``` + +### With PyTorch + +```python +import torch +import wandb + +# Initialize +wandb.init(project="pytorch-demo", config={ + "lr": 0.001, + "epochs": 10 +}) + +# Access config +config = wandb.config + +# Training loop +for epoch in range(config.epochs): + for batch_idx, (data, target) in enumerate(train_loader): + # Forward pass + output = model(data) + loss = criterion(output, target) + + # Backward pass + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # Log every 100 batches + if batch_idx % 100 == 0: + wandb.log({ + "loss": loss.item(), + "epoch": epoch, + "batch": batch_idx + }) + +# Save model +torch.save(model.state_dict(), "model.pth") +wandb.save("model.pth") # Upload to W&B + +wandb.finish() +``` + +## Core Concepts + +### 1. Projects and Runs + +**Project**: Collection of related experiments +**Run**: Single execution of your training script + +```python +# Create/use project +run = wandb.init( + project="image-classification", + name="resnet50-experiment-1", # Optional run name + tags=["baseline", "resnet"], # Organize with tags + notes="First baseline run" # Add notes +) + +# Each run has unique ID +print(f"Run ID: {run.id}") +print(f"Run URL: {run.url}") +``` + +### 2. 
Configuration Tracking + +Track hyperparameters automatically: + +```python +config = { + # Model architecture + "model": "ResNet50", + "pretrained": True, + + # Training params + "learning_rate": 0.001, + "batch_size": 32, + "epochs": 50, + "optimizer": "Adam", + + # Data params + "dataset": "ImageNet", + "augmentation": "standard" +} + +wandb.init(project="my-project", config=config) + +# Access config during training +lr = wandb.config.learning_rate +batch_size = wandb.config.batch_size +``` + +### 3. Metric Logging + +```python +# Log scalars +wandb.log({"loss": 0.5, "accuracy": 0.92}) + +# Log multiple metrics +wandb.log({ + "train/loss": train_loss, + "train/accuracy": train_acc, + "val/loss": val_loss, + "val/accuracy": val_acc, + "learning_rate": current_lr, + "epoch": epoch +}) + +# Log with custom x-axis +wandb.log({"loss": loss}, step=global_step) + +# Log media (images, audio, video) +wandb.log({"examples": [wandb.Image(img) for img in images]}) + +# Log histograms +wandb.log({"gradients": wandb.Histogram(gradients)}) + +# Log tables +table = wandb.Table(columns=["id", "prediction", "ground_truth"]) +wandb.log({"predictions": table}) +``` + +### 4. Model Checkpointing + +```python +import torch +import wandb + +# Save model checkpoint +checkpoint = { + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': loss, +} + +torch.save(checkpoint, 'checkpoint.pth') + +# Upload to W&B +wandb.save('checkpoint.pth') + +# Or use Artifacts (recommended) +artifact = wandb.Artifact('model', type='model') +artifact.add_file('checkpoint.pth') +wandb.log_artifact(artifact) +``` + +## Hyperparameter Sweeps + +Automatically search for optimal hyperparameters. 
+ +### Define Sweep Configuration + +```python +sweep_config = { + 'method': 'bayes', # or 'grid', 'random' + 'metric': { + 'name': 'val/accuracy', + 'goal': 'maximize' + }, + 'parameters': { + 'learning_rate': { + 'distribution': 'log_uniform', + 'min': 1e-5, + 'max': 1e-1 + }, + 'batch_size': { + 'values': [16, 32, 64, 128] + }, + 'optimizer': { + 'values': ['adam', 'sgd', 'rmsprop'] + }, + 'dropout': { + 'distribution': 'uniform', + 'min': 0.1, + 'max': 0.5 + } + } +} + +# Initialize sweep +sweep_id = wandb.sweep(sweep_config, project="my-project") +``` + +### Define Training Function + +```python +def train(): + # Initialize run + run = wandb.init() + + # Access sweep parameters + lr = wandb.config.learning_rate + batch_size = wandb.config.batch_size + optimizer_name = wandb.config.optimizer + + # Build model with sweep config + model = build_model(wandb.config) + optimizer = get_optimizer(optimizer_name, lr) + + # Training loop + for epoch in range(NUM_EPOCHS): + train_loss = train_epoch(model, optimizer, batch_size) + val_acc = validate(model) + + # Log metrics + wandb.log({ + "train/loss": train_loss, + "val/accuracy": val_acc + }) + +# Run sweep +wandb.agent(sweep_id, function=train, count=50) # Run 50 trials +``` + +### Sweep Strategies + +```python +# Grid search - exhaustive +sweep_config = { + 'method': 'grid', + 'parameters': { + 'lr': {'values': [0.001, 0.01, 0.1]}, + 'batch_size': {'values': [16, 32, 64]} + } +} + +# Random search +sweep_config = { + 'method': 'random', + 'parameters': { + 'lr': {'distribution': 'uniform', 'min': 0.0001, 'max': 0.1}, + 'dropout': {'distribution': 'uniform', 'min': 0.1, 'max': 0.5} + } +} + +# Bayesian optimization (recommended) +sweep_config = { + 'method': 'bayes', + 'metric': {'name': 'val/loss', 'goal': 'minimize'}, + 'parameters': { + 'lr': {'distribution': 'log_uniform', 'min': 1e-5, 'max': 1e-1} + } +} +``` + +## Artifacts + +Track datasets, models, and other files with lineage. 
+ +### Log Artifacts + +```python +# Create artifact +artifact = wandb.Artifact( + name='training-dataset', + type='dataset', + description='ImageNet training split', + metadata={'size': '1.2M images', 'split': 'train'} +) + +# Add files +artifact.add_file('data/train.csv') +artifact.add_dir('data/images/') + +# Log artifact +wandb.log_artifact(artifact) +``` + +### Use Artifacts + +```python +# Download and use artifact +run = wandb.init(project="my-project") + +# Download artifact +artifact = run.use_artifact('training-dataset:latest') +artifact_dir = artifact.download() + +# Use the data +data = load_data(f"{artifact_dir}/train.csv") +``` + +### Model Registry + +```python +# Log model as artifact +model_artifact = wandb.Artifact( + name='resnet50-model', + type='model', + metadata={'architecture': 'ResNet50', 'accuracy': 0.95} +) + +model_artifact.add_file('model.pth') +wandb.log_artifact(model_artifact, aliases=['best', 'production']) + +# Link to model registry +run.link_artifact(model_artifact, 'model-registry/production-models') +``` + +## Integration Examples + +### HuggingFace Transformers + +```python +from transformers import Trainer, TrainingArguments +import wandb + +# Initialize W&B +wandb.init(project="hf-transformers") + +# Training arguments with W&B +training_args = TrainingArguments( + output_dir="./results", + report_to="wandb", # Enable W&B logging + run_name="bert-finetuning", + logging_steps=100, + save_steps=500 +) + +# Trainer automatically logs to W&B +trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset, + eval_dataset=eval_dataset +) + +trainer.train() +``` + +### PyTorch Lightning + +```python +from pytorch_lightning import Trainer +from pytorch_lightning.loggers import WandbLogger +import wandb + +# Create W&B logger +wandb_logger = WandbLogger( + project="lightning-demo", + log_model=True # Log model checkpoints +) + +# Use with Trainer +trainer = Trainer( + logger=wandb_logger, + max_epochs=10 +) + 
+trainer.fit(model, datamodule=dm) +``` + +### Keras/TensorFlow + +```python +import wandb +from wandb.keras import WandbCallback + +# Initialize +wandb.init(project="keras-demo") + +# Add callback +model.fit( + x_train, y_train, + validation_data=(x_val, y_val), + epochs=10, + callbacks=[WandbCallback()] # Auto-logs metrics +) +``` + +## Visualization & Analysis + +### Custom Charts + +```python +# Log custom visualizations +import matplotlib.pyplot as plt + +fig, ax = plt.subplots() +ax.plot(x, y) +wandb.log({"custom_plot": wandb.Image(fig)}) + +# Log confusion matrix +wandb.log({"conf_mat": wandb.plot.confusion_matrix( + probs=None, + y_true=ground_truth, + preds=predictions, + class_names=class_names +)}) +``` + +### Reports + +Create shareable reports in W&B UI: +- Combine runs, charts, and text +- Markdown support +- Embeddable visualizations +- Team collaboration + +## Best Practices + +### 1. Organize with Tags and Groups + +```python +wandb.init( + project="my-project", + tags=["baseline", "resnet50", "imagenet"], + group="resnet-experiments", # Group related runs + job_type="train" # Type of job +) +``` + +### 2. Log Everything Relevant + +```python +# Log system metrics +wandb.log({ + "gpu/util": gpu_utilization, + "gpu/memory": gpu_memory_used, + "cpu/util": cpu_utilization +}) + +# Log code version +wandb.log({"git_commit": git_commit_hash}) + +# Log data splits +wandb.log({ + "data/train_size": len(train_dataset), + "data/val_size": len(val_dataset) +}) +``` + +### 3. Use Descriptive Names + +```python +# ✅ Good: Descriptive run names +wandb.init( + project="nlp-classification", + name="bert-base-lr0.001-bs32-epoch10" +) + +# ❌ Bad: Generic names +wandb.init(project="nlp", name="run1") +``` + +### 4. 
Save Important Artifacts + +```python +# Save final model +artifact = wandb.Artifact('final-model', type='model') +artifact.add_file('model.pth') +wandb.log_artifact(artifact) + +# Save predictions for analysis +predictions_table = wandb.Table( + columns=["id", "input", "prediction", "ground_truth"], + data=predictions_data +) +wandb.log({"predictions": predictions_table}) +``` + +### 5. Use Offline Mode for Unstable Connections + +```python +import os + +# Enable offline mode +os.environ["WANDB_MODE"] = "offline" + +wandb.init(project="my-project") +# ... your code ... + +# Sync later +# wandb sync +``` + +## Team Collaboration + +### Share Runs + +```python +# Runs are automatically shareable via URL +run = wandb.init(project="team-project") +print(f"Share this URL: {run.url}") +``` + +### Team Projects + +- Create team account at wandb.ai +- Add team members +- Set project visibility (private/public) +- Use team-level artifacts and model registry + +## Pricing + +- **Free**: Unlimited public projects, 100GB storage +- **Academic**: Free for students/researchers +- **Teams**: $50/seat/month, private projects, unlimited storage +- **Enterprise**: Custom pricing, on-prem options + +## Resources + +- **Documentation**: https://docs.wandb.ai +- **GitHub**: https://github.com/wandb/wandb (10.5k+ stars) +- **Examples**: https://github.com/wandb/examples +- **Community**: https://wandb.ai/community +- **Discord**: https://wandb.me/discord + +## See Also + +- `references/sweeps.md` - Comprehensive hyperparameter optimization guide +- `references/artifacts.md` - Data and model versioning patterns +- `references/integrations.md` - Framework-specific examples diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub.md b/website/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub.md new file mode 100644 index 000000000..27ab41b5e --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub.md @@ -0,0 +1,99 @@ +--- 
+title: "Huggingface Hub" +sidebar_label: "Huggingface Hub" +description: "Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Space..." +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Huggingface Hub + +Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Spaces and buckets. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/huggingface-hub` | +| Version | `1.0.0` | +| Author | Hugging Face | +| License | MIT | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Hugging Face CLI (`hf`) Reference Guide + +The `hf` command is the modern command-line interface for interacting with the Hugging Face Hub, providing tools to manage repositories, models, datasets, and Spaces. + +> **IMPORTANT:** The `hf` command replaces the now deprecated `huggingface-cli` command. + +## Quick Start +* **Installation:** `curl -LsSf https://hf.co/cli/install.sh | bash -s` +* **Help:** Use `hf --help` to view all available functions and real-world examples. +* **Authentication:** Recommended via `HF_TOKEN` environment variable or the `--token` flag. + +--- + +## Core Commands + +### General Operations +* `hf download REPO_ID`: Download files from the Hub. +* `hf upload REPO_ID`: Upload files/folders (recommended for single-commit). +* `hf upload-large-folder REPO_ID LOCAL_PATH`: Recommended for resumable uploads of large directories. +* `hf sync`: Sync files between a local directory and a bucket. +* `hf env` / `hf version`: View environment and version details. 
+ +### Authentication (`hf auth`) +* `login` / `logout`: Manage sessions using tokens from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). +* `list` / `switch`: Manage and toggle between multiple stored access tokens. +* `whoami`: Identify the currently logged-in account. + +### Repository Management (`hf repos`) +* `create` / `delete`: Create or permanently remove repositories. +* `duplicate`: Clone a model, dataset, or Space to a new ID. +* `move`: Transfer a repository between namespaces. +* `branch` / `tag`: Manage Git-like references. +* `delete-files`: Remove specific files using patterns. + +--- + +## Specialized Hub Interactions + +### Datasets & Models +* **Datasets:** `hf datasets list`, `info`, and `parquet` (list parquet URLs). +* **SQL Queries:** `hf datasets sql SQL` — Execute raw SQL via DuckDB against dataset parquet URLs. +* **Models:** `hf models list` and `info`. +* **Papers:** `hf papers list` — View daily papers. + +### Discussions & Pull Requests (`hf discussions`) +* Manage the lifecycle of Hub contributions: `list`, `create`, `info`, `comment`, `close`, `reopen`, and `rename`. +* `diff`: View changes in a PR. +* `merge`: Finalize pull requests. + +### Infrastructure & Compute +* **Endpoints:** Deploy and manage Inference Endpoints (`deploy`, `pause`, `resume`, `scale-to-zero`, `catalog`). +* **Jobs:** Run compute tasks on HF infrastructure. Includes `hf jobs uv` for running Python scripts with inline dependencies and `stats` for resource monitoring. +* **Spaces:** Manage interactive apps. Includes `dev-mode` and `hot-reload` for Python files without full restarts. + +### Storage & Automation +* **Buckets:** Full S3-like bucket management (`create`, `cp`, `mv`, `rm`, `sync`). +* **Cache:** Manage local storage with `list`, `prune` (remove detached revisions), and `verify` (checksum checks). +* **Webhooks:** Automate workflows by managing Hub webhooks (`create`, `watch`, `enable`/`disable`). 
+* **Collections:** Organize Hub items into collections (`add-item`, `update`, `list`). + +--- + +## Advanced Usage & Tips + +### Global Flags +* `--format json`: Produces machine-readable output for automation. +* `-q` / `--quiet`: Limits output to IDs only. + +### Extensions & Skills +* **Extensions:** Extend CLI functionality via GitHub repositories using `hf extensions install REPO_ID`. +* **Skills:** Manage AI assistant skills with `hf skills add`. diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp.md b/website/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp.md new file mode 100644 index 000000000..19f08067f --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp.md @@ -0,0 +1,266 @@ +--- +title: "Llama Cpp — llama" +sidebar_label: "Llama Cpp" +description: "llama" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Llama Cpp + +llama.cpp local GGUF inference + HF Hub model discovery. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/inference/llama-cpp` | +| Version | `2.1.2` | +| Author | Orchestra Research | +| License | MIT | +| Dependencies | `llama-cpp-python>=0.2.0` | +| Tags | `llama.cpp`, `GGUF`, `Quantization`, `Hugging Face Hub`, `CPU Inference`, `Apple Silicon`, `Edge Deployment`, `AMD GPUs`, `Intel GPUs`, `NVIDIA`, `URL-first` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# llama.cpp + GGUF + +Use this skill for local GGUF inference, quant selection, or Hugging Face repo discovery for llama.cpp. 
+
+## When to use
+
+- Run local models on CPU, Apple Silicon, CUDA, ROCm, or Intel GPUs
+- Find the right GGUF for a specific Hugging Face repo
+- Build a `llama-server` or `llama-cli` command from the Hub
+- Search the Hub for models that already support llama.cpp
+- Enumerate available `.gguf` files and sizes for a repo
+- Decide between Q4/Q5/Q6/IQ variants for the user's RAM or VRAM
+
+## Model Discovery workflow
+
+Prefer URL workflows before asking for `hf`, Python, or custom scripts.
+
+1. Search for candidate repos on the Hub:
+   - Base: `https://huggingface.co/models?apps=llama.cpp&sort=trending`
+   - Add `search=<query>` for a model family
+   - Add `num_parameters=min:0,max:24B` or similar when the user has size constraints
+2. Open the repo with the llama.cpp local-app view:
+   - `https://huggingface.co/<repo_id>?local-app=llama.cpp`
+3. Treat the local-app snippet as the source of truth when it is visible:
+   - copy the exact `llama-server` or `llama-cli` command
+   - report the recommended quant exactly as HF shows it
+4. Read the same `?local-app=llama.cpp` URL as page text or HTML and extract the section under `Hardware compatibility`:
+   - prefer its exact quant labels and sizes over generic tables
+   - keep repo-specific labels such as `UD-Q4_K_M` or `IQ4_NL_XL`
+   - if that section is not visible in the fetched page source, say so and fall back to the tree API plus generic quant guidance
+5. Query the tree API to confirm what actually exists:
+   - `https://huggingface.co/api/models/<repo_id>/tree/main?recursive=true`
+   - keep entries where `type` is `file` and `path` ends with `.gguf`
+   - use `path` and `size` as the source of truth for filenames and byte sizes
+   - separate quantized checkpoints from `mmproj-*.gguf` projector files and `BF16/` shard files
+   - use `https://huggingface.co/<repo_id>/tree/main` only as a human fallback
+6.
If the local-app snippet is not text-visible, reconstruct the command from the repo plus the chosen quant:
+   - shorthand quant selection: `llama-server -hf <repo_id>:<quant>`
+   - exact-file fallback: `llama-server --hf-repo <repo_id> --hf-file <filename>`
+7. Only suggest conversion from Transformers weights if the repo does not already expose GGUF files.
+
+## Quick start
+
+### Install llama.cpp
+
+```bash
+# macOS / Linux (simplest)
+brew install llama.cpp
+```
+
+```bash
+winget install llama.cpp
+```
+
+```bash
+git clone https://github.com/ggml-org/llama.cpp
+cd llama.cpp
+cmake -B build
+cmake --build build --config Release
+```
+
+### Run directly from the Hugging Face Hub
+
+```bash
+llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0
+```
+
+```bash
+llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0
+```
+
+### Run an exact GGUF file from the Hub
+
+Use this when the tree API shows custom file naming or the exact HF snippet is missing.
+
+```bash
+llama-server \
+  --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf \
+  --hf-file Phi-3-mini-4k-instruct-q4.gguf \
+  -c 4096
+```
+
+### OpenAI-compatible server check
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "messages": [
+      {"role": "user", "content": "Write a limerick about Python exceptions"}
+    ]
+  }'
+```
+
+## Python bindings (llama-cpp-python)
+
+`pip install llama-cpp-python` (CUDA: `CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir`; Metal: `CMAKE_ARGS="-DGGML_METAL=on" ...`).
+ +### Basic generation + +```python +from llama_cpp import Llama + +llm = Llama( + model_path="./model-q4_k_m.gguf", + n_ctx=4096, + n_gpu_layers=35, # 0 for CPU, 99 to offload everything + n_threads=8, +) + +out = llm("What is machine learning?", max_tokens=256, temperature=0.7) +print(out["choices"][0]["text"]) +``` + +### Chat + streaming + +```python +llm = Llama( + model_path="./model-q4_k_m.gguf", + n_ctx=4096, + n_gpu_layers=35, + chat_format="llama-3", # or "chatml", "mistral", etc. +) + +resp = llm.create_chat_completion( + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is Python?"}, + ], + max_tokens=256, +) +print(resp["choices"][0]["message"]["content"]) + +# Streaming +for chunk in llm("Explain quantum computing:", max_tokens=256, stream=True): + print(chunk["choices"][0]["text"], end="", flush=True) +``` + +### Embeddings + +```python +llm = Llama(model_path="./model-q4_k_m.gguf", embedding=True, n_gpu_layers=35) +vec = llm.embed("This is a test sentence.") +print(f"Embedding dimension: {len(vec)}") +``` + +You can also load a GGUF straight from the Hub: + +```python +llm = Llama.from_pretrained( + repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF", + filename="*Q4_K_M.gguf", + n_gpu_layers=35, +) +``` + +## Choosing a quant + +Use the Hub page first, generic heuristics second. + +- Prefer the exact quant that HF marks as compatible for the user's hardware profile. +- For general chat, start with `Q4_K_M`. +- For code or technical work, prefer `Q5_K_M` or `Q6_K` if memory allows. +- For very tight RAM budgets, consider `Q3_K_M`, `IQ` variants, or `Q2` variants only if the user explicitly prioritizes fit over quality. +- For multimodal repos, mention `mmproj-*.gguf` separately. The projector is not the main model file. +- Do not normalize repo-native labels. If the page says `UD-Q4_K_M`, report `UD-Q4_K_M`. 
+ +## Extracting available GGUFs from a repo + +When the user asks what GGUFs exist, return: + +- filename +- file size +- quant label +- whether it is a main model or an auxiliary projector + +Ignore unless requested: + +- README +- BF16 shard files +- imatrix blobs or calibration artifacts + +Use the tree API for this step: + +- `https://huggingface.co/api/models//tree/main?recursive=true` + +For a repo like `unsloth/Qwen3.6-35B-A3B-GGUF`, the local-app page can show quant chips such as `UD-Q4_K_M`, `UD-Q5_K_M`, `UD-Q6_K`, and `Q8_0`, while the tree API exposes exact file paths such as `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf` and `Qwen3.6-35B-A3B-Q8_0.gguf` with byte sizes. Use the tree API to turn a quant label into an exact filename. + +## Search patterns + +Use these URL shapes directly: + +```text +https://huggingface.co/models?apps=llama.cpp&sort=trending +https://huggingface.co/models?search=&apps=llama.cpp&sort=trending +https://huggingface.co/models?search=&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending +https://huggingface.co/?local-app=llama.cpp +https://huggingface.co/api/models//tree/main?recursive=true +https://huggingface.co//tree/main +``` + +## Output format + +When answering discovery requests, prefer a compact structured result like: + +```text +Repo: +Recommended quant from HF: