diff --git a/.env.example b/.env.example index 98b7aae9326..f66b358b888 100644 --- a/.env.example +++ b/.env.example @@ -425,3 +425,24 @@ IMAGE_TOOLS_DEBUG=false # TEAMS_HOME_CHANNEL= # Default channel/chat ID for cron delivery # TEAMS_HOME_CHANNEL_NAME= # Display name for the home channel # TEAMS_PORT=3978 # Webhook listen port (Bot Framework default) + +# ============================================================================= +# GOOGLE CHAT INTEGRATION +# ============================================================================= +# Connects via Cloud Pub/Sub pull subscription (no public URL required). +# Setup walkthrough: website/docs/user-guide/messaging/google_chat.md. +# 1. Create a GCP project, enable the Google Chat API and Cloud Pub/Sub. +# 2. Create a Service Account with roles/pubsub.subscriber on the +# subscription (NOT project-wide); download the JSON key. +# 3. Configure your Chat app at console.cloud.google.com/apis/credentials +# → Google Chat API → Configuration → Cloud Pub/Sub topic. +# 4. (Optional, for native attachment delivery) Each user runs +# `/setup-files` once in their own DM after Pub/Sub is wired up. 
+# +# GOOGLE_CHAT_PROJECT_ID= # GCP project hosting the topic (or set GOOGLE_CLOUD_PROJECT) +# GOOGLE_CHAT_SUBSCRIPTION_NAME= # Full path: projects/<project-id>/subscriptions/<subscription-name> +# GOOGLE_CHAT_SERVICE_ACCOUNT_JSON= # Path to SA JSON (or set GOOGLE_APPLICATION_CREDENTIALS) +# GOOGLE_CHAT_ALLOWED_USERS= # Comma-separated emails allowed to talk to the bot +# GOOGLE_CHAT_ALLOW_ALL_USERS=false # Set true to skip the allowlist +# GOOGLE_CHAT_HOME_CHANNEL= # Default space (spaces/XXXX) for cron delivery +# GOOGLE_CHAT_HOME_CHANNEL_NAME= # Display name for the home channel diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 228ee339646..b643ae12fcc 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -16,9 +16,13 @@ on: permissions: contents: read +# Top-level concurrency: do NOT cancel in-flight builds when a new push lands. +# Every commit deserves its own SHA-tagged image in the registry, and we guard +# the :latest tag in a separate job below (with its own concurrency group) so +# a slow run can't clobber :latest with older bits. concurrency: group: docker-${{ github.ref }} - cancel-in-progress: true + cancel-in-progress: false jobs: build-and-push: @@ -26,11 +30,18 @@ jobs: if: github.repository == 'NousResearch/hermes-agent' runs-on: ubuntu-latest timeout-minutes: 60 + outputs: + pushed_sha_tag: ${{ steps.mark_pushed.outputs.pushed }} steps: - name: Checkout code uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: submodules: recursive + # Fetch the same history depth that the move-latest job uses for its + # `git merge-base --is-ancestor` check. That job runs its own + # actions/checkout call (same depth); commits reachable from main up to + # ~1000 back are plenty for any realistic race window. 
+ fetch-depth: 1000 - name: Set up QEMU uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3 @@ -54,19 +65,31 @@ jobs: - name: Test image starts run: | + mkdir -p /tmp/hermes-test + sudo chown -R 10000:10000 /tmp/hermes-test # The image runs as the hermes user (UID 10000). GitHub Actions # creates /tmp/hermes-test root-owned by default, which hermes # can't write to — chown it to match the in-container UID before # bind-mounting. Real users doing `docker run -v ~/.hermes:...` # with their own UID hit the same issue and have their own # remediations (HERMES_UID env var, or chown locally). - mkdir -p /tmp/hermes-test - sudo chown -R 10000:10000 /tmp/hermes-test docker run --rm \ -v /tmp/hermes-test:/opt/data \ --entrypoint /opt/hermes/docker/entrypoint.sh \ nousresearch/hermes-agent:test --help + - name: Test dashboard subcommand + run: | + mkdir -p /tmp/hermes-test + sudo chown -R 10000:10000 /tmp/hermes-test + # Verify the dashboard subcommand is included in the Docker image. + # This prevents regressions like #9153 where the dashboard command + # was present in source but missing from the published image. + docker run --rm \ + -v /tmp/hermes-test:/opt/data \ + --entrypoint /opt/hermes/docker/entrypoint.sh \ + nousresearch/hermes-agent:test dashboard --help + - name: Log in to Docker Hub if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release' uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3 @@ -74,7 +97,12 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Push multi-arch image (main branch) + # Always push a per-commit SHA tag on main. This is race-free because + # every commit has a unique SHA — concurrent runs can't clobber each + # other here. We also embed the git SHA as an OCI label so the + # move-latest job (below) can read it back off the registry's `:latest`. 
+ - name: Push multi-arch image with SHA tag (main branch) + id: push_sha if: github.event_name == 'push' && github.ref == 'refs/heads/main' uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6 with: @@ -82,10 +110,17 @@ jobs: file: Dockerfile push: true platforms: linux/amd64,linux/arm64 - tags: nousresearch/hermes-agent:latest + tags: nousresearch/hermes-agent:sha-${{ github.sha }} + labels: | + org.opencontainers.image.revision=${{ github.sha }} cache-from: type=gha cache-to: type=gha,mode=max + - name: Mark SHA tag pushed + id: mark_pushed + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: echo "pushed=true" >> "$GITHUB_OUTPUT" + - name: Push multi-arch image (release) if: github.event_name == 'release' uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6 @@ -97,3 +132,119 @@ jobs: tags: nousresearch/hermes-agent:${{ github.event.release.tag_name }} cache-from: type=gha cache-to: type=gha,mode=max + + # Second job: moves `:latest` to point at the SHA tag the first job pushed. + # + # Has its own concurrency group with `cancel-in-progress: true`, which + # gives us the serialization we need: if a newer push arrives while an + # older run is mid-way through this job, the older run is cancelled + # before it can clobber `:latest`. Combined with the ancestor check + # below, this means `:latest` only ever moves forward in git history. 
+ move-latest: + if: | + github.repository == 'NousResearch/hermes-agent' + && github.event_name == 'push' + && github.ref == 'refs/heads/main' + && needs.build-and-push.outputs.pushed_sha_tag == 'true' + needs: build-and-push + runs-on: ubuntu-latest + timeout-minutes: 10 + concurrency: + group: docker-move-latest-${{ github.ref }} + cancel-in-progress: true + steps: + - name: Checkout code + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 1000 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 + + - name: Log in to Docker Hub + uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + # Read the git revision label off the current `:latest` manifest, then + # use `git merge-base --is-ancestor` to check whether our commit is a + # descendant of it. If `:latest` doesn't exist yet, or its label is + # missing, we treat that as "safe to publish". If another run already + # advanced `:latest` past us (or diverged), we skip and leave it alone. + - name: Decide whether to move :latest + id: latest_check + run: | + set -euo pipefail + image=nousresearch/hermes-agent + + # Pull the JSON for the linux/amd64 sub-manifest's config and extract + # the OCI revision label with jq — Go template field access can't + # handle dots in map keys, so using json+jq is the robust route. + image_json=$( + docker buildx imagetools inspect "${image}:latest" \ + --format '{{ json (index .Image "linux/amd64") }}' \ + 2>/dev/null || true + ) + + if [ -z "${image_json}" ]; then + echo "No existing :latest (or inspect failed) — safe to publish." 
+ echo "push_latest=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + current_sha=$( + printf '%s' "${image_json}" \ + | jq -r '.config.Labels."org.opencontainers.image.revision" // ""' + ) + + if [ -z "${current_sha}" ]; then + echo "Registry :latest has no revision label — safe to publish." + echo "push_latest=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "Registry :latest is at ${current_sha}" + echo "This run is at ${GITHUB_SHA}" + + if [ "${current_sha}" = "${GITHUB_SHA}" ]; then + echo ":latest already points at our SHA — nothing to do." + echo "push_latest=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Make sure we have the :latest commit locally for merge-base. + if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then + git fetch --no-tags --prune origin \ + "+refs/heads/main:refs/remotes/origin/main" \ + || true + fi + + if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then + echo "Registry :latest points at an unknown commit (${current_sha}); refusing to overwrite." + echo "push_latest=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Our SHA must be a descendant of the current :latest to be safe. + if git merge-base --is-ancestor "${current_sha}" "${GITHUB_SHA}"; then + echo "Our commit is a descendant of :latest — safe to advance." + echo "push_latest=true" >> "$GITHUB_OUTPUT" + else + echo "Another run advanced :latest past us (or diverged) — leaving it alone." + echo "push_latest=false" >> "$GITHUB_OUTPUT" + fi + + # Retag the already-pushed SHA manifest as :latest. This is a registry- + # side operation — no rebuild, no layer re-push — so it's quick and + # atomic per-tag. The ancestor check above plus the cancel-in-progress + # concurrency on this job together guarantee we only ever move :latest + # forward in git history. 
+ - name: Move :latest to this SHA + if: steps.latest_check.outputs.push_latest == 'true' + run: | + set -euo pipefail + image=nousresearch/hermes-agent + docker buildx imagetools create \ + --tag "${image}:latest" \ + "${image}:sha-${GITHUB_SHA}" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 30d171543bb..78c608c73a7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -106,6 +106,11 @@ hermes chat -q "Hello" ### Run tests ```bash +# Preferred — matches CI (hermetic env, 4 xdist workers); see AGENTS.md +scripts/run_tests.sh + +# Alternative (activate the venv first). The wrapper is still recommended +# for parity with GitHub Actions before you open a PR: pytest tests/ -v ``` @@ -286,16 +291,18 @@ registry.register( ) ``` -Then add the import to `model_tools.py` in the `_modules` list: +**Wire into a toolset (required):** Built-in tools are auto-discovered: any +`tools/*.py` file that contains a top-level `registry.register(...)` call is +imported by `discover_builtin_tools()` in `tools/registry.py` when `model_tools` +loads. There is **no** manual import list in `model_tools.py` to maintain. -```python -_modules = [ - # ... existing modules ... - "tools.my_tool", -] -``` +You must still add the tool name to the appropriate list in `toolsets.py` +(for example `_HERMES_CORE_TOOLS` or a dedicated toolset); otherwise the tool +registers but is never exposed to the agent. If you introduce a new toolset, +add it in `toolsets.py` and wire it into the relevant platform presets. -If it's a new toolset, add it to `toolsets.py` and to the relevant platform presets. +See `AGENTS.md` (section **Adding New Tools**) for profile-aware paths and +plugin vs core guidance. --- @@ -595,7 +602,7 @@ refactor/description # Code restructuring ### Before submitting -1. **Run tests**: `pytest tests/ -v` +1. **Run tests**: `scripts/run_tests.sh` (recommended; same as CI) or `pytest tests/ -v` with the project venv activated 2. 
**Test manually**: Run `hermes` and exercise the code path you changed 3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature. diff --git a/Dockerfile b/Dockerfile index 08a5b6a2754..6ed111f5b2c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -66,8 +66,14 @@ RUN cd web && npm run build && \ # ---------- Permissions ---------- # Make install dir world-readable so any HERMES_UID can read it at runtime. # The venv needs to be traversable too. +# node_modules trees additionally need to be writable by the hermes user +# so the runtime `npm install` triggered by _tui_need_npm_install() in +# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time +# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally +# not chowned here. USER root -RUN chmod -R a+rX /opt/hermes +RUN chmod -R a+rX /opt/hermes && \ + chown -R hermes:hermes /opt/hermes/ui-tui /opt/hermes/node_modules # Start as root so the entrypoint can usermod/groupmod + gosu. # If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000). diff --git a/README.md b/README.md index 2674cabe77f..00458582619 100644 --- a/README.md +++ b/README.md @@ -155,13 +155,13 @@ Manual path (equivalent to the above): ```bash curl -LsSf https://astral.sh/uv/install.sh | sh -uv venv venv --python 3.11 -source venv/bin/activate +uv venv .venv --python 3.11 +source .venv/bin/activate uv pip install -e ".[all,dev]" scripts/run_tests.sh ``` -> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required. 
+> **RL Training (optional):** The RL/Atropos integration (`environments/`) — see [`CONTRIBUTING.md`](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#development-setup) for the full setup. --- diff --git a/RELEASE_v0.13.0.md b/RELEASE_v0.13.0.md new file mode 100644 index 00000000000..7efcb7aee02 --- /dev/null +++ b/RELEASE_v0.13.0.md @@ -0,0 +1,641 @@ +# Hermes Agent v0.13.0 (v2026.5.7) + +**Release Date:** May 7, 2026 +**Since v0.12.0:** 864 commits · 588 merged PRs · 829 files changed · 128,366 insertions · 282 issues closed (13 P0, 36 P1) · 295 community contributors (including co-authors) + +> The Tenacity Release — Hermes Agent now finishes what it starts. Kanban ships as a durable multi-agent board (heartbeat, reclaim, zombie detection, auto-block on incomplete exit, per-task retries, hallucination recovery). `/goal` keeps the agent locked on a target across turns (Ralph loop). Checkpoints v2 rewrites state persistence with real pruning. Gateway auto-resumes interrupted sessions after restart. Cron grows a `no_agent` watchdog mode. A security wave closes 8 P0s — redaction is now ON by default, Discord role-allowlists are guild-scoped, WhatsApp rejects strangers by default, and TOCTOU windows close across auth.json and MCP OAuth. Google Chat becomes the 20th platform. Providers become a pluggable surface. Seven i18n locales ship. + +--- + +## ✨ Highlights + +- **Multi-agent Kanban — delegate to an AI team that actually finishes** — Spin up a durable board, drop tasks on it, and let multiple Hermes workers pick them up, hand off, and close them out. Heartbeats, reclaim, zombie detection, retry budgets, and a hallucination gate keep the team honest. One install, many kanbans. 
([#17805](https://github.com/NousResearch/hermes-agent/pull/17805), [#19653](https://github.com/NousResearch/hermes-agent/pull/19653), [#20232](https://github.com/NousResearch/hermes-agent/pull/20232), [#20332](https://github.com/NousResearch/hermes-agent/pull/20332), [#21330](https://github.com/NousResearch/hermes-agent/pull/21330), [#21183](https://github.com/NousResearch/hermes-agent/pull/21183), [#21214](https://github.com/NousResearch/hermes-agent/pull/21214)) + +- **`/goal` — the agent doesn't forget what you asked it to do** — Lock the agent onto a target and it stays on task across turns. The Ralph loop as a first-class primitive. ([#18262](https://github.com/NousResearch/hermes-agent/pull/18262), [#18275](https://github.com/NousResearch/hermes-agent/pull/18275), [#21287](https://github.com/NousResearch/hermes-agent/pull/21287)) + +- **Show it a video** — new `video_analyze` tool for native video understanding on Gemini and compatible multimodal models. (@alt-glitch) ([#19301](https://github.com/NousResearch/hermes-agent/pull/19301)) + +- **Clone a voice** — xAI Custom Voices lands as a TTS provider with voice cloning support. (@alt-glitch) ([#18776](https://github.com/NousResearch/hermes-agent/pull/18776)) + +- **Hermes speaks your language** — static gateway + CLI messages translate to 7 locales: Chinese, Japanese, German, Spanish, French, Ukrainian, and Turkish. Docs site gains a Chinese (zh-Hans) locale. 
([#20231](https://github.com/NousResearch/hermes-agent/pull/20231), [#20329](https://github.com/NousResearch/hermes-agent/pull/20329), [#20467](https://github.com/NousResearch/hermes-agent/pull/20467), [#20474](https://github.com/NousResearch/hermes-agent/pull/20474), [#20430](https://github.com/NousResearch/hermes-agent/pull/20430), [#20431](https://github.com/NousResearch/hermes-agent/pull/20431)) + +- **Google Chat — the 20th messaging platform** — plus a generic platform-plugin hooks surface so third-party adapters drop in without touching core (IRC and Teams migrated). ([#21306](https://github.com/NousResearch/hermes-agent/pull/21306), [#21331](https://github.com/NousResearch/hermes-agent/pull/21331)) + +- **Sessions survive restarts** — gateway bounces mid-agent, `/update` restarts, source-file reloads — conversations auto-resume when the gateway comes back. ([#21192](https://github.com/NousResearch/hermes-agent/pull/21192)) + +- **Security wave — 8 P0 closures** — redaction ON by default, Discord role-allowlists guild-scoped (CVSS 8.1 cross-guild DM bypass closed), WhatsApp rejects strangers by default, TOCTOU windows closed across `auth.json` and MCP OAuth, browser enforces cloud-metadata SSRF floor, cron prompt-injection scans assembled skill content, `hermes debug share` redacts at upload. ([#21193](https://github.com/NousResearch/hermes-agent/pull/21193), [#21241](https://github.com/NousResearch/hermes-agent/pull/21241), [#21291](https://github.com/NousResearch/hermes-agent/pull/21291), [#21176](https://github.com/NousResearch/hermes-agent/pull/21176), [#21194](https://github.com/NousResearch/hermes-agent/pull/21194), [#21228](https://github.com/NousResearch/hermes-agent/pull/21228), [#21350](https://github.com/NousResearch/hermes-agent/pull/21350), [#19318](https://github.com/NousResearch/hermes-agent/pull/19318)) + +- **Checkpoints v2** — state persistence rewritten. Real pruning, disk guardrails, no more orphan shadow repos. 
([#20709](https://github.com/NousResearch/hermes-agent/pull/20709)) + +- **The agent lints its own writes** — post-write delta lint on `write_file` + `patch`. Python, JSON, YAML, TOML. Syntax errors surface immediately instead of shipping downstream. ([#20191](https://github.com/NousResearch/hermes-agent/pull/20191)) + +- **`no_agent` cron mode — script-only watchdog** — cron jobs can now skip the agent entirely and just run a script. Empty stdout is silent, non-empty gets delivered verbatim. ([#19709](https://github.com/NousResearch/hermes-agent/pull/19709)) + +- **Platform allowlists everywhere** — `allowed_channels` / `allowed_chats` / `allowed_rooms` config across Slack, Telegram, Mattermost, Matrix, and DingTalk. ([#21251](https://github.com/NousResearch/hermes-agent/pull/21251)) + +- **Providers are now plugins** — `ProviderProfile` ABC + `plugins/model-providers/`. Drop in third-party providers without touching core. ([#20324](https://github.com/NousResearch/hermes-agent/pull/20324)) + +- **API server — long-term memory per session** — `X-Hermes-Session-Key` header gives memory providers a stable session identifier. ([#20199](https://github.com/NousResearch/hermes-agent/pull/20199)) + +- **MCP levels up** — SSE transport with OAuth forwarding, stale-pipe retries, image results surface as MEDIA tags instead of getting dropped, keepalive on long-lived lifecycle waits. ([#21227](https://github.com/NousResearch/hermes-agent/pull/21227), [#21323](https://github.com/NousResearch/hermes-agent/pull/21323), [#21289](https://github.com/NousResearch/hermes-agent/pull/21289), [#21328](https://github.com/NousResearch/hermes-agent/pull/21328), [#20209](https://github.com/NousResearch/hermes-agent/pull/20209)) + +- **Curator grows subcommands** — `hermes curator archive`, `prune`, `list-archived`. Manual `hermes curator run` is synchronous now — you see results without polling. 
([#20200](https://github.com/NousResearch/hermes-agent/pull/20200), [#21236](https://github.com/NousResearch/hermes-agent/pull/21236), [#21216](https://github.com/NousResearch/hermes-agent/pull/21216)) + +- **ACP — `/steer` and `/queue`** — direct the in-flight agent or queue follow-ups from Zed, VS Code, or JetBrains. Plus atomic session persistence and reasoning-metadata preservation across restarts. (@HenkDz) ([#18114](https://github.com/NousResearch/hermes-agent/pull/18114), [#20279](https://github.com/NousResearch/hermes-agent/pull/20279), [#20296](https://github.com/NousResearch/hermes-agent/pull/20296), [#20433](https://github.com/NousResearch/hermes-agent/pull/20433)) + +- **TUI glow-up** — `/model` picker matches `hermes model` with inline auth (@austinpickett), collapsible startup banner sections (@kshitijk4poor), context-compression counter in the status bar. ([#18117](https://github.com/NousResearch/hermes-agent/pull/18117), [#20625](https://github.com/NousResearch/hermes-agent/pull/20625), [#21218](https://github.com/NousResearch/hermes-agent/pull/21218)) + +- **Dashboard grows up** — Plugins page (manage, enable/disable, auth status) (@austinpickett), Profiles management page (@vincez-hms-coder), sortable analytics tables, reverse-proxy support via `X-Forwarded-Prefix`, new `default-large` 18px theme. ([#18095](https://github.com/NousResearch/hermes-agent/pull/18095), [#16419](https://github.com/NousResearch/hermes-agent/pull/16419), [#18192](https://github.com/NousResearch/hermes-agent/pull/18192), [#21296](https://github.com/NousResearch/hermes-agent/pull/21296), [#20820](https://github.com/NousResearch/hermes-agent/pull/20820)) + +- **SearXNG + split web tools** — SearXNG ships as a native search-only backend; web tools now let you pick different backends per capability (search vs extract vs browse). 
(@kshitijk4poor) ([#20823](https://github.com/NousResearch/hermes-agent/pull/20823), [#20061](https://github.com/NousResearch/hermes-agent/pull/20061), [#20841](https://github.com/NousResearch/hermes-agent/pull/20841)) + +- **OpenRouter response caching** — explicit cache control for models that expose it. (@kshitijk4poor) ([#19132](https://github.com/NousResearch/hermes-agent/pull/19132)) + +- **`[[as_document]]` — skill media-routing directive** — skills can force the gateway to deliver output as a document on platforms that support it. ([#21210](https://github.com/NousResearch/hermes-agent/pull/21210)) + +- **`transform_llm_output` plugin hook** — new lifecycle hook that lets plugins reshape or filter LLM output before it hits the conversation. Useful for context-window reducers and content filters. ([#21235](https://github.com/NousResearch/hermes-agent/pull/21235)) + +- **Nous OAuth persists across profiles** — shared token store: sign in once, every profile inherits the session. ([#19712](https://github.com/NousResearch/hermes-agent/pull/19712)) + +- **QQBot — native approval keyboards** — feature parity with Telegram / Discord approval UX. Chunked upload, quoted attachments. ([#21342](https://github.com/NousResearch/hermes-agent/pull/21342), [#21353](https://github.com/NousResearch/hermes-agent/pull/21353)) + +- **6 new optional skills** — Shopify (Admin + Storefront GraphQL), here.now, shop-app personal shopping assistant, Anthropic financial-services bundle, kanban-video-orchestrator (@SHL0MS), searxng-search (@kshitijk4poor). 
([#18116](https://github.com/NousResearch/hermes-agent/pull/18116), [#18170](https://github.com/NousResearch/hermes-agent/pull/18170), [#20702](https://github.com/NousResearch/hermes-agent/pull/20702), [#21180](https://github.com/NousResearch/hermes-agent/pull/21180), [#19281](https://github.com/NousResearch/hermes-agent/pull/19281), [#20841](https://github.com/NousResearch/hermes-agent/pull/20841)) + +- **New models** — `deepseek/deepseek-v4-pro`, `x-ai/grok-4.3`, `openrouter/owl-alpha` (free), `tencent/hy3-preview` (@Contentment003111), Arcee Trinity Large Thinking temperature + compression overrides. ([#20495](https://github.com/NousResearch/hermes-agent/pull/20495), [#20497](https://github.com/NousResearch/hermes-agent/pull/20497), [#18071](https://github.com/NousResearch/hermes-agent/pull/18071), [#21077](https://github.com/NousResearch/hermes-agent/pull/21077), [#20473](https://github.com/NousResearch/hermes-agent/pull/20473)) + +- **100 fresh CLI startup tips** — the random tip banner gets 100 new entries covering cron, kanban, curator, plugins, and lesser-known flags. 
([#20168](https://github.com/NousResearch/hermes-agent/pull/20168)) + +--- + +## 🧩 Multi-Agent Kanban (Durable) + +### New — durable multi-profile collaboration board +- **`feat(kanban): durable multi-profile collaboration board`** — post-revert reimplementation, multi-profile by design ([#17805](https://github.com/NousResearch/hermes-agent/pull/17805)) +- **Multi-project boards** — one install, many kanbans ([#19653](https://github.com/NousResearch/hermes-agent/pull/19653), [#19679](https://github.com/NousResearch/hermes-agent/pull/19679)) +- **Share board, workspaces, and worker logs across profiles** ([#19378](https://github.com/NousResearch/hermes-agent/pull/19378)) +- **Hallucination gate + recovery UX for worker-created-card claims** (closes #20017) ([#20232](https://github.com/NousResearch/hermes-agent/pull/20232)) +- **Generic diagnostics engine for task distress signals** ([#20332](https://github.com/NousResearch/hermes-agent/pull/20332)) +- **Per-task `max_retries` override** (supersedes #20972) ([#21330](https://github.com/NousResearch/hermes-agent/pull/21330)) +- **Multiline textarea for inline-create title** (salvage of #20970) ([#21243](https://github.com/NousResearch/hermes-agent/pull/21243)) + +### Kanban Dashboard +- **Workspace kind + path inputs in inline create form** ([#19679](https://github.com/NousResearch/hermes-agent/pull/19679)) +- **Per-platform home-channel notification toggles** ([#19864](https://github.com/NousResearch/hermes-agent/pull/19864)) +- **Sharper home-channel toggle contrast + drop → running action** ([#19916](https://github.com/NousResearch/hermes-agent/pull/19916)) +- Fix: reject direct status transition to 'running' via dashboard API (salvage of #19554) ([#19705](https://github.com/NousResearch/hermes-agent/pull/19705)) +- Fix: dashboard board pin authoritative over server current file (#20879) ([#21230](https://github.com/NousResearch/hermes-agent/pull/21230)) +- Fix: treat dashboard event-stream cancellation as normal 
shutdown (#20790) ([#21222](https://github.com/NousResearch/hermes-agent/pull/21222)) +- Fix: filter dashboard board by selected tenant (#19817) ([#21349](https://github.com/NousResearch/hermes-agent/pull/21349)) +- Fix: code/pre styling theme-immune across all themes (#21086) ([#21247](https://github.com/NousResearch/hermes-agent/pull/21247)) +- Fix: reset `<code>` background inside dashboard board ([#20687](https://github.com/NousResearch/hermes-agent/pull/20687)) +- Fix: preserve dashboard completion summaries + add kanban edit (salvages #20016) ([#20195](https://github.com/NousResearch/hermes-agent/pull/20195)) +- Fix: avoid fragile failure-column renames (salvage #20848) (@kshitijk4poor) ([#20855](https://github.com/NousResearch/hermes-agent/pull/20855)) + +### Worker lifecycle + reliability +- **Heartbeat + reclaim + zombie + retry-cap fixes** (#21147, #21141, #21169, #20881) ([#21183](https://github.com/NousResearch/hermes-agent/pull/21183)) +- **Auto-block workers that exit without completing + shutdown race** (#20894) ([#21214](https://github.com/NousResearch/hermes-agent/pull/21214)) +- **Detect darwin zombie workers** (salvages #20023) ([#20188](https://github.com/NousResearch/hermes-agent/pull/20188)) +- **Unify failure counter across spawn/timeout/crash outcomes** ([#20410](https://github.com/NousResearch/hermes-agent/pull/20410)) +- **Enforce worker task-ownership on destructive tool calls** ([#19713](https://github.com/NousResearch/hermes-agent/pull/19713)) +- **Drop worker identity claim from KANBAN_GUIDANCE** ([#19427](https://github.com/NousResearch/hermes-agent/pull/19427)) +- Fix: skip dispatch for tasks assigned to non-profile lanes (salvages #20105, #20134) ([#20165](https://github.com/NousResearch/hermes-agent/pull/20165)) +- Fix: include default profile in on-disk assignee enumeration (salvages #20123) ([#20170](https://github.com/NousResearch/hermes-agent/pull/20170)) +- Fix: ignore stale current board pointers (salvages #20063) 
([#20183](https://github.com/NousResearch/hermes-agent/pull/20183)) +- Fix: profile discovery ignores HERMES_HOME in custom-root deployments (@jackey8616) ([#19020](https://github.com/NousResearch/hermes-agent/pull/19020)) +- Fix: allow orchestrator profiles to see kanban tools via toolsets config ([#19606](https://github.com/NousResearch/hermes-agent/pull/19606)) + +### Batch salvages +- Tier-1 batch — metadata test, max_spawn config, run-id lifecycle guard (salvages #19522 #19556 #19829) ([#20440](https://github.com/NousResearch/hermes-agent/pull/20440)) +- Tier-2 batch — doctor, started_at, parent-guard, latest_summary, selects, linked-children ([#20448](https://github.com/NousResearch/hermes-agent/pull/20448)) + +### Documentation +- Backfill multi-board refs in reference docs ([#19704](https://github.com/NousResearch/hermes-agent/pull/19704)) +- Document `/kanban` slash command ([#19584](https://github.com/NousResearch/hermes-agent/pull/19584)) +- Document recommended handoff evidence metadata (salvage #19512) ([#20415](https://github.com/NousResearch/hermes-agent/pull/20415)) +- Fix orchestrator + worker skill setup instructions (@helix4u) ([#20958](https://github.com/NousResearch/hermes-agent/pull/20958), [#20960](https://github.com/NousResearch/hermes-agent/pull/20960)) + +--- + +## 🎯 Persistent Goals, Checkpoints & Session Durability + +### `/goal` — persistent cross-turn goals (Ralph loop) +- **`feat: /goal — persistent cross-turn goals`** ([#18262](https://github.com/NousResearch/hermes-agent/pull/18262)) +- **Docs page — Persistent Goals (/goal)** ([#18275](https://github.com/NousResearch/hermes-agent/pull/18275)) +- Fix: honor configured goal turn budget (salvage #19423) ([#21287](https://github.com/NousResearch/hermes-agent/pull/21287)) + +### Checkpoints v2 +- **Single-store rewrite with real pruning + disk guardrails** ([#20709](https://github.com/NousResearch/hermes-agent/pull/20709)) + +### Session durability +- **Auto-resume interrupted sessions 
after gateway restart** (salvage #20888) ([#21192](https://github.com/NousResearch/hermes-agent/pull/21192)) +- **Preserve pending update prompts across restarts** ([#20160](https://github.com/NousResearch/hermes-agent/pull/20160)) +- **Preserve home-channel thread targets across restart notifications** (salvage #18440) ([#19271](https://github.com/NousResearch/hermes-agent/pull/19271)) +- **Preserve thread routing from cached live session sources** ([#21206](https://github.com/NousResearch/hermes-agent/pull/21206)) +- **Preserve assistant metadata when branching sessions** ([#18222](https://github.com/NousResearch/hermes-agent/pull/18222)) +- **Preserve thread routing for /update progress and prompts** ([#18193](https://github.com/NousResearch/hermes-agent/pull/18193)) +- **Preserve document type when merging queued events** ([#18215](https://github.com/NousResearch/hermes-agent/pull/18215)) + +--- + +## 🛡️ Security & Reliability + +### Security hardening (8 P0 closures) +- **Enable secret redaction by default** (#17691, #20785) ([#21193](https://github.com/NousResearch/hermes-agent/pull/21193)) +- **Discord — scope `DISCORD_ALLOWED_ROLES` to originating guild** (#12136, CVSS 8.1) ([#21241](https://github.com/NousResearch/hermes-agent/pull/21241)) +- **WhatsApp — reject strangers by default, never respond in self-chat** (#8389) ([#21291](https://github.com/NousResearch/hermes-agent/pull/21291)) +- **MCP OAuth — close TOCTOU window when saving credentials** ([#21176](https://github.com/NousResearch/hermes-agent/pull/21176)) +- **`hermes_cli/auth.py` — close TOCTOU window in credential writers** ([#21194](https://github.com/NousResearch/hermes-agent/pull/21194)) +- **Browser — enforce cloud-metadata SSRF floor in hybrid routing** (#16234) ([#21228](https://github.com/NousResearch/hermes-agent/pull/21228)) +- **`hermes debug share` — redact log content at upload time** (@GodsBoy) ([#19318](https://github.com/NousResearch/hermes-agent/pull/19318)) +- **Cron — scan 
assembled prompt including skill content for prompt injection** (#3968) ([#21350](https://github.com/NousResearch/hermes-agent/pull/21350)) +- **Restore .env/auth.json/state.db with 0600 perms** ([#19699](https://github.com/NousResearch/hermes-agent/pull/19699)) +- **SRI integrity for dashboard plugin scripts** (salvage #19389) ([#21277](https://github.com/NousResearch/hermes-agent/pull/21277)) +- **Bind Meet node server to localhost, restrict token file to owner read** ([#19597](https://github.com/NousResearch/hermes-agent/pull/19597)) +- **Extend sensitive-write target to cover shell RC and credential files** ([#19282](https://github.com/NousResearch/hermes-agent/pull/19282)) +- **Harden YOLO mode env parsing against quoted-bool strings** ([#18214](https://github.com/NousResearch/hermes-agent/pull/18214)) +- **OSV-Scanner CI + Dependabot for github-actions only** ([#20037](https://github.com/NousResearch/hermes-agent/pull/20037)) + +### Reliability — critical bug closures +- **CLI crash on startup — `Invalid key 'c-S-c'`** (P0, prompt_toolkit doesn't support Shift modifier) ([#19895](https://github.com/NousResearch/hermes-agent/pull/19895), [#19919](https://github.com/NousResearch/hermes-agent/pull/19919)) +- **CLOSE_WAIT fd leak audit** — httpx keepalive + WhatsApp aiohttp leak + Feishu hygiene (#18451) ([#18766](https://github.com/NousResearch/hermes-agent/pull/18766)) +- **Gateway creates AIAgent with empty OpenRouter API key when OPENROUTER_API_KEY is missing** (#20982) — fallback providers correctly honored +- **Background review + curator protected from overwriting bundled/hub skills** (#20273) ([#20194](https://github.com/NousResearch/hermes-agent/pull/20194)) +- **TUI compression continuation — ghost sessions with incomplete metadata** (#20001) +- **`hermes mcp add` silently launches chat instead of registering MCP server** (#19785) ([#21204](https://github.com/NousResearch/hermes-agent/pull/21204)) +- **Background review agent runtime propagation** — 
provider/model/credentials now actually inherit from parent +- **Inbound document host paths translated to container paths for Docker backend** (salvage #19048) ([#21184](https://github.com/NousResearch/hermes-agent/pull/21184)) +- **Matrix gateway race between auto-redaction and message delivery with high-speed models** (#19075) +- **`/new` during active agent session never sends response on Telegram** (#18912) + +--- + +## 📱 Messaging Platforms (Gateway) + +### New platform +- **Google Chat — 20th platform** + generic `env_enablement_fn` / `cron_deliver_env_var` platform-plugin hooks (IRC + Teams migrated) ([#21306](https://github.com/NousResearch/hermes-agent/pull/21306), [#21331](https://github.com/NousResearch/hermes-agent/pull/21331)) + +### Cross-platform +- **`allowed_{channels,chats,rooms}` whitelist** — Slack (salvage #7401), Telegram, Mattermost, Matrix, DingTalk ([#21251](https://github.com/NousResearch/hermes-agent/pull/21251)) +- **Per-platform `gateway_restart_notification` flag** ([#20892](https://github.com/NousResearch/hermes-agent/pull/20892)) +- **`busy_ack_enabled` config — suppress ack messages** ([#18194](https://github.com/NousResearch/hermes-agent/pull/18194)) +- **Auto-delete slash-command system notices after TTL** ([#18266](https://github.com/NousResearch/hermes-agent/pull/18266)) +- **Opt-in cleanup of temporary progress bubbles** ([#21186](https://github.com/NousResearch/hermes-agent/pull/21186)) +- **`[[as_document]]` directive — skill media routing** (salvage #19069) ([#21210](https://github.com/NousResearch/hermes-agent/pull/21210)) +- **`hermes gateway list` — cross-profile status** (salvage #19129) ([#21225](https://github.com/NousResearch/hermes-agent/pull/21225)) +- **Auto-resume interrupted sessions after restart** (salvage #20888) ([#21192](https://github.com/NousResearch/hermes-agent/pull/21192)) +- **Atomic restart markers + Windows runtime-lock offset** (#17842) 
([#18179](https://github.com/NousResearch/hermes-agent/pull/18179)) +- Fix: `config.yaml` wins over `.env` for agent/display/timezone settings ([#18764](https://github.com/NousResearch/hermes-agent/pull/18764)) +- Fix: auto-restart when source files change out from under us (#17648) ([#18409](https://github.com/NousResearch/hermes-agent/pull/18409)) +- Fix: use git HEAD SHA for stale-code check, not file mtimes ([#19740](https://github.com/NousResearch/hermes-agent/pull/19740)) +- Fix: shutdown + restart hygiene — drain timeout, false-fatal, success log ([#18761](https://github.com/NousResearch/hermes-agent/pull/18761)) +- Fix: preserve max_turns after env reload (salvage #19183) ([#21240](https://github.com/NousResearch/hermes-agent/pull/21240)) +- Fix: exclude ancestor PIDs from gateway process scan ([#19586](https://github.com/NousResearch/hermes-agent/pull/19586)) +- Fix: move quick-command alias dispatch before built-ins ([#19588](https://github.com/NousResearch/hermes-agent/pull/19588)) +- Fix: show other profiles in 'gateway status' to prevent confusion ([#19582](https://github.com/NousResearch/hermes-agent/pull/19582)) +- Fix: include external_dirs skills in Telegram/Discord slash commands (salvage #8790) ([#18741](https://github.com/NousResearch/hermes-agent/pull/18741)) +- Fix: match disabled/optional skills by frontmatter slug, not dir name ([#18753](https://github.com/NousResearch/hermes-agent/pull/18753)) +- Fix: read /status token totals from SessionDB (#17158) ([#18206](https://github.com/NousResearch/hermes-agent/pull/18206)) +- Fix: snapshot callback generation after agent binds it, not before ([#18219](https://github.com/NousResearch/hermes-agent/pull/18219)) +- Fix: re-inject topic-bound skill after /new or /reset ([#18205](https://github.com/NousResearch/hermes-agent/pull/18205)) +- Fix: isolate pending native image paths by session ([#18202](https://github.com/NousResearch/hermes-agent/pull/18202)) +- Fix: clear queued reload skills notes on 
new/resume/branch ([#19431](https://github.com/NousResearch/hermes-agent/pull/19431)) +- Fix: hide required-arg commands from Telegram menu ([#19400](https://github.com/NousResearch/hermes-agent/pull/19400)) +- Fix: bridge top-level `require_mention` to Telegram config ([#19429](https://github.com/NousResearch/hermes-agent/pull/19429)) +- Fix: suppress duplicate voice transcripts ([#19428](https://github.com/NousResearch/hermes-agent/pull/19428)) +- Fix: show friendly error when service is not installed ([#19707](https://github.com/NousResearch/hermes-agent/pull/19707)) +- Fix: read context_length from custom_providers in session info header ([#19708](https://github.com/NousResearch/hermes-agent/pull/19708)) +- Fix: preserve WSL interop PATH in systemd units ([#19867](https://github.com/NousResearch/hermes-agent/pull/19867)) +- Fix: handle planned service stops (salvage #19876) ([#19936](https://github.com/NousResearch/hermes-agent/pull/19936)) +- Fix: keep DoH-confirmed Telegram IPs that match system DNS (salvage #17043) ([#20175](https://github.com/NousResearch/hermes-agent/pull/20175)) +- Fix: load `reply_to_mode` from config.yaml for Discord + Telegram (salvage #17117) ([#20171](https://github.com/NousResearch/hermes-agent/pull/20171)) +- Fix: tolerate malformed HERMES_HUMAN_DELAY_* env vars (salvage #16933) ([#20217](https://github.com/NousResearch/hermes-agent/pull/20217)) +- Fix: deterministic thread eviction preserves newest entries (salvage #13639) ([#20285](https://github.com/NousResearch/hermes-agent/pull/20285)) +- Fix: don't dead-end setup wizard when only system-scope unit is installed ([#20905](https://github.com/NousResearch/hermes-agent/pull/20905)) +- Fix: wait for systemd restart readiness + harden Discord slash-command sync ([#20949](https://github.com/NousResearch/hermes-agent/pull/20949)) +- Fix: avoid duplicated Responses history (salvage #18995) ([#21185](https://github.com/NousResearch/hermes-agent/pull/21185)) +- Fix: surface bootstrap 
failures to stderr (salvage #21157) ([#21278](https://github.com/NousResearch/hermes-agent/pull/21278)) +- Fix: log agent task failures instead of silently losing usage data (salvage #21159) ([#21274](https://github.com/NousResearch/hermes-agent/pull/21274)) +- Fix: log runtime-status write failures with rate-limiting (salvage #21158) ([#21285](https://github.com/NousResearch/hermes-agent/pull/21285)) +- Fix: reset-failed before every fallback restart so the gateway can't get stranded ([#21371](https://github.com/NousResearch/hermes-agent/pull/21371)) +- Fix: Telegram — preserve `thread_id=1` for forum General typing indicator ([#21390](https://github.com/NousResearch/hermes-agent/pull/21390)) +- Fix: batch critical fixes — session resume, /new race, HA WebSocket scheme (@kshitijk4poor) ([#19182](https://github.com/NousResearch/hermes-agent/pull/19182)) + +### Telegram +- **DM user-managed multi-session topics** (salvage of #19185) ([#19206](https://github.com/NousResearch/hermes-agent/pull/19206)) + +### Discord +- **Message deletion action** (salvage #19052) ([#21197](https://github.com/NousResearch/hermes-agent/pull/21197)) +- Fix: allow `free_response_channels` to override `DISCORD_IGNORE_NO_MENTION` ([#19629](https://github.com/NousResearch/hermes-agent/pull/19629)) + +### Slack +- Fix: ephemeral slash-command ack, private notice delivery, format_message fixes (@kshitijk4poor) ([#18198](https://github.com/NousResearch/hermes-agent/pull/18198)) + +### WhatsApp +- Fix: load WhatsApp home channel from env overrides ([#18190](https://github.com/NousResearch/hermes-agent/pull/18190)) + +### Feishu +- **Operator-configurable bot admission and mention policy** ([#18208](https://github.com/NousResearch/hermes-agent/pull/18208)) +- Fix: force text mode for markdown tables (salvage of #13723 by @WuTianyi123) ([#20275](https://github.com/NousResearch/hermes-agent/pull/20275)) + +### Matrix + Email +- Fix: `/sethome` on Matrix and Email now persists across restarts 
([#18272](https://github.com/NousResearch/hermes-agent/pull/18272)) + +### Teams +- **Docs + feat: sidebar + threading with group-chat fallback** ([#20042](https://github.com/NousResearch/hermes-agent/pull/20042)) + +### Weixin +- Fix: deduplicate Weixin messages by content fingerprint ([#19742](https://github.com/NousResearch/hermes-agent/pull/19742)) + +### QQBot +- **Port SDK improvements in-tree — chunked upload, approval keyboards, quoted attachments** ([#21342](https://github.com/NousResearch/hermes-agent/pull/21342)) +- **Wire native tool-approval UX via inline keyboards** ([#21353](https://github.com/NousResearch/hermes-agent/pull/21353)) + +--- + +## 🏗️ Core Agent & Architecture + +### Provider & Model Support + +#### Pluggable providers +- **ProviderProfile ABC + `plugins/model-providers/`** — inference providers are now a pluggable surface (salvage of #14424) ([#20324](https://github.com/NousResearch/hermes-agent/pull/20324)) +- **`list_picker_providers`** — credential-filtered picker (salvage #13561) ([#20298](https://github.com/NousResearch/hermes-agent/pull/20298)) +- **Remove `/provider` alias for `/model`** ([#20358](https://github.com/NousResearch/hermes-agent/pull/20358)) +- **Shared Hermes dotenv loader across CLI + plugins** (salvage #13660) ([#20281](https://github.com/NousResearch/hermes-agent/pull/20281)) +- **Nous OAuth persisted across profiles via shared token store** ([#19712](https://github.com/NousResearch/hermes-agent/pull/19712)) + +#### New models +- `deepseek/deepseek-v4-pro` added to OpenRouter + Nous Portal ([#20495](https://github.com/NousResearch/hermes-agent/pull/20495)) +- `x-ai/grok-4.3` added to OpenRouter + Nous Portal ([#20497](https://github.com/NousResearch/hermes-agent/pull/20497)) +- `openrouter/owl-alpha` (free tier) added to curated OpenRouter list ([#18071](https://github.com/NousResearch/hermes-agent/pull/18071)) +- `tencent/hy3-preview` paid route on OpenRouter (@Contentment003111) 
([#21077](https://github.com/NousResearch/hermes-agent/pull/21077)) +- Arcee Trinity Large Thinking — temperature + compression overrides ([#20473](https://github.com/NousResearch/hermes-agent/pull/20473)) +- Rename `x-ai/grok-4.20-beta` to `x-ai/grok-4.20` ([#19640](https://github.com/NousResearch/hermes-agent/pull/19640)) +- Demote Vercel AI Gateway to bottom of provider picker ([#18112](https://github.com/NousResearch/hermes-agent/pull/18112)) + +#### Provider configuration +- **OpenRouter — response caching support** (@kshitijk4poor) ([#19132](https://github.com/NousResearch/hermes-agent/pull/19132)) +- **`image_gen.model` from config.yaml honored** (salvage #19376) ([#21273](https://github.com/NousResearch/hermes-agent/pull/21273)) +- Fix: honor runtime default model during delegate provider resolution (@johnncenae) ([#17587](https://github.com/NousResearch/hermes-agent/pull/17587)) +- Fix: avoid Bedrock credential probe in provider picker (@helix4u) ([#18998](https://github.com/NousResearch/hermes-agent/pull/18998)) +- Fix: drop stale env-var override of persisted provider for cron ([#19627](https://github.com/NousResearch/hermes-agent/pull/19627)) +- Fix: pass auxiliary curator api_key/base_url into runtime resolution ([#19421](https://github.com/NousResearch/hermes-agent/pull/19421)) + +### Agent Loop & Conversation +- **`video_analyze` — native video understanding tool** (@alt-glitch) ([#19301](https://github.com/NousResearch/hermes-agent/pull/19301)) +- **Show context compression count in status bar** (CLI + TUI) ([#21218](https://github.com/NousResearch/hermes-agent/pull/21218)) +- **Isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection** (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889)) +- Fix: warning-first tool-call loop guardrails ([#18227](https://github.com/NousResearch/hermes-agent/pull/18227)) +- Fix: break permanent empty-response loop from orphan tool-tail 
([#21385](https://github.com/NousResearch/hermes-agent/pull/21385)) +- Fix: propagate ContextVars to concurrent tool worker threads (salvage #16660) ([#18123](https://github.com/NousResearch/hermes-agent/pull/18123)) +- Fix: surface self-improvement review summaries across CLI, TUI, and gateway ([#18073](https://github.com/NousResearch/hermes-agent/pull/18073)) +- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902)) +- Fix: include system prompt + tool schemas in token estimates for compression ([#18265](https://github.com/NousResearch/hermes-agent/pull/18265)) + +### Compression +- Fix: skip non-string tool content in dedup pass to prevent AttributeError ([#19398](https://github.com/NousResearch/hermes-agent/pull/19398)) +- Fix: reset `_summary_failure_cooldown_until` on session reset ([#19622](https://github.com/NousResearch/hermes-agent/pull/19622)) +- Fix: trigger fallback on timeout errors alongside model-unavailable errors ([#19665](https://github.com/NousResearch/hermes-agent/pull/19665)) +- Fix: `_prune_old_tool_results` boundary direction ([#19725](https://github.com/NousResearch/hermes-agent/pull/19725)) +- Fix: soften summary prompt for content filters (salvage #19456) ([#21302](https://github.com/NousResearch/hermes-agent/pull/21302)) + +### Delegate +- Fix: inherit parent fallback_chain in `_build_child_agent` ([#19601](https://github.com/NousResearch/hermes-agent/pull/19601)) +- Fix: guard `_load_config()` against `delegation: null` in config.yaml ([#19662](https://github.com/NousResearch/hermes-agent/pull/19662)) +- Fix: inherit parent api_key when `delegation.base_url` set without `delegation.api_key` ([#19741](https://github.com/NousResearch/hermes-agent/pull/19741)) +- Fix: expand composite toolsets before intersection (salvage #19455) 
([#21300](https://github.com/NousResearch/hermes-agent/pull/21300)) +- Fix: correct ACP docs — Claude Code CLI has no --acp flag (salvage #19058) ([#21201](https://github.com/NousResearch/hermes-agent/pull/21201)) + +### Session & Memory +- **Hindsight — probe API for `update_mode='append'` to dedupe across processes** (@nicoloboschi) ([#20222](https://github.com/NousResearch/hermes-agent/pull/20222)) + +### Curator +- **`hermes curator archive` and `prune` subcommands** ([#20200](https://github.com/NousResearch/hermes-agent/pull/20200)) +- **`hermes curator list-archived`** (#20651) ([#21236](https://github.com/NousResearch/hermes-agent/pull/21236)) +- **Synchronous manual `hermes curator run`** (#20555) ([#21216](https://github.com/NousResearch/hermes-agent/pull/21216)) +- Fix: preserve `last_report_path` in state ([#18169](https://github.com/NousResearch/hermes-agent/pull/18169)) +- Fix: rewrite cron job skill refs after consolidation ([#18253](https://github.com/NousResearch/hermes-agent/pull/18253)) +- Fix: defer first run + `--dry-run` preview (#18373) ([#18389](https://github.com/NousResearch/hermes-agent/pull/18389)) +- Fix: authoritative `absorbed_into` on delete + restore cron skill links on rollback (#18671) ([#18731](https://github.com/NousResearch/hermes-agent/pull/18731)) +- Fix: prevent false-positive consolidation from substring matching ([#19573](https://github.com/NousResearch/hermes-agent/pull/19573)) +- Fix: only mark agent-created for background-review sediment ([#19621](https://github.com/NousResearch/hermes-agent/pull/19621)) +- Fix: protect hub skills by frontmatter name ([#20194](https://github.com/NousResearch/hermes-agent/pull/20194)) + +--- + +## 🔧 Tool System + +### File tools +- **Post-write delta lint on `write_file` + `patch`** — in-proc linters for Python, JSON, YAML, TOML ([#20191](https://github.com/NousResearch/hermes-agent/pull/20191)) + +### Cron +- **`no_agent` mode — script-only cron jobs (watchdog pattern)** 
([#19709](https://github.com/NousResearch/hermes-agent/pull/19709)) +- **`context_from` chaining docs** (salvage #15724) ([#20394](https://github.com/NousResearch/hermes-agent/pull/20394)) +- Fix: treat non-dict origin as missing instead of crashing tick ([#19283](https://github.com/NousResearch/hermes-agent/pull/19283)) +- Fix: bump skill usage when cron jobs load skills ([#19433](https://github.com/NousResearch/hermes-agent/pull/19433)) +- Fix: recover null `next_run_at` jobs ([#19576](https://github.com/NousResearch/hermes-agent/pull/19576)) +- Fix: skip AI call when prerun script produces no output ([#19628](https://github.com/NousResearch/hermes-agent/pull/19628)) +- Fix: expand config.yaml refs during job execution ([#19872](https://github.com/NousResearch/hermes-agent/pull/19872)) +- Fix: serialize `get_due_jobs` writes to prevent parallel state corruption ([#19874](https://github.com/NousResearch/hermes-agent/pull/19874)) +- Fix: initialize MCP servers before constructing the cron AIAgent ([#21354](https://github.com/NousResearch/hermes-agent/pull/21354)) + +### MCP +- **SSE transport support** (salvage #19135) ([#21227](https://github.com/NousResearch/hermes-agent/pull/21227)) +- **Forward OAuth auth + bump `sse_read_timeout` on SSE transport** ([#21323](https://github.com/NousResearch/hermes-agent/pull/21323)) +- **Retry stale pipe transport failures as session-expired** ([#21289](https://github.com/NousResearch/hermes-agent/pull/21289)) +- **Surface image tool results as MEDIA tags instead of dropping them** ([#21328](https://github.com/NousResearch/hermes-agent/pull/21328)) +- **Periodic keepalive to `_wait_for_lifecycle_event`** (salvage #17016) ([#20209](https://github.com/NousResearch/hermes-agent/pull/20209)) +- Fix: reconnect on terminated sessions ([#19380](https://github.com/NousResearch/hermes-agent/pull/19380)) +- Fix: decouple AnyUrl import from mcp dependency ([#19695](https://github.com/NousResearch/hermes-agent/pull/19695)) +- Fix: `mcp add 
--command` gets distinct argparse dest ([#21204](https://github.com/NousResearch/hermes-agent/pull/21204)) +- Fix: clear stale thread interrupt before MCP discovery ([#21276](https://github.com/NousResearch/hermes-agent/pull/21276)) +- Fix: report configured timeout in MCP call errors ([#21281](https://github.com/NousResearch/hermes-agent/pull/21281)) +- Fix: include exception type in error messages when str(exc) is empty (salvage #19425) ([#21292](https://github.com/NousResearch/hermes-agent/pull/21292)) +- Fix: re-raise CancelledError explicitly in `MCPServerTask.run` ([#21318](https://github.com/NousResearch/hermes-agent/pull/21318)) +- Fix: coerce numeric tool args defensively in `mcp_serve` ([#21329](https://github.com/NousResearch/hermes-agent/pull/21329)) +- Fix: gate utility stubs on server-advertised capabilities ([#21347](https://github.com/NousResearch/hermes-agent/pull/21347)) + +### Browser +- Fix: allow explicit CDP override without local agent-browser ([#19670](https://github.com/NousResearch/hermes-agent/pull/19670)) +- Fix: inject `--no-sandbox` for root + AppArmor userns restrictions ([#19747](https://github.com/NousResearch/hermes-agent/pull/19747)) +- Fix: tighten Lightpanda fallback edge cases (@kshitijk4poor) ([#20672](https://github.com/NousResearch/hermes-agent/pull/20672)) + +### Web tools +- **Per-capability backend selection — search/extract split** (@kshitijk4poor) ([#20061](https://github.com/NousResearch/hermes-agent/pull/20061)) +- **SearXNG native search-only backend** (@kshitijk4poor) ([#20823](https://github.com/NousResearch/hermes-agent/pull/20823)) + +### Approval / Tool gating +- Fix: wake blocked gateway approvals on session cleanup ([#18171](https://github.com/NousResearch/hermes-agent/pull/18171)) +- Fix: harden YOLO mode env parsing against quoted-bool strings ([#18214](https://github.com/NousResearch/hermes-agent/pull/18214)) +- Fix: extend sensitive write target to cover shell RC and credential files 
([#19282](https://github.com/NousResearch/hermes-agent/pull/19282)) + +--- + +## 🔌 Plugin System + +- **`transform_llm_output` plugin hook** (salvage of #20813) ([#21235](https://github.com/NousResearch/hermes-agent/pull/21235)) +- **Document `env_enablement_fn` + `cron_deliver_env_var` platform-plugin hooks** ([#21331](https://github.com/NousResearch/hermes-agent/pull/21331)) +- **Pluggable surfaces coverage — model-provider guide, full plugin map, opt-in fix** ([#20749](https://github.com/NousResearch/hermes-agent/pull/20749)) +- **Plugin-authoring gaps — image-gen provider guide + publishing a skill tap** ([#20800](https://github.com/NousResearch/hermes-agent/pull/20800)) + +--- + +## 🧩 Skills Ecosystem + +### New optional skills +- **Shopify** — Admin + Storefront GraphQL optional skill ([#18116](https://github.com/NousResearch/hermes-agent/pull/18116)) +- **here.now** — optional skill ([#18170](https://github.com/NousResearch/hermes-agent/pull/18170)) +- **shop-app** — personal shopping assistant (optional) ([#20702](https://github.com/NousResearch/hermes-agent/pull/20702)) +- **Anthropic financial-services bundle** — ported as optional finance skills ([#21180](https://github.com/NousResearch/hermes-agent/pull/21180)) +- **kanban-video-orchestrator** — creative optional skill (@SHL0MS) ([#19281](https://github.com/NousResearch/hermes-agent/pull/19281)) +- **searxng-search** — optional skill + Web Search + Extract docs page (@kshitijk4poor) ([#20841](https://github.com/NousResearch/hermes-agent/pull/20841), [#20844](https://github.com/NousResearch/hermes-agent/pull/20844)) + +### Skill UX +- **Linear skill — add Documents support + Python helper script** ([#20752](https://github.com/NousResearch/hermes-agent/pull/20752)) +- **Modernize Obsidian skill to use file tools** (salvage #19332) ([#20413](https://github.com/NousResearch/hermes-agent/pull/20413)) +- **Default custom tool creation to plugins** (@kshitijk4poor) 
([#19755](https://github.com/NousResearch/hermes-agent/pull/19755)) +- **skill_commands cache — rescan on platform scope changes** (salvage #14570 by @LeonSGP43) ([#18739](https://github.com/NousResearch/hermes-agent/pull/18739)) +- **Skills — additional rescan paths in skill_commands cache** (salvage #19042) ([#21181](https://github.com/NousResearch/hermes-agent/pull/21181)) +- Fix: regression tests for non-dict metadata in `extract_skill_conditions` ([#18213](https://github.com/NousResearch/hermes-agent/pull/18213)) +- Docs: explain restoring bundled skills (salvage #19254) ([#20404](https://github.com/NousResearch/hermes-agent/pull/20404)) +- Docs: document `hermes skills reset` subcommand (salvage #11544) ([#20395](https://github.com/NousResearch/hermes-agent/pull/20395)) +- Docs: himalaya v1.2.0 `folder.aliases` syntax ([#19882](https://github.com/NousResearch/hermes-agent/pull/19882)) +- Point agent at `hermes-agent` skill + docs site sync ([#20390](https://github.com/NousResearch/hermes-agent/pull/20390)) + +--- + +## 🖥️ CLI & User Experience + +### CLI +- **`/new` accepts optional session name argument** (salvage of #19555) ([#19637](https://github.com/NousResearch/hermes-agent/pull/19637)) +- **100 new CLI startup tips** ([#20168](https://github.com/NousResearch/hermes-agent/pull/20168)) +- **`display.language` — static message translation** (zh/ja/de/es) ([#20231](https://github.com/NousResearch/hermes-agent/pull/20231)) +- **French (fr) locale** (@Foolafroos) ([#20329](https://github.com/NousResearch/hermes-agent/pull/20329)) +- **Ukrainian (uk) locale** ([#20467](https://github.com/NousResearch/hermes-agent/pull/20467)) +- **Turkish (tr) locale** ([#20474](https://github.com/NousResearch/hermes-agent/pull/20474)) +- Fix: recover classic CLI output after resize (@helix4u) ([#20444](https://github.com/NousResearch/hermes-agent/pull/20444)) +- Fix: complete absolute paths as paths (@helix4u) 
([#19930](https://github.com/NousResearch/hermes-agent/pull/19930)) +- Fix: resolve lazy session creation regressions (#18370 fallout) (@alt-glitch) ([#20363](https://github.com/NousResearch/hermes-agent/pull/20363)) +- Fix: local backend CLI always uses launch directory (@alt-glitch) ([#19334](https://github.com/NousResearch/hermes-agent/pull/19334)) +- Refactor: drop dead c-S-c key binding (follow-up to #19895) ([#19919](https://github.com/NousResearch/hermes-agent/pull/19919)) + +### TUI (Ink) +- **`/model` picker overhaul to match `hermes model` with inline auth** (@austinpickett) ([#18117](https://github.com/NousResearch/hermes-agent/pull/18117)) +- **Collapsible sections in startup banner** — skills, system prompt, MCP (@kshitijk4poor) ([#20625](https://github.com/NousResearch/hermes-agent/pull/20625)) +- **Show context compression count in status bar** ([#21218](https://github.com/NousResearch/hermes-agent/pull/21218)) +- Perf: reduce overlay render churn with focused selectors (@OutThisLife) ([#20393](https://github.com/NousResearch/hermes-agent/pull/20393)) +- Fix: restore voice push-to-talk parity (salvage of #16189 by @Montbra) (@OutThisLife) ([#20897](https://github.com/NousResearch/hermes-agent/pull/20897)) +- Fix: kanban button (@austinpickett) ([#18358](https://github.com/NousResearch/hermes-agent/pull/18358)) + +### Dashboard +- **Plugins page — manage, enable/disable, auth status** (@austinpickett) ([#18095](https://github.com/NousResearch/hermes-agent/pull/18095)) +- **Profiles management page** (@vincez-hms-coder) ([#16419](https://github.com/NousResearch/hermes-agent/pull/16419)) +- **Interactive column sorting in analytics tables** ([#18192](https://github.com/NousResearch/hermes-agent/pull/18192)) +- **`default-large` built-in theme with 18px base size** ([#20820](https://github.com/NousResearch/hermes-agent/pull/20820)) +- **Support serving under URL prefix via `X-Forwarded-Prefix`** (salvage #19450) 
([#21296](https://github.com/NousResearch/hermes-agent/pull/21296)) +- **Launch dashboard as side-process via `HERMES_DASHBOARD=1` in Docker** (@benbarclay) ([#19540](https://github.com/NousResearch/hermes-agent/pull/19540)) +- Fix: dashboard theme layout shift (@AllardQuek) ([#17232](https://github.com/NousResearch/hermes-agent/pull/17232)) +- Fix: gateway model picker current context (@helix4u) ([#20513](https://github.com/NousResearch/hermes-agent/pull/20513)) + +### Update + setup +- **`hermes update --yes/-y` to skip interactive prompts** ([#18261](https://github.com/NousResearch/hermes-agent/pull/18261)) +- **Restart manual profile gateways after update** ([#18178](https://github.com/NousResearch/hermes-agent/pull/18178)) + +### Profiles +- **`--no-skills` flag for empty profile creation** ([#20986](https://github.com/NousResearch/hermes-agent/pull/20986)) + +--- + +## 🎵 Voice, Image & Media + +- **xAI Custom Voices — voice cloning** (@alt-glitch) ([#18776](https://github.com/NousResearch/hermes-agent/pull/18776)) +- **Achievements — share card render on unlocked badges** ([#19657](https://github.com/NousResearch/hermes-agent/pull/19657)) +- **Refresh systemd unit on gateway boot (not just start/restart)** (@alt-glitch) ([#19684](https://github.com/NousResearch/hermes-agent/pull/19684)) + +--- + +## 🔗 API Server & Remote Access + +- **`X-Hermes-Session-Key` header for long-term memory scoping** (closes #20060) ([#20199](https://github.com/NousResearch/hermes-agent/pull/20199)) + +--- + +## 🧰 ACP Adapter (VS Code / Zed / JetBrains) + +- **`/steer` and `/queue` slash commands** (@HenkDz) ([#18114](https://github.com/NousResearch/hermes-agent/pull/18114)) +- Fix: translate Windows cwd for WSL sessions (salvage #18128) ([#18233](https://github.com/NousResearch/hermes-agent/pull/18233)) +- Fix: run `/steer` as a regular prompt on idle sessions ([#18258](https://github.com/NousResearch/hermes-agent/pull/18258)) +- Fix: route Zed thoughts to reasoning + polish 
tool/context rendering ([#19139](https://github.com/NousResearch/hermes-agent/pull/19139)) +- Fix: atomic session persistence via `replace_messages` (salvage #13675) ([#20279](https://github.com/NousResearch/hermes-agent/pull/20279)) +- Fix: preserve assistant reasoning metadata in session persistence (salvage #13575) ([#20296](https://github.com/NousResearch/hermes-agent/pull/20296)) +- Docs: update VS Code setup for ACP Client extension (salvage #12495) ([#20433](https://github.com/NousResearch/hermes-agent/pull/20433)) + +--- + +## 🐳 Docker + +- **Launch dashboard as side-process via `HERMES_DASHBOARD=1`** (@benbarclay) ([#19540](https://github.com/NousResearch/hermes-agent/pull/19540)) +- **Refuse root gateway runs in official image** (salvage #19215) ([#21250](https://github.com/NousResearch/hermes-agent/pull/21250)) +- **Chown runtime `node_modules` trees to hermes user** (salvage #19303) ([#21267](https://github.com/NousResearch/hermes-agent/pull/21267)) +- Fix: exclude compose/profile runtime state from build context ([#19626](https://github.com/NousResearch/hermes-agent/pull/19626)) +- CI: don't cancel overlapping builds, guard `:latest` (@ethernet8023) ([#20890](https://github.com/NousResearch/hermes-agent/pull/20890)) +- Test: align Dockerfile contract tests with simplified TUI flow (salvage #19024) ([#21174](https://github.com/NousResearch/hermes-agent/pull/21174)) +- Docs: connect to local inference servers (vLLM, Ollama) (salvage #12335) ([#20407](https://github.com/NousResearch/hermes-agent/pull/20407)) +- Docs: document `API_SERVER_*` env vars (salvage #11758) ([#20409](https://github.com/NousResearch/hermes-agent/pull/20409)) +- Docs: clarify Docker terminal backend is a single persistent container ([#20003](https://github.com/NousResearch/hermes-agent/pull/20003)) + +--- + +## 🐛 Notable Bug Fixes + +### Agent +- Fix: recover lazy session creation regressions (#18370 fallout) (@alt-glitch) 
([#20363](https://github.com/NousResearch/hermes-agent/pull/20363)) +- Fix: propagate ContextVars to concurrent tool worker threads (salvage #16660) ([#18123](https://github.com/NousResearch/hermes-agent/pull/18123)) +- Fix: warning-first tool-call loop guardrails ([#18227](https://github.com/NousResearch/hermes-agent/pull/18227)) +- Fix: surface self-improvement review summaries across CLI, TUI, and gateway ([#18073](https://github.com/NousResearch/hermes-agent/pull/18073)) + +### Gateway streaming +- Fix: harden StreamingConfig bool and numeric coercion (@simbam99) ([#16463](https://github.com/NousResearch/hermes-agent/pull/16463)) + +### Model +- Fix: avoid Bedrock credential probe in provider picker (@helix4u) ([#18998](https://github.com/NousResearch/hermes-agent/pull/18998)) + +### Doctor +- Fix: check global agent-browser when local install not found ([#19671](https://github.com/NousResearch/hermes-agent/pull/19671)) +- Test: kimi-coding-cn provider validation regression ([#19734](https://github.com/NousResearch/hermes-agent/pull/19734)) + +### Update +- Fix: patch `isatty` on real streams to fix xdist-flaky `--yes` tests (salvage #19026) ([#21175](https://github.com/NousResearch/hermes-agent/pull/21175)) +- Fix: teach restart-mocks about the post-update survivor sweep (salvage #19031) ([#21177](https://github.com/NousResearch/hermes-agent/pull/21177)) + +### Auth +- Fix: ACP — preserve assistant reasoning metadata ([#20296](https://github.com/NousResearch/hermes-agent/pull/20296)) + +### Redact +- Fix: add `code_file` param to skip false-positive ENV/JSON patterns ([#19715](https://github.com/NousResearch/hermes-agent/pull/19715)) + +### Email +- Fix: quoted-relative file-drop paths + Date header on tool email path ([#19646](https://github.com/NousResearch/hermes-agent/pull/19646)) + +--- + +## 🧪 Testing + +- **ACP — accept prompt persistence kwargs in MCP E2E mocks** (@stephenschoettler) ([#18047](https://github.com/NousResearch/hermes-agent/pull/18047)) +- 
**Toolsets — include kanban in expected post-#17805 toolset assertions** (@briandevans) ([#18122](https://github.com/NousResearch/hermes-agent/pull/18122)) +- **Agent — cover max-iterations summary message sanitization** ([#19580](https://github.com/NousResearch/hermes-agent/pull/19580)) +- **run_agent — `-inf` and `nan` regression coverage for `_coerce_number`** ([#19703](https://github.com/NousResearch/hermes-agent/pull/19703)) + +--- + +## 📚 Documentation + +### Major docs additions +- **`llms.txt` + `llms-full.txt` — agent-friendly ingestion** ([#18276](https://github.com/NousResearch/hermes-agent/pull/18276)) +- **User Stories and Use Cases collage page** ([#18282](https://github.com/NousResearch/hermes-agent/pull/18282)) +- **Persistent Goals (/goal) feature page** ([#18275](https://github.com/NousResearch/hermes-agent/pull/18275)) +- **Windows (WSL2) guide expansion** — filesystem, networking, services, pitfalls ([#20748](https://github.com/NousResearch/hermes-agent/pull/20748)) +- **Chinese (zh-CN) README translation** (salvage #13508) ([#20431](https://github.com/NousResearch/hermes-agent/pull/20431)) +- **zh-Hans Docusaurus locale** + Tool Gateway / image-gen / WSL quickstart translations (salvage #11728) ([#20430](https://github.com/NousResearch/hermes-agent/pull/20430)) +- **Tool Gateway docs restructure** — lead with what it does, config moved to bottom ([#20827](https://github.com/NousResearch/hermes-agent/pull/20827)) +- **Quickstart — Onchain AI Garage Hermes tutorials playlist** ([#20192](https://github.com/NousResearch/hermes-agent/pull/20192)) +- **Open WebUI bootstrap script** (salvage #9566) ([#20427](https://github.com/NousResearch/hermes-agent/pull/20427)) +- **Local Ollama setup guide** (salvage #5842) ([#20426](https://github.com/NousResearch/hermes-agent/pull/20426)) +- **Google Gemini guide** (salvage #17450) ([#20401](https://github.com/NousResearch/hermes-agent/pull/20401)) +- **Custom model aliases for /model command** 
([#20475](https://github.com/NousResearch/hermes-agent/pull/20475)) +- **Together/Groq/Perplexity cookbook via `custom_providers`** (salvage #15214) ([#20400](https://github.com/NousResearch/hermes-agent/pull/20400)) +- **Doubao speech integration examples** (TTS + STT) (salvage #18065) ([#20418](https://github.com/NousResearch/hermes-agent/pull/20418)) +- **WSL-to-Windows Chrome MCP bridge** (salvage #8313) ([#20428](https://github.com/NousResearch/hermes-agent/pull/20428)) +- **Hermes skills docs sync** — slash commands + durable-systems section ([#20390](https://github.com/NousResearch/hermes-agent/pull/20390)) +- **AGENTS.md — curator/cron/delegation/toolsets + fix plugin tree** ([#20226](https://github.com/NousResearch/hermes-agent/pull/20226)) +- **Bedrock quickstart entry + fallback comment + deployment link** (salvage #11093) ([#20397](https://github.com/NousResearch/hermes-agent/pull/20397)) + +### Docs polish +- Collapse exploding skills tree to a single Skills node ([#18259](https://github.com/NousResearch/hermes-agent/pull/18259)) +- Clarify `session_search` auxiliary model docs ([#19593](https://github.com/NousResearch/hermes-agent/pull/19593)) +- Open WebUI Quick Setup gap fill ([#19654](https://github.com/NousResearch/hermes-agent/pull/19654)) +- Default custom tool creation to plugins (@kshitijk4poor) ([#19755](https://github.com/NousResearch/hermes-agent/pull/19755)) +- Clarify Telegram group chat troubleshooting (salvage #18672) ([#20416](https://github.com/NousResearch/hermes-agent/pull/20416)) +- Codex OAuth auth prerequisite clarification (salvage #18688) ([#20417](https://github.com/NousResearch/hermes-agent/pull/20417)) +- Discord Server Members Intent + SSRC-mapping drift + /voice join slash Choice (salvage #11350) ([#20411](https://github.com/NousResearch/hermes-agent/pull/20411)) +- Document `ctx.dispatch_tool()` (salvage #10955) ([#20391](https://github.com/NousResearch/hermes-agent/pull/20391)) +- Document `hermes webhook subscribe 
--deliver-only` (salvage #12612) ([#20392](https://github.com/NousResearch/hermes-agent/pull/20392)) +- Document `hermes import` reference (salvage #14711) ([#20396](https://github.com/NousResearch/hermes-agent/pull/20396)) +- Document per-provider TTS `max_text_length` caps (salvage #13825) ([#20389](https://github.com/NousResearch/hermes-agent/pull/20389)) +- Clarify supported prompt customization surfaces (salvage #19987) ([#20383](https://github.com/NousResearch/hermes-agent/pull/20383)) +- Correct `web_extract` summarizer timeout comment (salvage #20051) ([#20381](https://github.com/NousResearch/hermes-agent/pull/20381)) +- Fix fallback provider config paths (salvage #20033) ([#20382](https://github.com/NousResearch/hermes-agent/pull/20382)) +- Fix misleading RL install-extras claim (salvage #19080) ([#21213](https://github.com/NousResearch/hermes-agent/pull/21213)) +- Clarify API server tool execution locality (salvage #19117) ([#21223](https://github.com/NousResearch/hermes-agent/pull/21223)) +- Prefer `.venv` to match AGENTS.md and scripts/run_tests.sh (@xxxigm) ([#21334](https://github.com/NousResearch/hermes-agent/pull/21334)) +- Align tool discovery + test runner with AGENTS.md (@xxxigm) ([#20791](https://github.com/NousResearch/hermes-agent/pull/20791)) +- Align terminal-backend count and naming across docs and code (salvage #19044) ([#20402](https://github.com/NousResearch/hermes-agent/pull/20402)) +- Refresh stale platform counts (salvage #19053) ([#20403](https://github.com/NousResearch/hermes-agent/pull/20403)) + +--- + +## 👥 Contributors + +### Core +- **@teknium1** — salvage, triage, review, feature work, and release management + +### Top Community Contributors + +- **@kshitijk4poor** (21 PRs) — SearXNG native search backend, per-capability backend selection, collapsible TUI startup banner, Slack ephemeral ack + format fixes, Lightpanda fallback hardening, searxng-search optional skill + Web Search + Extract docs, default custom tool creation to 
plugins, kanban failure-column fix +- **@alt-glitch** (13 PRs) — video_analyze tool, xAI Custom Voices (voice cloning), local-backend CLI launch-directory fix, lazy-session creation regression recovery, systemd unit refresh on gateway boot +- **@OutThisLife** (9 PRs) — TUI perf — overlay render churn reduction, voice push-to-talk parity restoration (salvaging @Montbra) +- **@helix4u** (6 PRs) — Classic CLI output recovery after resize, absolute-path TUI completion, gateway model picker current-context fix, Bedrock credential probe avoidance, kanban docs fixes +- **@ethernet8023** (3 PRs) — Docker CI — don't cancel overlapping builds, :latest guard +- **@benbarclay** (3 PRs) — Docker — launch dashboard as side-process via HERMES_DASHBOARD=1 +- **@austinpickett** (3 PRs) — Dashboard Plugins page, TUI /model picker overhaul with inline auth, kanban button fix +- **@sprmn24** (2 PRs) — Contributor (2 PRs) +- **@asheriif** (2 PRs) — Contributor (2 PRs) +- **@xxxigm** (2 PRs) — Contributing docs — .venv preference and test runner alignment with AGENTS.md +- **@stephenschoettler** (1 PR) — ACP — MCP E2E mock kwargs +- **@vincez-hms-coder** (1 PR) — Dashboard — Profiles management page +- **@cdanis** (1 PR) — Contributor +- **@briandevans** (1 PR) — Toolsets test — kanban assertions post-#17805 +- **@heyitsaamir** (1 PR) — Contributor + +### All Contributors + +Thanks to everyone who contributed to v0.13.0 — commits, co-authored work, and salvaged PRs. 295 contributors in one week. 
+ +@0oAstro, @0xDevNinja, @0xharryriddle, @0xKingBack, @0xsir0000, @0xyg3n, @0z1-ghb, @abhinav11082001-stack, +@acc001k, @acesjohnny, @adamludwin, @adybag14-cyber, @agentlinker, @agilejava, @ai-ag2026, @AJV20, +@alanxchen85, @albert748, @AllardQuek, @alt-glitch, @altmazza0-star, @ambition0802, @amitgaur, @amroessam, +@andrewhosf, @Asce66, @asheriif, @ashermorse, @asimons81, @Aslaaen, @Asunfly, @atongrun, @austinpickett, +@banditburai, @barteqpl, @Bartok9, @Beandon13, @beardthelion, @beibi9966, @benbarclay, @binhnt92, @bjianhang, +@BlackJulySnow, @bobashopcashier, @bogerman1, @Bongulielmi, @Brecht-H, @briandevans, @brooklynnicholson, +@c3115644151, @camaragon, @CashWilliams, @CCClelo, @cdanis, @CES4751, @cg2aigc, @changchun989, @ChanlerDev, +@CharlieKerfoot, @chengoak, @chenyunbo411, @chinadbo, @CIRWEL, @cixuuz, @cmcgrabby-hue, @colorcross, +@Contentment003111, @CoreyNoDream, @counterposition, @curiouscleo, @DaniuXie, @deep-name, @dengtaoyuan450-a11y, +@discodirector, @donramon77, @dpaluy, @ee-blog, @ehz0ah, @el-analista, @elmatadorgh, @EmelyanenkoK, +@Emidomenge, @emozilla, @Es1la, @EthanGuo-coder, @etherman-os, @ethernet8023, @EvilDrag0n, @exxmen, @Fearvox, +@Feranmi10, @firefly, @flobo3, @fmercurio, @Foolafroos, @formulahendry, @franksong2702, @ggnnggez, @GinWU05, +@giwaov, @glesperance, @gnanirahulnutakki, @GodsBoy, @Gosuj, @Grey0202, @guillaumemeyer, @Gutslabs, @h0tp-ftw, +@haidao1919, @halmisen, @happy5318, @hedirman, @helix4u, @hendrixfreire, @HenkDz, @hex-clawd, @heyitsaamir, +@hharry11, @Hinotoi-agent, @holynn-q, @hrkzogw, @Hypn0sis, @Hypnus-Yuan, @ideathinklab01-source, @IMHaoyan, +@Interstellar-code, @ishardo, @jacdevos, @jackey8616, @JanCong, @jasonoutland, @jatingodnani, @JayGwod, +@jethac, @JezzaHehn, @JiaDe-Wu, @jjjojoj, @jkausel-ai, @John-tip, @johnncenae, @jrusso1020, @jslizar, +@JTroyerOvermatch, @julysir, @Junass1, @JustinUssuri, @Kailigithub, @keepcalmqqf, @kiala9, @konsisumer, +@kowenhaoai, @Krionex, @kshitijk4poor, @kyan12, @leavrcn, @leon7609, 
@LeonSGP43, @leprincep35700, @lhysdl, +@likejudy, @lisanhu, @liu-collab, @liuguangyong93, @liuhao1024, @LucianoSP, @luoyuctl, @luyao618, @M3RCUR2Y, +@maciekczech, @Magicray1217, @magicray1217, @MaHaoHao-ch, @malaiwah, @manateelazycat, @masonjames, @megastary, +@memosr, @MichaelWDanko, @mikeyobrien, @millerc79, @Mind-Dragon, @mioimotoai-lgtm, @misery-hl, @molvikar, +@momowind, @Montbra, @MottledShadow, @mrbob-git, @mrcharlesiv, @mrcoferland, @ms-alan, @mwnickerson, +@nazirulhafiy, @nftpoetrist, @nicoloboschi, @nightq, @nikolay-bratanov, @NikolayGusev-astra, @nocturnum91, +@noOne-list, @nouseman666, @novax635, @npmisantosh, @nudiltoys-cmyk, @olisikh, @oluwadareab12, @Oxidane-bot, +@pama0227, @pander, @pasevin, @paul-tian, @pdonizete, @perlowja, @pingchesu, @PratikRai0101, @priveperfumes, +@probepark, @QifengKuang, @quocanh261997, @qWaitCrypto, @qxxaa, @r266-tech, @rames-jusso, @revaraver, +@Ricardo-M-L, @rob-maron, @Roy-oss1, @rxdxxxx, @SandroHub013, @Sanjays2402, @Sertug17, @shashwatgokhe, +@shellybotmoyer, @SHL0MS, @SimbaKingjoe, @simbam99, @simplenamebox-ops, @socrates1024, @sonic-netizen, +@sprmn24, @steezkelly, @stephen0110, @stephenschoettler, @stevenchanin, @stevenchouai, @stormhierta, +@subtract0, @suncokret12, @swithek, @taeng0204, @TakeshiSawaguchi, @tangyuanjc, @TheEpTic, @thelumiereguy, +@Tkander1715, @tmdgusya, @Tranquil-Flow, @TruaShamu, @UgwujaGeorge, @valda, @vincez-hms-coder, @VinVC, +@vominh1919, @wabrent, @WadydX, @wanazhar, @WanderWang, @warabe1122, @web-dev0521, @WideLee, @willy-scr, +@wmagev, @WuTianyi123, @wxst, @wysie, @Wysie, @xsfX20, @xxxigm, @xyiy001, @YanzhongSu, @ygd58, @Yoimex, +@yuehei, @Yukipukii1, @yuqianma, @YX234, @zeejaytan, @zhanggttry, @zhao0112, @zng8418, @zons-zhaozhy, @Zyproth + +--- + +**Full Changelog**: [v2026.4.30...v2026.5.7](https://github.com/NousResearch/hermes-agent/compare/v2026.4.30...v2026.5.7) diff --git a/acp_adapter/server.py b/acp_adapter/server.py index dd9d75af9c9..c61bb80e471 100644 --- 
a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -3,13 +3,16 @@ from __future__ import annotations import asyncio +import base64 import contextvars import json import logging import os from collections import defaultdict, deque from concurrent.futures import ThreadPoolExecutor +from pathlib import Path from typing import Any, Deque, Optional +from urllib.parse import unquote, urlparse import acp from acp.schema import ( @@ -18,6 +21,7 @@ from acp.schema import ( AuthenticateResponse, AvailableCommand, AvailableCommandsUpdate, + BlobResourceContents, ClientCapabilities, EmbeddedResourceContentBlock, ForkSessionResponse, @@ -46,6 +50,7 @@ from acp.schema import ( SessionResumeCapabilities, SessionInfo, TextContentBlock, + TextResourceContents, UnstructuredCommandInput, Usage, UsageUpdate, @@ -83,6 +88,272 @@ _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent") # does not expose a client-side limit, so this is a fixed cap that clients # paginate against using `cursor` / `next_cursor`. 
_LIST_SESSIONS_PAGE_SIZE = 50 +_MAX_ACP_RESOURCE_BYTES = 512 * 1024 +_TEXT_RESOURCE_MIME_PREFIXES = ("text/",) +_TEXT_RESOURCE_MIME_TYPES = { + "application/json", + "application/javascript", + "application/typescript", + "application/xml", + "application/x-yaml", + "application/yaml", + "application/toml", + "application/sql", +} + + +def _resource_display_name(uri: str, name: str | None = None, title: str | None = None) -> str: + """Human-readable attachment name for prompt context.""" + raw_name = (name or "").strip() + raw_title = (title or "").strip() + if raw_title and raw_name and raw_title != raw_name: + return f"{raw_title} ({raw_name})" + if raw_title: + return raw_title + if raw_name: + return raw_name + parsed = urlparse(uri) + candidate = parsed.path if parsed.scheme else uri + return Path(unquote(candidate)).name or uri or "resource" + + +def _is_text_resource(mime_type: str | None) -> bool: + mime = (mime_type or "").split(";", 1)[0].strip().lower() + if not mime: + return False + return mime.startswith(_TEXT_RESOURCE_MIME_PREFIXES) or mime in _TEXT_RESOURCE_MIME_TYPES + + +def _is_image_resource(mime_type: str | None) -> bool: + mime = (mime_type or "").split(";", 1)[0].strip().lower() + return mime.startswith("image/") + + +def _guess_image_mime_from_path(path: Path) -> str | None: + suffix = path.suffix.lower() + return { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + ".bmp": "image/bmp", + ".svg": "image/svg+xml", + }.get(suffix) + + +def _image_data_url(data: bytes, mime_type: str) -> str: + return f"data:{mime_type};base64,{base64.b64encode(data).decode('ascii')}" + + +def _path_from_file_uri(uri: str) -> Path | None: + """Convert local file URIs/paths from ACP clients into a readable Path. + + Zed may send POSIX file URIs from Linux/WSL workspaces or Windows-ish paths + when launched through wsl.exe. Translate the common Windows drive form to + /mnt//... 
so Hermes running in WSL can read it. + """ + raw = (uri or "").strip() + if not raw: + return None + + parsed = urlparse(raw) + if parsed.scheme and parsed.scheme != "file": + return None + + if parsed.scheme == "file": + if parsed.netloc and parsed.netloc not in {"", "localhost"}: + return None + path_text = unquote(parsed.path or "") + else: + path_text = unquote(raw) + + # file:///C:/Users/... or C:\Users\... + if len(path_text) >= 3 and path_text[0] == "/" and path_text[2] == ":" and path_text[1].isalpha(): + drive = path_text[1].lower() + rest = path_text[3:].lstrip("/\\").replace("\\", "/") + return Path("/mnt") / drive / rest + if len(path_text) >= 2 and path_text[1] == ":" and path_text[0].isalpha(): + drive = path_text[0].lower() + rest = path_text[2:].lstrip("/\\").replace("\\", "/") + return Path("/mnt") / drive / rest + + return Path(path_text) + + +def _decode_text_bytes(data: bytes, mime_type: str | None) -> str | None: + """Decode resource bytes if they are probably text; return None for binary.""" + if b"\x00" in data and not _is_text_resource(mime_type): + return None + for encoding in ("utf-8-sig", "utf-8", "latin-1"): + try: + return data.decode(encoding) + except UnicodeDecodeError: + continue + return data.decode("utf-8", errors="replace") + + +def _format_resource_text( + *, + uri: str, + body: str, + name: str | None = None, + title: str | None = None, + note: str | None = None, +) -> str: + display = _resource_display_name(uri, name=name, title=title) + header = f"[Attached file: {display}]" + if note: + header += f" ({note})" + return f"{header}\nURI: {uri}\n\n{body}" + + +def _resource_link_to_parts(block: ResourceContentBlock) -> list[dict[str, Any]]: + """Convert an ACP resource_link block to OpenAI content parts. + + Returns a list of {"type": "text", ...} and/or {"type": "image_url", ...} + parts. Image resources produce an image_url part with a small text header + so the model knows which attachment it is. 
Non-image resources return a + single text part with the inlined file body (or a binary-omit note). + """ + uri = str(getattr(block, "uri", "") or "").strip() + if not uri: + return [] + + name = str(getattr(block, "name", "") or "").strip() or None + title = str(getattr(block, "title", "") or "").strip() or None + mime_type = str(getattr(block, "mime_type", "") or "").strip() or None + path = _path_from_file_uri(uri) + + if path is None: + return [{ + "type": "text", + "text": _format_resource_text( + uri=uri, + name=name, + title=title, + body="[Resource link only; Hermes cannot read non-file ACP resource URIs directly.]", + ), + }] + + # Image files: emit a short text header + image_url data URL so vision + # models can see the attachment instead of a "binary omitted" note. + image_mime = mime_type if _is_image_resource(mime_type) else _guess_image_mime_from_path(path) + if image_mime and _is_image_resource(image_mime): + try: + size = path.stat().st_size + if size > _MAX_ACP_RESOURCE_BYTES: + return [{ + "type": "text", + "text": _format_resource_text( + uri=uri, + name=name, + title=title, + body=f"[Image too large to inline: {size} bytes, cap={_MAX_ACP_RESOURCE_BYTES}]", + ), + }] + with path.open("rb") as fh: + data = fh.read() + except OSError as exc: + logger.warning("ACP image resource read failed: %s", uri, exc_info=True) + return [{ + "type": "text", + "text": _format_resource_text( + uri=uri, + name=name, + title=title, + body=f"[Could not read attached image: {exc}]", + ), + }] + display = _resource_display_name(uri, name=name, title=title) + return [ + {"type": "text", "text": f"[Attached image: {display}]\nURI: {uri}"}, + {"type": "image_url", "image_url": {"url": _image_data_url(data, image_mime)}}, + ] + + try: + size = path.stat().st_size + read_size = min(size, _MAX_ACP_RESOURCE_BYTES) + with path.open("rb") as fh: + data = fh.read(read_size) + text = _decode_text_bytes(data, mime_type) + if text is None: + return [{ + "type": "text", + "text": 
_format_resource_text( + uri=uri, + name=name, + title=title, + body=f"[Binary file omitted: {size} bytes, mime={mime_type or 'unknown'}]", + ), + }] + note = None + if size > _MAX_ACP_RESOURCE_BYTES: + note = f"truncated to {_MAX_ACP_RESOURCE_BYTES} of {size} bytes" + return [{ + "type": "text", + "text": _format_resource_text(uri=uri, name=name, title=title, body=text, note=note), + }] + except OSError as exc: + logger.warning("ACP resource read failed: %s", uri, exc_info=True) + return [{ + "type": "text", + "text": _format_resource_text( + uri=uri, + name=name, + title=title, + body=f"[Could not read attached file: {exc}]", + ), + }] + + +def _embedded_resource_to_parts(block: EmbeddedResourceContentBlock) -> list[dict[str, Any]]: + resource = getattr(block, "resource", None) + if resource is None: + return [] + + uri = str(getattr(resource, "uri", "") or "").strip() + mime_type = str(getattr(resource, "mime_type", "") or "").strip() or None + + if isinstance(resource, TextResourceContents): + return [{"type": "text", "text": _format_resource_text(uri=uri, body=resource.text)}] + + if isinstance(resource, BlobResourceContents): + blob = resource.blob or "" + try: + data = base64.b64decode(blob, validate=True) + except Exception: + data = blob.encode("utf-8", errors="replace") + + # Image blobs go through as image_url so vision models can see them. 
+ if _is_image_resource(mime_type): + if len(data) > _MAX_ACP_RESOURCE_BYTES: + return [{ + "type": "text", + "text": _format_resource_text( + uri=uri, + body=f"[Embedded image too large to inline: {len(data)} bytes, cap={_MAX_ACP_RESOURCE_BYTES}]", + ), + }] + display = _resource_display_name(uri) + return [ + {"type": "text", "text": f"[Attached image: {display}]" + (f"\nURI: {uri}" if uri else "")}, + {"type": "image_url", "image_url": {"url": _image_data_url(data, mime_type or "image/png")}}, + ] + + text = _decode_text_bytes(data[:_MAX_ACP_RESOURCE_BYTES], mime_type) + if text is None: + body = f"[Binary embedded file omitted: {len(data)} bytes, mime={mime_type or 'unknown'}]" + else: + body = text + if len(data) > _MAX_ACP_RESOURCE_BYTES: + body += f"\n\n[Truncated to {_MAX_ACP_RESOURCE_BYTES} of {len(data)} bytes]" + return [{"type": "text", "text": _format_resource_text(uri=uri, body=body)}] + + text = getattr(resource, "text", None) + if text: + return [{"type": "text", "text": _format_resource_text(uri=uri, body=str(text))}] + return [] def _extract_text( @@ -144,6 +415,20 @@ def _content_blocks_to_openai_user_content( if image_part is not None: parts.append(image_part) continue + if isinstance(block, ResourceContentBlock): + resource_parts = _resource_link_to_parts(block) + for part in resource_parts: + parts.append(part) + if part.get("type") == "text": + text_parts.append(part["text"]) + continue + if isinstance(block, EmbeddedResourceContentBlock): + resource_parts = _embedded_resource_to_parts(block) + for part in resource_parts: + parts.append(part) + if part.get("type") == "text": + text_parts.append(part["text"]) + continue if not parts: return _extract_text(prompt) @@ -803,6 +1088,7 @@ class HermesACPAgent(acp.Agent): user_text = _extract_text(prompt).strip() user_content = _content_blocks_to_openai_user_content(prompt) + text_only_prompt = all(isinstance(block, TextContentBlock) for block in prompt) has_content = bool(user_text) or ( 
isinstance(user_content, list) and bool(user_content) ) @@ -821,7 +1107,7 @@ class HermesACPAgent(acp.Agent): # silently append to state.queued_prompts and respond with # "No active turn — queued for the next turn", which looks like # /queue even though the user never typed /queue. - if isinstance(user_content, str) and user_text.startswith("/steer"): + if text_only_prompt and isinstance(user_content, str) and user_text.startswith("/steer"): steer_text = user_text.split(maxsplit=1)[1].strip() if len(user_text.split(maxsplit=1)) > 1 else "" interrupted_prompt = "" rewrite_idle = False @@ -846,7 +1132,7 @@ class HermesACPAgent(acp.Agent): # Slash commands are text-only; if the client included images/resources, # send the whole multimodal prompt to the agent instead of treating it as # an ACP command. - if isinstance(user_content, str) and user_text.startswith("/"): + if text_only_prompt and isinstance(user_content, str) and user_text.startswith("/"): response_text = self._handle_slash_command(user_text, state) if response_text is not None: if self._conn: diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index bb1b33fcc82..eb6b3e79adf 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -231,33 +231,30 @@ def _supports_fast_mode(model: str) -> bool: return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS) -# Beta headers for enhanced features (sent with ALL auth types). -# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the +# Beta headers for enhanced features that are safe on ordinary/native Anthropic +# requests. As of Opus 4.7 (2026-04-16), these are GA on Claude 4.6+ — the # beta headers are still accepted (harmless no-op) but not required. Kept -# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints -# that still gate on the headers continue to get the enhanced features. 
+# here so older Claude (4.5, 4.1) + compatible endpoints that still gate on +# the headers continue to get the enhanced features. # -# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7 -# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on -# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still -# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus -# at 200K even though model_metadata.py advertises 1M. The header is a harmless -# no-op on endpoints where 1M is GA. +# Do NOT include ``context-1m-2025-08-07`` here. Anthropic returns HTTP 400 +# ("long context beta is not yet available for this subscription") for +# accounts without the long-context beta, which breaks normal short auxiliary +# calls like title generation/session summarization. # -# Migration guide: remove these if you no longer support ≤4.5 models or once -# Bedrock/Azure promote 1M to GA. +# ``context-1m-2025-08-07`` is still required to unlock the 1M context window +# on Claude Opus 4.6/4.7 and Sonnet 4.6 when served via AWS Bedrock or Azure +# AI Foundry. Add it only for those endpoint-specific paths below. _COMMON_BETAS = [ "interleaved-thinking-2025-05-14", "fine-grained-tool-streaming-2025-05-14", - "context-1m-2025-08-07", ] # MiniMax's Anthropic-compatible endpoints fail tool-use requests when # the fine-grained tool streaming beta is present. Omit it so tool calls # fall back to the provider's default response path. _TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14" -# 1M context beta — see comment on _COMMON_BETAS above. Stripped for -# Bearer-auth (MiniMax) endpoints since they host their own models and -# unknown Anthropic beta headers risk request rejection. +# 1M context beta. Native Anthropic does not get this by default because some +# subscriptions reject it, but Bedrock/Azure still need it for 1M context. 
_CONTEXT_1M_BETA = "context-1m-2025-08-07" # Fast mode beta — enables the ``speed: "fast"`` request parameter for @@ -476,6 +473,14 @@ def _requires_bearer_auth(base_url: str | None) -> bool: return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")) +def _base_url_needs_context_1m_beta(base_url: str | None) -> bool: + """Return True for endpoints that still gate 1M context behind a beta.""" + normalized = _normalize_base_url_text(base_url).lower() + if not normalized: + return False + return "azure.com" in normalized + + def _common_betas_for_base_url( base_url: str | None, *, @@ -485,27 +490,25 @@ def _common_betas_for_base_url( MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests that include Anthropic's ``fine-grained-tool-streaming`` beta — every - tool-use message triggers a connection error. Strip that beta for - Bearer-auth endpoints while keeping all other betas intact. + tool-use message triggers a connection error. - The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth - endpoints — MiniMax hosts its own models, not Claude, so the header is - irrelevant at best and risks request rejection at worst. + The ``context-1m-2025-08-07`` beta is not sent to native Anthropic by + default because some subscriptions reject it. Add it only for endpoint + families that still require it for 1M context, currently Azure AI Foundry. + Bedrock uses its own client helper below and opts in explicitly. - ``drop_context_1m_beta=True`` additionally strips the 1M-context beta on - otherwise-unrelated endpoints. The OAuth retry path flips this flag after - a subscription rejects the beta with - "The long context beta is not yet available for this subscription" so - subsequent requests in the same session don't repeat the probe. See the - reactive recovery loop in ``run_agent.py`` and issue-comment history on - PR #17680 for the full rationale. 
+ ``drop_context_1m_beta=True`` strips the 1M-context beta from any path that + would otherwise include it after a subscription/endpoint rejects the beta. """ + betas = list(_COMMON_BETAS) + if _base_url_needs_context_1m_beta(base_url) and not drop_context_1m_beta: + betas.append(_CONTEXT_1M_BETA) if _requires_bearer_auth(base_url): _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA} - return [b for b in _COMMON_BETAS if b not in _stripped] + return [b for b in betas if b not in _stripped] if drop_context_1m_beta: - return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] - return _COMMON_BETAS + return [b for b in betas if b != _CONTEXT_1M_BETA] + return betas def build_anthropic_client( @@ -642,7 +645,7 @@ def build_anthropic_bedrock_client(region: str): return _anthropic_sdk.AnthropicBedrock( aws_region=region, timeout=Timeout(timeout=900.0, connect=10.0), - default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)}, + default_headers={"anthropic-beta": ",".join([*_COMMON_BETAS, _CONTEXT_1M_BETA])}, ) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 1e3d39c7ba5..bd4e6be4579 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -455,6 +455,12 @@ def _to_openai_base_url(base_url: str) -> str: """ url = str(base_url or "").strip().rstrip("/") if url.endswith("/anthropic"): + # ZAI (open.bigmodel.cn) uses /api/anthropic for Anthropic wire + # but /api/paas/v4 for OpenAI wire — the generic /v1 rewrite is wrong. + if "open.bigmodel.cn" in url or "bigmodel" in url: + rewritten = url[: -len("/anthropic")] + "/paas/v4" + logger.debug("Auxiliary client: rewrote ZAI base URL %s → %s", url, rewritten) + return rewritten rewritten = url[: -len("/anthropic")] + "/v1" logger.debug("Auxiliary client: rewrote base URL %s → %s", url, rewritten) return rewritten @@ -596,6 +602,14 @@ class _CodexCompletionsAdapter: "store": False, } + # Preserve the chat.completions timeout contract. 
This adapter is used + # by auxiliary calls such as context compression; if the timeout is not + # forwarded and enforced, a Codex Responses stream can sit behind a + # dead-looking CLI until the user force-interrupts the whole session. + timeout = kwargs.get("timeout") + if timeout is not None: + resp_kwargs["timeout"] = timeout + # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT # support max_output_tokens or temperature — omit to avoid 400 errors. @@ -653,6 +667,37 @@ class _CodexCompletionsAdapter: text_parts: List[str] = [] tool_calls_raw: List[Any] = [] usage = None + total_timeout = timeout if isinstance(timeout, (int, float)) and timeout > 0 else None + deadline = time.monotonic() + float(total_timeout) if total_timeout else None + timed_out = threading.Event() + timeout_timer: Optional[threading.Timer] = None + + def _timeout_message() -> str: + return f"Codex auxiliary Responses stream exceeded {float(total_timeout):.1f}s total timeout" + + def _close_client_on_timeout() -> None: + timed_out.set() + close = getattr(self._client, "close", None) + if callable(close): + try: + close() + except Exception: + logger.debug("Codex auxiliary: client close during timeout failed", exc_info=True) + + def _check_cancelled() -> None: + if deadline is not None and time.monotonic() >= deadline: + timed_out.set() + raise TimeoutError(_timeout_message()) + try: + from tools.interrupt import is_interrupted + if is_interrupted(): + raise InterruptedError("Codex auxiliary Responses stream interrupted") + except InterruptedError: + raise + except Exception: + # Interrupt state is a best-effort UX hook; never make it a + # new failure mode for auxiliary calls. 
+ pass try: # Collect output items and text deltas during streaming — @@ -661,8 +706,14 @@ class _CodexCompletionsAdapter: collected_output_items: List[Any] = [] collected_text_deltas: List[str] = [] has_function_calls = False + if total_timeout: + timeout_timer = threading.Timer(float(total_timeout), _close_client_on_timeout) + timeout_timer.daemon = True + timeout_timer.start() + _check_cancelled() with self._client.responses.stream(**resp_kwargs) as stream: for _event in stream: + _check_cancelled() _etype = getattr(_event, "type", "") if _etype == "response.output_item.done": _done = getattr(_event, "item", None) @@ -674,6 +725,7 @@ class _CodexCompletionsAdapter: collected_text_deltas.append(_delta) elif "function_call" in _etype: has_function_calls = True + _check_cancelled() final = stream.get_final_response() # Backfill empty output from collected stream events @@ -733,8 +785,13 @@ class _CodexCompletionsAdapter: total_tokens=getattr(resp_usage, "total_tokens", 0), ) except Exception as exc: + if timed_out.is_set(): + raise TimeoutError(_timeout_message()) from exc logger.debug("Codex auxiliary Responses API call failed: %s", exc) raise + finally: + if timeout_timer is not None: + timeout_timer.cancel() content = "".join(text_parts).strip() or None @@ -828,7 +885,14 @@ class _AnthropicCompletionsAdapter: model = kwargs.get("model", self._model) tools = kwargs.get("tools") tool_choice = kwargs.get("tool_choice") - max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 2000 + # ZAI's Anthropic-compatible endpoint rejects max_tokens on vision + # models (glm-4v-flash etc.) with error code 1210. When the caller + # signals this by setting _skip_zai_max_tokens in kwargs, omit it. 
+ _skip_mt = kwargs.pop("_skip_zai_max_tokens", False) + if _skip_mt: + max_tokens = None + else: + max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 2000 temperature = kwargs.get("temperature") normalized_tool_choice = None @@ -2835,6 +2899,33 @@ def resolve_vision_provider_client( ) return _finalize(requested, sync_client, default_model) + # ZAI vision models must use the OpenAI-compatible endpoint, not the + # Anthropic-compatible one (which may be the main-runtime default). + # The Anthropic wire rejects max_tokens on multimodal calls (error 1210), + # while the OpenAI wire handles it correctly. + if requested == "zai" and not resolved_base_url: + zai_openai_urls = [ + "https://open.bigmodel.cn/api/paas/v4", + "https://api.z.ai/api/paas/v4", + ] + for _zai_url in zai_openai_urls: + client, final_model = _get_cached_client( + requested, resolved_model, async_mode, + base_url=_zai_url, + api_key=resolved_api_key or None, + api_mode="chat_completions", + is_vision=True, + ) + if client is not None: + return _finalize(requested, client, final_model) + # Fallback: try without explicit base_url (old behavior) + client, final_model = _get_cached_client(requested, resolved_model, async_mode, + api_mode=resolved_api_mode, + is_vision=True) + if client is None: + return requested, None, None + return requested, client, final_model + client, final_model = _get_cached_client(requested, resolved_model, async_mode, api_mode=resolved_api_mode, is_vision=True) @@ -2862,10 +2953,11 @@ def auxiliary_max_tokens_param(value: int) -> dict: """ custom_base = _current_custom_base_url() or_key = os.getenv("OPENROUTER_API_KEY") - # Only use max_completion_tokens for direct OpenAI custom endpoints + # Use max_completion_tokens for direct OpenAI-compatible providers that reject + # max_tokens on newer GPT-4o/o-series/GPT-5-style models. 
if (not or_key and _read_nous_auth() is None - and base_url_hostname(custom_base) == "api.openai.com"): + and base_url_hostname(custom_base) in {"api.openai.com", "api.githubcopilot.com"}): return {"max_completion_tokens": value} return {"max_tokens": value} @@ -3393,7 +3485,16 @@ def _build_call_kwargs( if max_tokens is not None: # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens. # Direct OpenAI api.openai.com with newer models needs max_completion_tokens. - if provider == "custom": + # ZAI vision models (glm-4v-flash, glm-4v-plus, etc.) reject max_tokens with + # error code 1210 ("API 调用参数有误") on multimodal requests — skip it. + _model_lower = (model or "").lower() + _skip_max_tokens = ( + provider == "zai" + and ("4v" in _model_lower or "5v" in _model_lower or "-v" in _model_lower) + ) + if _skip_max_tokens: + pass # ZAI vision models do not accept max_tokens + elif provider == "custom": custom_base = base_url or _current_custom_base_url() if base_url_hostname(custom_base) == "api.openai.com": kwargs["max_completion_tokens"] = max_tokens @@ -3624,13 +3725,23 @@ def call_llm( kwargs = retry_kwargs err_str = str(first_err) + # ZAI vision models (glm-4v-flash etc.) return error code 1210 + # ("API 调用参数有误") when max_tokens is passed on multimodal + # calls. The error message does NOT contain "max_tokens" so the + # generic retry below never fires. Detect the ZAI-specific error + # and strip max_tokens before retrying. 
+ _is_zai_param_error = ( + "1210" in err_str + and "bigmodel" in str(getattr(client, "base_url", "")) + ) if max_tokens is not None and ( "max_tokens" in err_str or "unsupported_parameter" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens") + or _is_zai_param_error ): kwargs.pop("max_tokens", None) - kwargs["max_completion_tokens"] = max_tokens + kwargs.pop("max_completion_tokens", None) try: return _validate_llm_response( client.chat.completions.create(**kwargs), task) @@ -3930,13 +4041,23 @@ async def async_call_llm( kwargs = retry_kwargs err_str = str(first_err) + # ZAI vision models (glm-4v-flash etc.) return error code 1210 + # ("API 调用参数有误") when max_tokens is passed on multimodal + # calls. The error message does NOT contain "max_tokens" so the + # generic retry below never fires. Detect the ZAI-specific error + # and strip max_tokens before retrying. + _is_zai_param_error = ( + "1210" in err_str + and "bigmodel" in str(getattr(client, "base_url", "")) + ) if max_tokens is not None and ( "max_tokens" in err_str or "unsupported_parameter" in err_str or _is_unsupported_parameter_error(first_err, "max_tokens") + or _is_zai_param_error ): kwargs.pop("max_tokens", None) - kwargs["max_completion_tokens"] = max_tokens + kwargs.pop("max_completion_tokens", None) try: return _validate_llm_response( await client.chat.completions.create(**kwargs), task) diff --git a/agent/bedrock_adapter.py b/agent/bedrock_adapter.py index c1dc6bb979c..34eebd73ba8 100644 --- a/agent/bedrock_adapter.py +++ b/agent/bedrock_adapter.py @@ -631,11 +631,18 @@ def normalize_converse_response(response: Dict) -> SimpleNamespace: stop_reason = response.get("stopReason", "end_turn") text_parts = [] + reasoning_parts = [] tool_calls = [] for block in content_blocks: if "text" in block: text_parts.append(block["text"]) + elif "reasoningContent" in block: + reasoning = block["reasoningContent"] + if isinstance(reasoning, dict): + thinking_text = reasoning.get("text", "") + if 
thinking_text: + reasoning_parts.append(str(thinking_text)) elif "toolUse" in block: tu = block["toolUse"] tool_calls.append(SimpleNamespace( @@ -652,6 +659,7 @@ def normalize_converse_response(response: Dict) -> SimpleNamespace: role="assistant", content="\n".join(text_parts) if text_parts else None, tool_calls=tool_calls if tool_calls else None, + reasoning_content="\n\n".join(reasoning_parts) if reasoning_parts else None, ) # Build usage stats @@ -732,6 +740,7 @@ def stream_converse_with_callbacks( ``normalize_converse_response()``. """ text_parts: List[str] = [] + reasoning_parts: List[str] = [] tool_calls: List[SimpleNamespace] = [] current_tool: Optional[Dict] = None current_text_buffer: List[str] = [] @@ -777,8 +786,10 @@ def stream_converse_with_callbacks( reasoning = delta["reasoningContent"] if isinstance(reasoning, dict): thinking_text = reasoning.get("text", "") - if thinking_text and on_reasoning_delta: - on_reasoning_delta(thinking_text) + if thinking_text: + reasoning_parts.append(str(thinking_text)) + if on_reasoning_delta: + on_reasoning_delta(thinking_text) elif "contentBlockStop" in event: if current_tool is not None: @@ -817,6 +828,7 @@ def stream_converse_with_callbacks( role="assistant", content="\n".join(text_parts) if text_parts else None, tool_calls=tool_calls if tool_calls else None, + reasoning_content="\n\n".join(reasoning_parts) if reasoning_parts else None, ) usage = SimpleNamespace( diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 4212085fc67..80b0a9b45b1 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -6,8 +6,7 @@ protecting head and tail context. 
Improvements over v2: - Structured summary template with Resolved/Pending question tracking - - Summarizer preamble: "Do not respond to any questions" (from OpenCode) - - Handoff framing: "different assistant" (from Codex) to create separation + - Filter-safe summarizer preamble that treats prior turns as source material - "Remaining Work" replaces "Next Steps" to avoid reading as active instructions - Clear separator when summary merges into tail message - Iterative summary updates (preserves info across multiple compactions) @@ -755,15 +754,14 @@ class ContextCompressor(ContextEngine): content_to_summarize = self._serialize_for_summary(turns_to_summarize) # Preamble shared by both first-compaction and iterative-update prompts. - # Inspired by OpenCode's "do not respond to any questions" instruction - # and Codex's "another language model" framing. + # Keep the wording deliberately plain: Azure/OpenAI-compatible content + # filters have flagged stronger "injection" / "do not respond" framing. _summarizer_preamble = ( "You are a summarization agent creating a context checkpoint. " - "Your output will be injected as reference material for a DIFFERENT " - "assistant that continues the conversation. " - "Do NOT respond to any questions or requests in the conversation — " - "only output the structured summary. " - "Do NOT include any preamble, greeting, or prefix. " + "Treat the conversation turns below as source material for a " + "compact record of prior work. " + "Produce only the structured summary; do not add a greeting, " + "preamble, or prefix. " "Write the summary in the same language the user was using in the " "conversation — do not translate or switch to English. " "NEVER include API keys, tokens, passwords, secrets, credentials, " @@ -777,7 +775,7 @@ class ContextCompressor(ContextEngine): [THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or task assignment verbatim — the exact words they used. 
If multiple tasks were requested and only some are done, list only the ones NOT yet completed. -The next assistant must pick up exactly here. Example: +Continuation should pick up exactly here. Example: "User asked: 'Now refactor the auth module to use JWT instead of sessions'" If no outstanding task exists, write "None."] @@ -814,7 +812,7 @@ Be specific with file paths, commands, line numbers, and results.] [Important technical decisions and WHY they were made] ## Resolved Questions -[Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them] +[Questions the user asked that were ALREADY answered — include the answer so it is not repeated] ## Pending User Asks [Questions or requests from the user that have NOT yet been answered or fulfilled. If none, write "None."] @@ -851,7 +849,7 @@ Update the summary using this exact structure. PRESERVE all existing information # First compaction: summarize from scratch prompt = f"""{_summarizer_preamble} -Create a structured handoff summary for a different assistant that will continue this conversation after earlier turns are compacted. The next assistant should be able to understand what happened without re-reading the original turns. +Create a structured checkpoint summary for the conversation after earlier turns are compacted. The summary should preserve enough detail for continuity without re-reading the original turns. 
TURNS TO SUMMARIZE: {content_to_summarize} diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 027defa22b9..457b32b37be 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -477,8 +477,8 @@ class CopilotACPClient: proc.stdin.write(json.dumps(payload) + "\n") proc.stdin.flush() - deadline = time.time() + timeout_seconds - while time.time() < deadline: + deadline = time.monotonic() + timeout_seconds + while time.monotonic() < deadline: if proc.poll() is not None: break try: diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 27a16bd435c..0043c70ca29 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -68,8 +68,10 @@ SUPPORTED_POOL_STRATEGIES = { } # Cooldown before retrying an exhausted credential. -# 429 (rate-limited) and 402 (billing/quota) both cool down after 1 hour. +# Transient 401 auth failures cool down briefly so single-key setups can recover. +# 429 (rate-limited), 402 (billing/quota), and other failures cool down after 1 hour. # Provider-supplied reset_at timestamps override these defaults. +EXHAUSTED_TTL_401_SECONDS = 5 * 60 # 5 minutes EXHAUSTED_TTL_429_SECONDS = 60 * 60 # 1 hour EXHAUSTED_TTL_DEFAULT_SECONDS = 60 * 60 # 1 hour @@ -190,6 +192,8 @@ def _is_manual_source(source: str) -> bool: def _exhausted_ttl(error_code: Optional[int]) -> int: """Return cooldown seconds based on the HTTP status that caused exhaustion.""" + if error_code == 401: + return EXHAUSTED_TTL_401_SECONDS if error_code == 429: return EXHAUSTED_TTL_429_SECONDS return EXHAUSTED_TTL_DEFAULT_SECONDS @@ -305,14 +309,29 @@ def _iter_custom_providers(config: Optional[dict] = None): yield _normalize_custom_pool_name(name), entry -def get_custom_provider_pool_key(base_url: str) -> Optional[str]: +def get_custom_provider_pool_key(base_url: str, provider_name: Optional[str] = None) -> Optional[str]: """Look up the custom_providers list in config.yaml and return 'custom:' for a matching base_url. 
+ When provider_name is given, prefer matching by name first (solving the case where + multiple custom providers share the same base_url but have different API keys). + Falls back to base_url matching when no name match is found. + Returns None if no match is found. """ if not base_url: return None normalized_url = base_url.strip().rstrip("/") + + # When a provider name is given, try to match by name first. + # This fixes the P1 bug where two custom providers sharing the same + # base_url always resolve to the first one's credentials. + if provider_name: + normalized_name = _normalize_custom_pool_name(provider_name) + for norm_name, entry in _iter_custom_providers(): + if norm_name == normalized_name: + return f"{CUSTOM_POOL_PREFIX}{norm_name}" + + # Fall back to base_url matching (original behavior) for norm_name, entry in _iter_custom_providers(): entry_url = str(entry.get("base_url") or "").strip().rstrip("/") if entry_url and entry_url == normalized_url: diff --git a/agent/display.py b/agent/display.py index 474595d76c0..1dd65c3514f 100644 --- a/agent/display.py +++ b/agent/display.py @@ -852,13 +852,15 @@ def get_cute_tool_message( s = str(s) if _tool_preview_max_len == 0: return s # no limit - return (s[:n-3] + "...") if len(s) > n else s + limit = _tool_preview_max_len + return (s[:limit-3] + "...") if len(s) > limit else s def _path(p, n=35): p = str(p) if _tool_preview_max_len == 0: return p # no limit - return ("..." + p[-(n-3):]) if len(p) > n else p + limit = _tool_preview_max_len + return ("..." + p[-(limit-3):]) if len(p) > limit else p def _wrap(line: str) -> str: """Apply skin tool prefix and failure suffix.""" diff --git a/agent/image_routing.py b/agent/image_routing.py index bd2ba83c87a..0b6687787a0 100644 --- a/agent/image_routing.py +++ b/agent/image_routing.py @@ -144,7 +144,51 @@ def decide_image_input_mode( # it fires, which is cheaper than permanent quality loss. 
-def _guess_mime(path: Path) -> str: +def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]: + """Detect image MIME from magic bytes. Returns None if unrecognised. + + Filename-based detection (``mimetypes.guess_type``) is unreliable when + upstream platforms lie about content-type. Discord, for example, can + serve a PNG with ``content_type=image/webp`` for proxied/animated + stickers, custom emoji previews, or images uploaded via certain bots. + Anthropic strictly validates that declared media_type matches the + actual bytes and returns HTTP 400 on mismatch, so we sniff to be safe. + """ + if not raw: + return None + # PNG: 89 50 4E 47 0D 0A 1A 0A + if raw.startswith(b"\x89PNG\r\n\x1a\n"): + return "image/png" + # JPEG: FF D8 FF + if raw.startswith(b"\xff\xd8\xff"): + return "image/jpeg" + # GIF87a / GIF89a + if raw[:6] in (b"GIF87a", b"GIF89a"): + return "image/gif" + # WEBP: "RIFF" .... "WEBP" + if len(raw) >= 12 and raw[:4] == b"RIFF" and raw[8:12] == b"WEBP": + return "image/webp" + # BMP: "BM" + if raw.startswith(b"BM"): + return "image/bmp" + # HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc. + if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in ( + b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis", + ): + return "image/heic" + return None + + +def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str: + """Return image MIME type for *path*. + + If *raw* bytes are provided, magic-byte sniffing wins (authoritative). + Otherwise we fall back to ``mimetypes`` then suffix-based defaults. 
+ """ + if raw is not None: + sniffed = _sniff_mime_from_bytes(raw) + if sniffed: + return sniffed mime, _ = mimetypes.guess_type(str(path)) if mime and mime.startswith("image/"): return mime @@ -178,7 +222,7 @@ def _file_to_data_url(path: Path) -> Optional[str]: except Exception as exc: logger.warning("image_routing: failed to read %s — %s", path, exc) return None - mime = _guess_mime(path) + mime = _guess_mime(path, raw=raw) b64 = base64.b64encode(raw).decode("ascii") return f"data:{mime};base64,{b64}" @@ -190,24 +234,30 @@ def build_native_content_parts( """Build an OpenAI-style ``content`` list for a user turn. Shape: - [{"type": "text", "text": "..."}, + [{"type": "text", "text": "...\\n\\n[Image attached at: /local/path]"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}, ...] + The local path of each successfully attached image is appended to the + text part as ``[Image attached at: ]``. The model still sees the + pixels via the ``image_url`` part (full native vision); the path note + just gives it a string handle so MCP/skill tools that take an image + path or URL argument can be invoked on the same image without an + extra round-trip. This parallels the text-mode hint produced by + ``Runner._enrich_message_with_vision`` (``vision_analyze using image_url: + ``) so behaviour is consistent across both image input modes. + Images are attached at their native size. If a provider rejects the request because an image is too large (e.g. Anthropic's 5 MB per-image ceiling), the agent's retry loop transparently shrinks and retries once — see ``run_agent._try_shrink_image_parts_in_messages``. Returns (content_parts, skipped_paths). Skipped paths are files that - couldn't be read from disk. + couldn't be read from disk and are NOT advertised in the path hints. 
""" - parts: List[Dict[str, Any]] = [] skipped: List[str] = [] - - text = (user_text or "").strip() - if text: - parts.append({"type": "text", "text": text}) + image_parts: List[Dict[str, Any]] = [] + attached_paths: List[str] = [] for raw_path in image_paths: p = Path(raw_path) @@ -218,15 +268,30 @@ def build_native_content_parts( if not data_url: skipped.append(str(raw_path)) continue - parts.append({ + image_parts.append({ "type": "image_url", "image_url": {"url": data_url}, }) + attached_paths.append(str(raw_path)) - # If the text was empty, add a neutral prompt so the turn isn't just images. - if not text and any(p.get("type") == "image_url" for p in parts): - parts.insert(0, {"type": "text", "text": "What do you see in this image?"}) + text = (user_text or "").strip() + # If at least one image attached, build a single text part that combines + # the user's caption (or a neutral default) with one path hint per image. + if attached_paths: + base_text = text or "What do you see in this image?" + path_hints = "\n".join( + f"[Image attached at: {p}]" for p in attached_paths + ) + combined_text = f"{base_text}\n\n{path_hints}" + parts: List[Dict[str, Any]] = [{"type": "text", "text": combined_text}] + parts.extend(image_parts) + return parts, skipped + + # No images successfully attached — fall back to plain text-only behaviour. + parts = [] + if text: + parts.append({"type": "text", "text": text}) return parts, skipped diff --git a/agent/models_dev.py b/agent/models_dev.py index 79cfa90ca95..0ef18f4ce1f 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -381,14 +381,18 @@ def get_model_capabilities(provider: str, model: str) -> Optional[ModelCapabilit # Extract capability flags (default to False if missing) supports_tools = bool(entry.get("tool_call", False)) - # Vision: check both the `attachment` flag and `modalities.input` for "image". - # Some models (e.g. gemma-4) list image in input modalities but not attachment. 
+ # Vision: prefer explicit `modalities.input` when models.dev provides it. + # The older `attachment` flag can be stale or too broad for image routing; + # fall back to it only when the input modalities are absent/invalid. input_mods = entry.get("modalities", {}) if isinstance(input_mods, dict): - input_mods = input_mods.get("input", []) + input_mods = input_mods.get("input") else: - input_mods = [] - supports_vision = bool(entry.get("attachment", False)) or "image" in input_mods + input_mods = None + if isinstance(input_mods, list): + supports_vision = "image" in input_mods + else: + supports_vision = bool(entry.get("attachment", False)) supports_reasoning = bool(entry.get("reasoning", False)) # Extract limits diff --git a/agent/redact.py b/agent/redact.py index afdee652888..1ac284cffd4 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -56,12 +56,15 @@ _SENSITIVE_BODY_KEYS = frozenset({ }) # Snapshot at import time so runtime env mutations (e.g. LLM-generated -# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction -# mid-session. OFF by default — user must opt in via -# `security.redact_secrets: true` in config.yaml (bridged to this env var -# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true` -# in ~/.hermes/.env. -_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on") +# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction +# mid-session. ON by default — secure default per issue #17691. Users who +# need raw credential values in tool output (e.g. working on the redactor +# itself) can opt out via `security.redact_secrets: false` in config.yaml +# (bridged to this env var in hermes_cli/main.py, gateway/run.py, and +# cli.py) or `HERMES_REDACT_SECRETS=false` in ~/.hermes/.env. An opt-out +# warning is logged at gateway and CLI startup so operators see the +# downgrade — see `_log_redaction_status()` in gateway/run.py and cli.py. 
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "true").lower() in ("1", "true", "yes", "on") # Known API key prefixes -- match the prefix + contiguous token chars _PREFIX_PATTERNS = [ diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py index 746f9620979..467b72931c2 100644 --- a/agent/usage_pricing.py +++ b/agent/usage_pricing.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from dataclasses import dataclass from datetime import datetime, timezone from decimal import Decimal @@ -82,6 +83,121 @@ _UTC_NOW = lambda: datetime.now(timezone.utc) # Official docs snapshot entries. Models whose published pricing and cache # semantics are stable enough to encode exactly. _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { + # ── Anthropic Claude 4.7 ───────────────────────────────────────────── + # Opus 4.5/4.6/4.7 share $5/$25 pricing (new tokenizer, up to 35% more + # tokens for the same text). + # Source: https://platform.claude.com/docs/en/about-claude/pricing + ( + "anthropic", + "claude-opus-4-7", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-opus-4-7-20250507", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + # ── Anthropic Claude 4.6 ───────────────────────────────────────────── + ( + "anthropic", + "claude-opus-4-6", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + 
output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-opus-4-6-20250414", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-sonnet-4-6", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + cache_read_cost_per_million=Decimal("0.30"), + cache_write_cost_per_million=Decimal("3.75"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-sonnet-4-6-20250414", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + cache_read_cost_per_million=Decimal("0.30"), + cache_write_cost_per_million=Decimal("3.75"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + # ── Anthropic Claude 4.5 ───────────────────────────────────────────── + ( + "anthropic", + "claude-opus-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + 
"anthropic", + "claude-sonnet-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + cache_read_cost_per_million=Decimal("0.30"), + cache_write_cost_per_million=Decimal("3.75"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-haiku-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("1.00"), + output_cost_per_million=Decimal("5.00"), + cache_read_cost_per_million=Decimal("0.10"), + cache_write_cost_per_million=Decimal("1.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + # ── Anthropic Claude 4 / 4.1 ───────────────────────────────────────── ( "anthropic", "claude-opus-4-20250514", @@ -91,8 +207,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("1.50"), cache_write_cost_per_million=Decimal("18.75"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-prompt-caching-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), ( "anthropic", @@ -103,8 +219,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("0.30"), cache_write_cost_per_million=Decimal("3.75"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-prompt-caching-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), # OpenAI ( @@ -184,7 +300,7 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { 
source_url="https://openai.com/api/pricing/", pricing_version="openai-pricing-2026-03-16", ), - # Anthropic older models (pre-4.6 generation) + # ── Anthropic older models (pre-4.5 generation) ──────────────────────── ( "anthropic", "claude-3-5-sonnet-20241022", @@ -194,8 +310,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("0.30"), cache_write_cost_per_million=Decimal("3.75"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-pricing-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), ( "anthropic", @@ -206,8 +322,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("0.08"), cache_write_cost_per_million=Decimal("1.00"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-pricing-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), ( "anthropic", @@ -218,8 +334,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("1.50"), cache_write_cost_per_million=Decimal("18.75"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-pricing-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), ( "anthropic", @@ -230,8 +346,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("0.03"), cache_write_cost_per_million=Decimal("0.30"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - 
pricing_version="anthropic-pricing-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), # DeepSeek ( @@ -426,8 +542,37 @@ def resolve_billing_route( return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown") +def _normalize_anthropic_model_name(model: str) -> str: + """Normalize Anthropic model name variants to canonical form. + + Handles: + - Dot notation: claude-opus-4.7 → claude-opus-4-7 + - Short aliases: claude-opus-4.7 → claude-opus-4-7 + - Strips anthropic/ prefix if present + """ + name = model.lower().strip() + if name.startswith("anthropic/"): + name = name[len("anthropic/"):] + # Normalize dots to dashes in version numbers (e.g. 4.7 → 4-7, 4.6 → 4-6) + # But preserve the rest of the name structure + name = re.sub(r"(\d+)\.(\d+)", r"\1-\2", name) + return name + + def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry]: - return _OFFICIAL_DOCS_PRICING.get((route.provider, route.model.lower())) + model = route.model.lower() + # Direct lookup first + entry = _OFFICIAL_DOCS_PRICING.get((route.provider, model)) + if entry: + return entry + # Try normalized name for Anthropic (handles dot-notation like opus-4.7) + if route.provider == "anthropic": + normalized = _normalize_anthropic_model_name(model) + if normalized != model: + entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized)) + if entry: + return entry + return None def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]: diff --git a/apps/dashboard/src/components/ChatSidebar.tsx b/apps/dashboard/src/components/ChatSidebar.tsx index 1c923112889..38f1cf80abd 100644 --- a/apps/dashboard/src/components/ChatSidebar.tsx +++ b/apps/dashboard/src/components/ChatSidebar.tsx @@ -303,7 +303,7 @@ export function ChatSidebar({ channel, className }: ChatSidebarProps) { return (