Merge origin/main into pr-27248 (resolving run_agent.py = ours)

run_agent.py taken from HEAD (the extracted forwarder structure). The 25
run_agent.py fixes that landed on main during the PR's life need to be
ported into the agent/* extracted modules in follow-up commits.
This commit is contained in:
teknium1 2026-05-16 23:16:52 -07:00
commit 152d42d1a7
No known key found for this signature in database
355 changed files with 32716 additions and 4195 deletions

58
.github/workflows/history-check.yml vendored Normal file
View file

@ -0,0 +1,58 @@
name: History Check

# Rejects PRs whose branch has no common ancestor with main.
#
# In May 2026 PR #25045 was merged from a branch that had been disconnected
# from main's history (likely an accidental `git checkout --orphan` or
# `.git/` re-init). GitHub's merge UI does not refuse merges of unrelated
# histories, so the PR landed cleanly with the intended one-file change —
# but its parent-less root commit (413990c94) got grafted into main as a
# second root, and ~1500 files' worth of `git blame` history collapsed
# onto that single commit.
#
# This check catches the failure mode by requiring `git merge-base` between
# the PR head commit and main to be non-empty.
#
# NOTE: on `pull_request` events actions/checkout checks out the synthetic
# merge commit (refs/pull/N/merge), whose first parent is main itself. Asking
# for the merge base of origin/main and HEAD would therefore always succeed,
# so the comparison is made against the PR's real head SHA instead.
on:
  pull_request:
    branches: [main]

permissions:
  contents: read

jobs:
  check-common-ancestor:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          fetch-depth: 0  # full history both sides for merge-base

      - name: Reject PRs with no common ancestor on main
        env:
          # Tip of the contributor's branch (second parent of the merge ref).
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          # Distinguish "commit not fetched" from "no shared history" so the
          # contributor is not told to rebase because of a checkout problem.
          if ! git cat-file -e "${PR_HEAD_SHA}^{commit}" 2>/dev/null; then
            echo "::error::PR head commit ${PR_HEAD_SHA} is not present in the checkout; cannot verify ancestry."
            exit 1
          fi

          # `git merge-base` exits non-zero AND prints nothing when the two
          # commits share no ancestor. We check both conditions explicitly
          # so the failure message is clear regardless of which signal fires
          # first.
          if ! BASE=$(git merge-base origin/main "$PR_HEAD_SHA" 2>/dev/null) || [ -z "$BASE" ]; then
            echo ""
            echo "::error::This PR has no common ancestor with main."
            echo ""
            echo "Your branch's history is disconnected from main. Common causes:"
            echo "  - the branch was created with 'git checkout --orphan'"
            echo "  - '.git/' was re-initialized at some point during the work"
            echo "  - the branch was force-pushed from an unrelated repository"
            echo ""
            echo "Merging an unrelated-history PR grafts a parent-less root commit"
            echo "into main and collapses git blame for every file in that snapshot."
            echo "Reference: PR #25045 caused this and re-rooted blame on ~1500"
            echo "files to a single orphan commit."
            echo ""
            echo "To fix, rebase your changes onto current main:"
            echo "  git fetch origin main"
            echo "  git checkout -b fix-branch origin/main"
            echo "  # re-apply your changes (cherry-pick, copy files, etc.)"
            echo "  git push -f origin fix-branch"
            exit 1
          fi
          echo "::notice::Common ancestor with main: $BASE"

View file

@ -11,6 +11,7 @@ on:
- '**/sitecustomize.py'
- '**/usercustomize.py'
- '**/__init__.pth'
- 'pyproject.toml'
permissions:
pull-requests: write
@ -137,3 +138,68 @@ jobs:
run: |
echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
exit 1
dep-bounds:
  # Fails the PR when it adds a PyPI requirement with a `>=` floor but no
  # `<` ceiling to pyproject.toml, and posts a comment listing the offenders.
  # No job-level `if:` is needed: the script exits early with found=false
  # when the PR adds nothing to pyproject.toml.
  name: Check PyPI dependency upper bounds
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
      with:
        fetch-depth: 0  # base and head commits must both be present

    - name: Check for unbounded PyPI deps
      id: bounds
      env:
        # Passed through the environment rather than spliced into the script.
        BASE_SHA: ${{ github.event.pull_request.base.sha }}
        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
      run: |
        set -euo pipefail

        # Three-dot diff: compare against the merge base so changes that
        # landed on main after the branch point are not attributed to the PR.
        # Run on its own line so a git failure aborts the step instead of
        # being masked as "nothing added".
        DIFF=$(git diff "$BASE_SHA"..."$HEAD_SHA" -- pyproject.toml)

        # Only check added lines in pyproject.toml; drop the +++ file header
        # and added TOML comment lines.
        ADDED=$(printf '%s\n' "$DIFF" | grep '^+' | grep -v '^+++' | grep -vE '^\+[[:space:]]*#' || true)

        if [ -z "$ADDED" ]; then
          echo "found=false" >> "$GITHUB_OUTPUT"
          exit 0
        fi

        # Extract quoted requirement strings of the form "name[extras]>=...".
        #   - names may contain dots (e.g. ruamel.yaml)
        #   - `[^]]` is the POSIX way to say "anything but ]"; a backslash is
        #     literal inside a bracket expression, so `[^\]]` does not work
        #   - the version part is taken up to the closing quote so suffixes
        #     (rc1, .post1) and extra clauses (,!=1.5) are captured
        # git+ URL deps carry no `name>=` prefix and are therefore skipped.
        SPECS=$(printf '%s\n' "$ADDED" | grep -oE '"[A-Za-z0-9][A-Za-z0-9._-]*(\[[^]]*\])?[[:space:]]*>=[^"]*"' || true)

        # A spec is bounded when a `<` appears in its version clause. Any
        # environment marker after `;` is ignored for that test, so
        # `python_version < '3.11'` does not count as a ceiling.
        UNBOUNDED=$(printf '%s\n' "$SPECS" | awk '{ spec = $0; sub(/;.*/, "", spec); if (spec != "" && spec !~ /</) print }')

        if [ -n "$UNBOUNDED" ]; then
          echo "found=true" >> "$GITHUB_OUTPUT"
          echo "$UNBOUNDED" > /tmp/unbounded.txt
        else
          echo "found=false" >> "$GITHUB_OUTPUT"
        fi

    - name: Post unbounded dep warning
      if: steps.bounds.outputs.found == 'true'
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
      run: |
        # Blank lines between paragraphs matter: without one before `---`
        # Markdown would render the preceding line as a setext heading.
        BODY="## ⚠️ Unbounded PyPI Dependency Detected

        This PR adds PyPI dependencies without a \`<next_major\` upper bound. Per our [supply chain policy](../blob/main/CONTRIBUTING.md#dependency-pinning-policy-supply-chain-hardening), all PyPI deps must be pinned as \`>=floor,<next_major\`.

        **Unbounded specs found:**
        \`\`\`
        $(cat /tmp/unbounded.txt)
        \`\`\`

        **Fix:** Add an upper bound, e.g. \`\"package>=1.2.0,<2\"\`

        ---
        *See PR #2810 and CONTRIBUTING.md for the full policy rationale.*"

        # Best effort: the token is read-only on fork PRs, so a failed
        # comment must not mask the real failure reported by the next step.
        gh pr comment "$PR_NUMBER" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)"

    - name: Fail on unbounded deps
      if: steps.bounds.outputs.found == 'true'
      run: |
        echo "::error::PyPI dependencies without upper bounds detected. Add <next_major ceiling per CONTRIBUTING.md policy."
        exit 1

163
.github/workflows/upload_to_pypi.yml vendored Normal file
View file

@ -0,0 +1,163 @@
name: Publish to PyPI

# Triggered by CalVer tag pushes from scripts/release.py (e.g. v2026.5.15)
# Can also be triggered manually from the Actions tab as an escape hatch.
#
# Pipeline: build (frontend bundles + wheel/sdist) -> publish (PyPI via OIDC
# trusted publishing) -> sign (Sigstore bundles attached to the GitHub
# Release; tag pushes only).
on:
  push:
    tags:
      - 'v20*'  # CalVer tags: v2026.5.15, v2026.5.15.2, etc.
  workflow_dispatch:
    inputs:
      confirm_tag:
        description: 'Tag to publish (e.g. v2026.5.15). Must already exist.'
        required: true
        type: string

# Restrict default token to read-only; each job escalates as needed.
permissions:
  contents: read

# Prevent overlapping publishes (e.g. two same-day tags pushed quickly).
concurrency:
  group: pypi-publish
  cancel-in-progress: false

jobs:
  build:
    name: Build distribution 📦
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          persist-credentials: false
          # On workflow_dispatch, check out the confirmed tag.
          ref: ${{ inputs.confirm_tag || github.ref }}
          fetch-tags: true

      - name: Validate tag exists
        if: github.event_name == 'workflow_dispatch'
        env:
          # User-supplied input goes through the environment, never straight
          # into the script text, so it cannot be interpreted as shell code.
          CONFIRM_TAG: ${{ inputs.confirm_tag }}
        run: |
          # Exact ref lookup: unlike `git tag -l`, this does not treat the
          # input as a glob pattern, and it rejects branch names.
          if ! git show-ref --verify --quiet "refs/tags/${CONFIRM_TAG}"; then
            echo "::error::Tag '${CONFIRM_TAG}' does not exist in the repo"
            exit 1
          fi

      - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.13'

      - name: Install uv
        uses: astral-sh/setup-uv@d0cc045d04ccac9d8b7881df0226f9e82c39688e  # v6

      - name: Set up Node.js
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
          node-version: '22'

      - name: Build web dashboard
        run: cd web && npm ci && npm run build

      - name: Build TUI bundle
        run: cd ui-tui && npm ci && npm run build

      - name: Bundle TUI into hermes_cli
        run: |
          mkdir -p hermes_cli/tui_dist
          cp ui-tui/dist/entry.js hermes_cli/tui_dist/entry.js

      # Guard against shipping a wheel that is missing its frontend assets.
      - name: Verify frontend assets exist
        run: |
          test -f hermes_cli/web_dist/index.html || { echo "ERROR: web_dist not built"; exit 1; }
          test -f hermes_cli/tui_dist/entry.js || { echo "ERROR: tui_dist not built"; exit 1; }

      - name: Bundle install.sh into wheel
        run: |
          mkdir -p hermes_cli/scripts
          cp scripts/install.sh hermes_cli/scripts/install.sh

      - name: Build wheel and sdist
        run: uv build --sdist --wheel

      - name: Upload distribution artifacts
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: python-package-distributions
          path: dist/

  publish:
    name: Publish to PyPI
    needs: build
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/hermes-agent
    permissions:
      id-token: write  # OIDC trusted publishing
    steps:
      - name: Download distribution artifacts
        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          name: python-package-distributions
          path: dist/

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b  # v1.14.0
        with:
          skip-existing: true

  sign:
    name: Sign and attach to GitHub Release
    # Only runs on tag pushes — release.py creates the GitHub Release,
    # and workflow_dispatch won't have a matching release to attach to.
    # The event check matters: a manual run dispatched *from* a tag ref also
    # has github.ref under refs/tags/, but GITHUB_REF_NAME would then name the
    # dispatch ref rather than the confirm_tag that was actually built.
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
    needs: publish
    runs-on: ubuntu-latest
    permissions:
      contents: write  # attach assets to the existing release
      id-token: write  # sigstore signing
    steps:
      - name: Download distribution artifacts
        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          name: python-package-distributions
          path: dist/

      - name: Wait for GitHub Release to exist
        env:
          GITHUB_TOKEN: ${{ github.token }}
        # release.py creates the GitHub Release after pushing the tag,
        # but this workflow starts from the tag push — wait for it.
        # Polls 30 times, 10 s apart; on timeout it sets skip_sign so the
        # remaining steps are skipped instead of failing the job.
        run: |
          for i in $(seq 1 30); do
            if gh release view "$GITHUB_REF_NAME" --repo "$GITHUB_REPOSITORY" >/dev/null 2>&1; then
              echo "Release $GITHUB_REF_NAME found"
              exit 0
            fi
            echo "Waiting for release... ($i/30)"
            sleep 10
          done
          echo "::warning::Release $GITHUB_REF_NAME not found after 5 minutes — skipping signature upload"
          echo "skip_sign=true" >> "$GITHUB_ENV"

      - name: Sign with Sigstore
        if: env.skip_sign != 'true'
        uses: sigstore/gh-action-sigstore-python@f514d46b907ebcd5bedc05145c03b69c1edd8b46  # v3.0.0
        with:
          inputs: >-
            ./dist/*.tar.gz
            ./dist/*.whl

      - name: Attach signed artifacts to GitHub Release
        if: env.skip_sign != 'true'
        env:
          GITHUB_TOKEN: ${{ github.token }}
        # release.py already created the GitHub Release — just upload
        # the Sigstore signatures alongside the existing assets.
        run: >-
          gh release upload
          "$GITHUB_REF_NAME" dist/*.sigstore.json
          --repo "$GITHUB_REPOSITORY"
          --clobber

3
.gitignore vendored
View file

@ -70,3 +70,6 @@ mini-swe-agent/
result
website/static/api/skills-index.json
models-dev-upstream/
hermes_cli/tui_dist/*
hermes_cli/scripts/
docs/superpowers/*

View file

@ -308,6 +308,29 @@ The registry handles schema collection, dispatch, availability checking, and err
---
## Dependency Pinning Policy
All dependencies must have upper bounds to limit supply-chain attack surface.
This policy was established after the litellm compromise (PR #2796, #2810) and
reinforced after the Mini Shai-Hulud worm campaign (May 2026).
| Source type | Treatment | Example |
|---|---|---|
| PyPI package | `>=floor,<next_major` | `"httpx>=0.28.1,<1"` |
| Git URL | Commit SHA | `git+https://...@<40-char-sha>` |
| GitHub Actions | Commit SHA + comment | `uses: actions/checkout@<sha> # v4` |
| CI-only pip | `==exact` | `pyyaml==6.0.2` |
**When adding a new dependency to `pyproject.toml`:**
1. Pin to `>=current_version,<next_major` for post-1.0 (e.g. `>=1.5.0,<2`).
2. For pre-1.0 packages, use `<0.(current_minor + 3)`, which leaves two minor versions of headroom above the current one (e.g. `>=0.29,<0.32`).
3. Never commit a bare `>=X.Y.Z` without a ceiling — CI and reviewers will reject it.
4. Run `uv lock` to regenerate `uv.lock` with hashes.
Reference: #2810 (bounds pass), #9801 (SHA pinning + audit CI).
---
## Adding Configuration
### config.yaml options:

View file

@ -800,6 +800,47 @@ Hermes has terminal access. Security matters.
If your PR affects security, note it explicitly in the description.
### Dependency pinning policy (supply chain hardening)
After the [litellm supply chain compromise](https://github.com/BerriAI/litellm/issues/24512) in March 2026 and the [Mini Shai-Hulud worm campaign](https://socket.dev/blog/tanstack-npm-packages-compromised-mini-shai-hulud-supply-chain-attack) in May 2026, all dependencies must follow these rules:
| Source type | Required treatment | Rationale |
|---|---|---|
| **PyPI package** | `>=floor,<next_major` | PyPI versions are immutable once published, but new versions can be pushed into your range. A `<next_major` ceiling stops a 1.x install from upgrading to a malicious 2.0.0. |
| **Git URL** (atroposlib, tinker, yc-bench, Baileys) | Full commit SHA | Branches and tags are mutable refs; SHA is content-addressed. |
| **GitHub Actions** | Full commit SHA + version comment | Action tags are mutable refs (e.g. tj-actions/changed-files March 2025). Pin as `uses: owner/action@<sha> # vX.Y.Z` |
| **CI-only pip installs** | `==exact` | Hermetic CI builds; churn is acceptable. |
**Every new PyPI dependency in a PR must have a `<next_major` upper bound.** PRs adding unbounded `>=X.Y.Z` specs will be rejected by reviewers. The `supply-chain-audit.yml` CI workflow also flags dependency manifest changes for manual review.
**How to determine the ceiling:**
- If the package is at version `1.x.y`, use `<2`.
- If the package is at version `0.x.y` (pre-1.0), use `<0.(current_minor + 3)` — e.g. if current is `0.29.x`, use `<0.32`, which admits `0.30.x` and `0.31.x`. This gives ~2 minor versions of headroom while keeping the window small enough that a hostile takeover version is unlikely to land inside it.
- Exception: packages with very stable APIs (e.g. `aiohttp-socks`) can use `<1` at reviewer discretion.
**Examples:**
```toml
# ✅ Correct — post-1.0
"openai>=2.21.0,<3"
"pydantic>=2.12.5,<3"
# ✅ Correct — pre-1.0 (tight minor window)
"asyncpg>=0.29,<0.32"
"aiosqlite>=0.20,<0.23"
"hindsight-client>=0.4.22,<0.5"
# ❌ Rejected — no upper bound
"some-package>=1.2.3"
# ❌ Rejected — too tight (blocks legitimate patches)
"some-package==1.2.3"
# ❌ Rejected — too loose for pre-1.0 (allows 80 minor versions)
"some-package>=0.20,<1"
```
**Reference PRs:** #2796 (litellm removal), #2810 (upper bounds pass), #9801 (SHA pinning + supply-chain-audit CI).
---
## Pull Request Process

View file

@ -182,6 +182,7 @@ scripts/run_tests.sh
- 💬 [Discord](https://discord.gg/NousResearch)
- 📚 [Skills Hub](https://agentskills.io)
- 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
- 🔌 [computer-use-linux](https://github.com/avifenesh/computer-use-linux) — Linux desktop-control MCP server for Hermes and other MCP hosts, with AT-SPI accessibility trees, Wayland/X11 input, screenshots, and compositor window targeting.
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
---

479
RELEASE_v0.14.0.md Normal file
View file

@ -0,0 +1,479 @@
# Hermes Agent v0.14.0 (v2026.5.16)
**Release Date:** May 16, 2026
**Since v0.13.0:** 808 commits · 633 merged PRs · 1393 files changed · 165,061 insertions · 545 issues closed (12 P0, 50 P1) · 215 community contributors (including co-authors)
> The Foundation Release — Hermes installs and runs anywhere, ships with the things you actually want to use, and stops shipping the things you don't. xAI Grok lands as a SuperGrok OAuth provider with grok-4.3 bumped to a 1M context window. A new OpenAI-compatible local proxy turns any OAuth-authed Hermes provider — Claude Pro, ChatGPT Pro, SuperGrok — into an endpoint that Codex / Aider / Cline / Continue can hit. `x_search` lands as a first-class X (Twitter) search tool with OAuth-or-API-key auth. The Microsoft Teams stack is wired end-to-end (Graph auth + webhook listener + pipeline runtime + outbound delivery). A debloating wave makes installs dramatically lighter — heavyweight backends now lazy-install on first use, the `[all]` extras drop everything covered by lazy-deps, and a tiered install falls back when a wheel rejects on your platform. `pip install hermes-agent` works from PyPI. The cold-start wave shaves ~19 seconds off `hermes` launch. Browser CDP calls are 180x faster. Two new messaging platforms (LINE + SimpleX Chat) bring the total to 22. Cross-session 1-hour Claude prompt caching, `/handoff` that actually transfers sessions live, native button UI for `clarify` on Telegram and Discord, Discord channel history backfill, LSP semantic diagnostics on every write, a unified pluggable `video_generate`, a `computer_use` cua-driver backend that finally works with non-Anthropic providers, clickable URLs in any terminal, Zed ACP Registry integration via `uvx`, native Windows beta, 9 new optional skills, OpenRouter Pareto Code router, huggingface/skills as a trusted default tap. 12 P0 + 50 P1 closures.
---
## ✨ Highlights
- **xAI Grok via SuperGrok OAuth — and grok-4.3 jumps to a 1M context window** — If you pay for SuperGrok, you can now use Grok inside Hermes by signing in with your xAI account — no API key, no separate billing. The wire-through also bumps grok-4.3 to a 1M token context window, so you can drop whole codebases or research corpora into a single prompt. Includes proper handling for entitlement errors and an SSH-to-tunnel docs page for when you're SSH'd into a remote box and need to complete the OAuth flow. ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534), [#26664](https://github.com/NousResearch/hermes-agent/pull/26664), [#26644](https://github.com/NousResearch/hermes-agent/pull/26644), [#26592](https://github.com/NousResearch/hermes-agent/pull/26592))
- **OpenAI-compatible local proxy for OAuth providers** — Run `hermes proxy` and you get a `http://localhost:port` endpoint that speaks the OpenAI API but is backed by whichever OAuth provider you're signed into — Claude Pro, ChatGPT Pro, SuperGrok. Now any tool that expects an OpenAI-compatible endpoint (Codex CLI, Aider, Cline, Continue, your custom scripts) just works with your existing subscription, no API key required. One subscription, every tool. ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
- **`x_search` — first-class X (Twitter) search tool** — The agent can now search X directly without installing a skill or wiring up a custom integration. Search the timeline, find threads, surface specific posts — straight from the chat. Auth with either your X OAuth login or an API key, whichever you have. ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
- **Microsoft Teams — end-to-end** — Hermes can now read messages from Teams and post back. The full Microsoft Graph stack lands together: auth + client foundation, a webhook listener that receives Teams events, a pipeline plugin runtime, and outbound delivery. Wire up the bot once, then chat to your agent from any Teams channel, DM, or group. (salvages of #21408–#21411) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922), [#21969](https://github.com/NousResearch/hermes-agent/pull/21969), [#22007](https://github.com/NousResearch/hermes-agent/pull/22007), [#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
- **Debloating wave — lighter installs, less you don't use** — A clean `pip install hermes-agent` used to pull down everything: every messaging adapter SDK, every image-gen SDK, every voice/TTS provider, whether you used them or not. Now those heavy backends (Slack / Matrix / Feishu / DingTalk adapters, hindsight client, codex app-server, Pixverse / Camofox / image-gen SDKs, voice/TTS providers) install automatically the first time you actually use them. The `[all]` extras drop everything covered by lazy-deps, the installer falls back through tiers when a wheel doesn't fit your platform, and a supply-chain advisory checker scans every install for unsafe versions. Faster installs, smaller disk footprint, fewer transitive vulnerabilities. ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220), [#24515](https://github.com/NousResearch/hermes-agent/pull/24515), [#25014](https://github.com/NousResearch/hermes-agent/pull/25014), [#25038](https://github.com/NousResearch/hermes-agent/pull/25038), [#25766](https://github.com/NousResearch/hermes-agent/pull/25766), [#21818](https://github.com/NousResearch/hermes-agent/pull/21818))
- **`pip install hermes-agent && hermes`** — Hermes Agent is now a real PyPI package. No more cloning the repo or running shell installers — one pip command and you're running. The wheel ships with the Ink TUI bundle and the shell launcher, so the full experience comes out of the box. (salvage of [#26350](https://github.com/NousResearch/hermes-agent/pull/26350)) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593), [#26148](https://github.com/NousResearch/hermes-agent/pull/26148))
- **Cross-session 1h Claude prompt cache** — When you use Claude through Anthropic, OpenRouter, or Nous Portal, the prompt prefix (system prompt, skills, memory) now caches for an hour across sessions. Start a `/new` session and the first response comes back faster and cheaper because the cache is still warm from your last session. Background memory review hits the cache too, so it's not paying full price every turn. ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828), [#25434](https://github.com/NousResearch/hermes-agent/pull/25434), [#24778](https://github.com/NousResearch/hermes-agent/pull/24778))
- **180x faster `browser_console` evaluations** — When the agent uses the browser tool to inspect a page or run JavaScript, those calls now share one persistent connection to Chrome instead of spinning up a new DevTools session every time. The difference is huge: things that used to take a couple of seconds per call return in milliseconds. Real-world page interactions feel instant. ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
- **Cold-start performance wave — ~19 seconds off `hermes` launch** — Running `hermes` used to make you wait through a chunk of import overhead and network calls before you saw a prompt. Now the launch path is mostly deferred: heavy adapters only load when you use them, model catalogs come from disk cache first, doctor checks run in parallel, and `chat -q` skips the welcome banner entirely. The `hermes tools` All-Platforms screen alone dropped from 14 seconds to under 1.5 seconds. ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138), [#22120](https://github.com/NousResearch/hermes-agent/pull/22120), [#22681](https://github.com/NousResearch/hermes-agent/pull/22681), [#22790](https://github.com/NousResearch/hermes-agent/pull/22790), [#22808](https://github.com/NousResearch/hermes-agent/pull/22808), [#22831](https://github.com/NousResearch/hermes-agent/pull/22831), [#22859](https://github.com/NousResearch/hermes-agent/pull/22859), [#22904](https://github.com/NousResearch/hermes-agent/pull/22904), [#22766](https://github.com/NousResearch/hermes-agent/pull/22766), [#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
- **Two new messaging platforms — LINE + SimpleX Chat** — LINE is huge in Japan, Korea, and Taiwan, and now Hermes runs natively on the LINE Messaging API. SimpleX Chat is the privacy-focused decentralized messenger with no user IDs — also wired up as a first-class platform. That brings Hermes to 22 messaging platforms total, so wherever you and your team chat, the agent can be there. ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197), [#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
- **`/handoff` actually transfers the session live** — Switching models or personalities mid-conversation used to mean losing context or starting over. Now `/handoff` moves your active session — every message, every tool call, every piece of context — to the target model, persona, or profile, live, without dropping anything. Mid-debugging, hand off from a fast model to a deep-reasoning one, or pass a session between profiles for different parts of a task. ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
- **Native button UI for `clarify` on Telegram and Discord** — When the agent uses the `clarify` tool to ask you a multiple-choice question, it now shows real platform-native buttons on Telegram and Discord instead of asking you to type back the option number. Tap the button, the agent gets your answer. Especially nice on mobile. ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199), [#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
- **Discord channel history backfill (default on)** — When Hermes joins a Discord channel or thread for the first time, it now reads the recent message history so it knows what's been said before it responds. No more "what are we talking about?" — the agent has the context that's already on screen for everyone else. ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
- **`vision_analyze` returns pixels to vision-capable models** — When you point the agent at an image with `vision_analyze` and the active model can actually see (GPT-5, Claude, Gemini, Grok-vision), Hermes now passes the raw pixels straight to the model instead of converting them to a text description first. You get the model's actual visual reasoning instead of a degraded text-summary round-trip. ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
- **Per-turn file-mutation verifier footer** — After every turn that wrote or edited files, the agent now gets a short footer summarizing exactly what changed on disk — the file paths, the line counts, the actual delta. That means the agent catches its own mistakes when a write didn't land or got silently overwritten, instead of confidently telling you "I added the function" when the file wasn't actually saved. ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
- **LSP semantic diagnostics on every write** — When the agent uses `write_file` or `patch`, Hermes now runs a real language server against the edited file and surfaces any new errors back to the agent before the next turn. Type errors, undefined symbols, missing imports — caught immediately. Goes way beyond v0.13.0's basic Python/JSON/YAML/TOML linting because it's actual semantic analysis. ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168), [#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
- **Unified `video_generate` with pluggable provider backends** — One tool, any video model. Hermes ships with the obvious backends already, but you can drop in a new video provider as a plugin without touching core. So when a new video model lands next month, it can be a one-file plugin instead of a fork. ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
- **`computer_use` cua-driver backend — works with non-Anthropic models now** — Computer-use (the agent controlling your mouse and keyboard to drive GUI apps) used to be locked to Anthropic's SDK. The new cua-driver backend works with non-Anthropic providers too, has proper focus-safe operations, and refreshes itself on `hermes update`. Now any vision-capable model can drive your desktop. (re-salvage of #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967), [#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
- **Clickable URLs in any terminal** — Links in agent output are now real OSC8 hyperlinks with hover-highlight in any terminal that supports them. Click to open in your browser — no more copy-paste-trim of long URLs from the transcript. Just works in iTerm2, Kitty, Ghostty, modern Windows Terminal, etc. (@OutThisLife) ([#25071](https://github.com/NousResearch/hermes-agent/pull/25071), [#24013](https://github.com/NousResearch/hermes-agent/pull/24013))
- **Zed ACP Registry — `uvx` install in one click** — Hermes is now listed in Zed's Agent Client Protocol registry, so Zed users can install it with one click. The install path uses `uvx` so there's no npm dependency. `hermes acp --setup-browser` bootstraps the browser tools for registry-driven installs. (salvage of [#25908](https://github.com/NousResearch/hermes-agent/pull/25908)) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079), [#26120](https://github.com/NousResearch/hermes-agent/pull/26120), [#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
- **OpenRouter Pareto Code router with `min_coding_score` knob** — OpenRouter's "Pareto" router automatically picks the cheapest model that meets a minimum quality bar. The new `min_coding_score` config lets you set that bar for coding tasks specifically — Hermes routes to the most affordable model that's at least that good at code. Stop paying for top-tier models when a mid-tier one would do. ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
- **NovitaAI as a new model provider** — NovitaAI joins the provider lineup, giving you another option for open-source model hosting (Llama, Qwen, DeepSeek, etc.) with their pricing and rate limits. (salvage #7219) (@kshitijk4poor) ([#25507](https://github.com/NousResearch/hermes-agent/pull/25507))
- **Codex app-server runtime for OpenAI/Codex models** — An optional runtime that drives OpenAI's Codex CLI under the hood when you're using OpenAI or Codex paths. You get session reuse, automatic retirement of wedged sessions, and proper OAuth refresh classification — the kind of plumbing that makes long agentic runs not fall over. ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182), [#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
- **`huggingface/skills` as a trusted default tap** — The community skills index hosted at huggingface.co/skills is now wired into the Skills Hub by default. So when somebody publishes a useful skill there, you can install it from your own `hermes skills` browser without any extra config. (closes #2549) ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
- **9 new optional skills** — Hyperliquid (perp + spot trading via the SDK and REST API), Yahoo Finance (live market data, fundamentals, historicals), api-testing (REST + GraphQL debug recipes), unified EVM multi-chain (one skill covers Ethereum + L2s + Base), darwinian-evolver (evolutionary prompt/skill tuning), osint-investigation (OSINT recipes for people / domains / orgs), pinggy-tunnel (expose local services to the public internet), watchers (polls RSS / HTTP JSON / GitHub via cron `no_agent` mode for change detection), and a full Notion overhaul for the May 2026 Developer Platform. ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582), [#23583](https://github.com/NousResearch/hermes-agent/pull/23583), [#23590](https://github.com/NousResearch/hermes-agent/pull/23590), [#25299](https://github.com/NousResearch/hermes-agent/pull/25299), [#26760](https://github.com/NousResearch/hermes-agent/pull/26760), [#26729](https://github.com/NousResearch/hermes-agent/pull/26729), [#26765](https://github.com/NousResearch/hermes-agent/pull/26765), [#21881](https://github.com/NousResearch/hermes-agent/pull/21881), [#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
- **API server exposes run approval events** — If you're driving Hermes programmatically through the HTTP API, long-running runs no longer silently hang when the agent hits an approval-required command. The approval request now surfaces on the API stream so your client can prompt the user and reply — no more silent stalls. (salvage of [#20311](https://github.com/NousResearch/hermes-agent/pull/20311)) ([#21899](https://github.com/NousResearch/hermes-agent/pull/21899))
- **Plugins can run any LLM call via `ctx.llm` + replace built-in tools via `tool_override`** — If you're writing a Hermes plugin, you now get first-class access to make LLM calls through the active provider and credentials — no manual client wiring. The new `tool_override` flag lets a plugin swap out a built-in tool with its own implementation cleanly. Plugin authors get the same model-routing and auth plumbing the core agent uses. (closes #11049) ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194), [#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
- **Brave Search (free tier) + DuckDuckGo (DDGS) as web-search providers** — Two new free web-search backends join Tavily, SearXNG, and Exa. Brave Search has a generous free tier; DDGS is the DuckDuckGo scraper that needs no key at all. Pick whichever fits your budget and rate-limit needs. ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
- **Sudo brute-force block + 3 dangerous-command bypasses closed + tool-error sanitization** — The approval gate now blocks `sudo -S` brute-force attempts and classifies stdin-fed or askpass-stripped sudo invocations as DANGEROUS. Three known bypasses of dangerous-command detection are closed (inspired by Claude Code's command-detection work). And tool error strings are now sanitized before being re-injected into the model context, so a malicious file or remote service can't pass instructions to your agent through error output. ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736), [#26829](https://github.com/NousResearch/hermes-agent/pull/26829), [#26823](https://github.com/NousResearch/hermes-agent/pull/26823))
- **`/subgoal` — user-added criteria appended to an active `/goal`** — When you've got a `/goal` running (the persistent Ralph-loop goal where the agent keeps going until criteria are met), you can now use `/subgoal <text>` to layer extra success criteria onto it mid-run. The judge factors your new criteria into the done-or-keep-going decision without restarting the loop. ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
- **Provider rename — Alibaba Cloud → Qwen Cloud** — The Alibaba Cloud provider is renamed to Qwen Cloud in the picker and config to match what the rest of the world calls it. Existing config keys still work — no breaking changes — but the UI matches the actual brand now. ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
- **Native Windows support (early beta)** — Hermes now runs natively on `cmd.exe` and PowerShell without WSL. A full PowerShell installer handles MinGit auto-install, Microsoft Store python stub detection, and the foreground Ctrl+C dance. There are still rough edges (this is the "early beta" stamp) — ~40 follow-up Windows-only fixes already landed in the window — but the basic loop works end-to-end on a clean Windows box. ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561))
---
## 🪟 Windows — Native Support (Early Beta)
### Bootstrap & installer
- **Native Windows support (early beta)** — first-class native Windows path across CLI / gateway / TUI / tools ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561))
- **PyPI wheel packaging — `pip install hermes-agent && hermes`** (salvage of #26350) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593))
- **Recognise Shift+Enter as a newline key** + Windows docs (salvage #21545) ([#22130](https://github.com/NousResearch/hermes-agent/pull/22130))
- **Preserve Ctrl+C for Windows foreground runs** (@helix4u) ([#22752](https://github.com/NousResearch/hermes-agent/pull/22752))
- **Stop spamming cwd-missing + tirith-spawn warnings on every terminal call** ([#26618](https://github.com/NousResearch/hermes-agent/pull/26618))
- **Use `--extra all` not `--all-extras`; drop lazy-covered extras from `[all]`** ([#24515](https://github.com/NousResearch/hermes-agent/pull/24515))
### Windows-specific fixes (40+ across cli / tools / gateway / curator / TUI)
A long tail of native-Windows fixes shipped alongside the beta — taskkill-based subprocess management, MinGit auto-install, Microsoft Store python stub detection, npm prefix handling, native PTY paths, signal handling differences, foreground process management, ANSI sequence handling, path normalization, file-locking semantics, and many more. Full list in commit log under `fix(windows)` / `feat(windows)` / `windows`.
---
## 🚀 Performance Wave
### Cold start
- **Cut ~19s from `hermes` cold start** — skills cache + lazy Feishu + no Nous HTTP at startup ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138))
- **Skip eager plugin discovery on known built-in subcommands** ([#22120](https://github.com/NousResearch/hermes-agent/pull/22120))
- **Cache Nous auth + .env loads** — `hermes tools` All Platforms from 14s to <1.5s ([#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
- **Skip welcome banner on `chat -q` single-query mode** ([#22904](https://github.com/NousResearch/hermes-agent/pull/22904))
- **Defer heavy google-cloud imports in google_chat to first adapter use** ([#22681](https://github.com/NousResearch/hermes-agent/pull/22681))
- **Defer QQAdapter and YuanbaoAdapter imports via PEP 562** ([#22790](https://github.com/NousResearch/hermes-agent/pull/22790))
- **Defer httpx import in teams to first webhook call** ([#22831](https://github.com/NousResearch/hermes-agent/pull/22831))
- **Defer fal_client import to first generation request** ([#22859](https://github.com/NousResearch/hermes-agent/pull/22859))
- **models.dev cache-first lookup, skip network when disk cache is fresh** ([#22808](https://github.com/NousResearch/hermes-agent/pull/22808))
- **Parallelize API connectivity checks in `hermes doctor` and disable IMDS** ([#22766](https://github.com/NousResearch/hermes-agent/pull/22766))
### Runtime
- **180x faster `browser_console` evaluations** — route through supervisor's persistent CDP WebSocket ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
- **Tune Telegram cadence + adaptive fast-path for short replies** (salvage of #10388) ([#23587](https://github.com/NousResearch/hermes-agent/pull/23587))
- **Accumulate length-continuation prefix via list+join** ([#26237](https://github.com/NousResearch/hermes-agent/pull/26237))
### Prompt caching
- **Cross-session 1h prefix cache for Claude on Anthropic / OpenRouter / Nous Portal** ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828))
- **Hit prefix cache in background review fork** (salvage #17276 + #25427) ([#25434](https://github.com/NousResearch/hermes-agent/pull/25434))
---
## 📦 Installation & Distribution
### PyPI + supply-chain
- **PyPI wheel packaging — `pip install hermes-agent && hermes`** (salvage of #26350) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593))
- **Supply-chain advisory checker + lazy-install framework + tiered install fallback** ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220))
- **Use `--extra all` not `--all-extras`; drop lazy-covered extras from `[all]`** ([#24515](https://github.com/NousResearch/hermes-agent/pull/24515))
- **Skip browser download when system chromium exists** (@helix4u) ([#25317](https://github.com/NousResearch/hermes-agent/pull/25317))
### Nix
- **`extraDependencyGroups` for sealed venv extras** (@alt-glitch) ([#21817](https://github.com/NousResearch/hermes-agent/pull/21817))
- **Refresh npm lockfile hashes** — keeps Nix flake builds reproducible
### Docker
- **Bootstrap auth.json from env on first boot** ([#21880](https://github.com/NousResearch/hermes-agent/pull/21880))
- **Drop manual @hermes/ink build, rely on esbuild bundle** — slimmer image
### ACP / Zed
- **Zed ACP Registry integration** (salvage of #25908) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079))
- **Switch to uvx distribution, drop npm launcher** ([#26120](https://github.com/NousResearch/hermes-agent/pull/26120))
- **`hermes acp --setup-browser` bootstraps browser tools for registry installs** ([#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
---
## 🏗️ Core Agent & Architecture
### Sessions & handoff
- **`/handoff` actually transfers the session live** ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
- **Expose `HERMES_SESSION_ID` env var to agent tools** (@alt-glitch) ([#23847](https://github.com/NousResearch/hermes-agent/pull/23847))
### Goals (Ralph loop)
- **`/subgoal` — user-added criteria appended to active `/goal`** ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
- **`/goal` checklist + /subgoal user controls** ([#23456](https://github.com/NousResearch/hermes-agent/pull/23456)) — rolled back in window ([#23813](https://github.com/NousResearch/hermes-agent/pull/23813)); /subgoal returned in simpler form via #25449
### Compression
- **Make `protect_first_n` configurable** ([#25447](https://github.com/NousResearch/hermes-agent/pull/25447))
### Verification
- **Per-turn file-mutation verifier footer** ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
### Stream retry
- **Log inner cause, upstream headers, bytes/elapsed on every drop** ([#23005](https://github.com/NousResearch/hermes-agent/pull/23005))
---
## 🤖 Models & Providers
### New providers
- **xAI Grok OAuth (SuperGrok Subscription) provider** ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534))
- **NovitaAI provider** (salvage #7219) (@kshitijk4poor) ([#25507](https://github.com/NousResearch/hermes-agent/pull/25507))
- **NVIDIA NIM billing origin header** (salvage #25211) ([#26585](https://github.com/NousResearch/hermes-agent/pull/26585))
### Provider work
- **OpenRouter Pareto Code router with `min_coding_score` knob** ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
- **Optional codex app-server runtime for OpenAI/Codex models** ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182))
- **Codex-runtime: retire wedged sessions + post-tool watchdog + OAuth refresh classify** ([#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
- **Codex-runtime: skip unavailable plugins during migration** ([#25437](https://github.com/NousResearch/hermes-agent/pull/25437))
- **Codex-runtime: de-dup `[plugins.X]` tables and stop leaking HERMES_HOME into config.toml** (#26250) (@kshitijk4poor) ([#26260](https://github.com/NousResearch/hermes-agent/pull/26260))
- **Pass `reasoning.effort` to xAI Responses API** ([#22807](https://github.com/NousResearch/hermes-agent/pull/22807))
- **Custom provider: prompt and persist explicit `api_mode`** ([#25068](https://github.com/NousResearch/hermes-agent/pull/25068))
- **Rename Alibaba Cloud → Qwen Cloud, reorder picker** ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
- **Restore gpt-5.3-codex-spark for ChatGPT Pro** (salvage #18286 + #19530, fixes #16172) (@kshitijk4poor) ([#22991](https://github.com/NousResearch/hermes-agent/pull/22991))
- **Inject tool-use enforcement for GLM models** ([#24715](https://github.com/NousResearch/hermes-agent/pull/24715))
- **Use Nous Portal as model metadata authority** (@rob-maron) ([#24502](https://github.com/NousResearch/hermes-agent/pull/24502))
- **Unified `client=hermes-client-v<version>` tag on every Portal request** ([#24779](https://github.com/NousResearch/hermes-agent/pull/24779))
- **Prevent stale Ollama credentials after provider switch** (@kshitijk4poor) ([#21703](https://github.com/NousResearch/hermes-agent/pull/21703))
- **Auxiliary client: rotate pooled auth after quota failures** (salvage #22779) ([#22792](https://github.com/NousResearch/hermes-agent/pull/22792))
- **Auxiliary client: skip providers without credentials immediately** (#25395) ([#25487](https://github.com/NousResearch/hermes-agent/pull/25487))
- **Auth: send Nous refresh token via header** (@shannonsands) ([#21578](https://github.com/NousResearch/hermes-agent/pull/21578))
- **MiniMax: harden OAuth dashboard and runtime** ([#24165](https://github.com/NousResearch/hermes-agent/pull/24165))
### OpenAI-compatible proxy
- **Local OpenAI-compatible proxy for OAuth providers** — Codex / Aider / Cline can hit Claude Pro, ChatGPT Pro, SuperGrok ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
---
## 📱 Messaging Platforms (Gateway)
### New platforms
- **LINE Messaging API platform plugin** ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197))
- **SimpleX Chat platform plugin** (salvages #2558) ([#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
### Microsoft Graph foundation
- **msgraph: add auth and client foundation** (salvage of #21408) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922))
- **msgraph: add webhook listener platform** (salvage of #21409) ([#21969](https://github.com/NousResearch/hermes-agent/pull/21969))
- **teams-pipeline: add plugin runtime and operator cli** (salvage of #21410) ([#22007](https://github.com/NousResearch/hermes-agent/pull/22007))
- **teams: add pipeline outbound delivery via existing adapter** (salvage of #21411) ([#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
### Cross-platform
- **Per-platform admin/user split for slash commands** (salvage of #4443) ([#23373](https://github.com/NousResearch/hermes-agent/pull/23373))
- **Forensics on signal handling — non-blocking diag, per-phase timing, stale-unit warning** ([#23285](https://github.com/NousResearch/hermes-agent/pull/23285))
- **Keep gateway running when platforms fail; add per-platform circuit breaker + `/platform`** ([#26600](https://github.com/NousResearch/hermes-agent/pull/26600))
- **Wire `clarify` tool with inline keyboard buttons on Telegram** ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199))
- **Add `chat_id` to `hook_ctx` for message source tracking** ([#24710](https://github.com/NousResearch/hermes-agent/pull/24710))
### Telegram
- **Native draft streaming via `sendMessageDraft` (Bot API 9.5+)** (salvage of #3412) ([#23512](https://github.com/NousResearch/hermes-agent/pull/23512))
- **Stream Telegram edits safely** — salvage of #22264 (@kshitijk4poor) ([#22518](https://github.com/NousResearch/hermes-agent/pull/22518))
- **Telegram notification mode** (salvage #22772) ([#22793](https://github.com/NousResearch/hermes-agent/pull/22793))
- **Telegram guest mention mode** (@kshitijk4poor) ([#22759](https://github.com/NousResearch/hermes-agent/pull/22759))
- **Split-and-deliver oversized edits instead of silent truncation** (salvage of #19537) ([#23576](https://github.com/NousResearch/hermes-agent/pull/23576))
- **Preserve DM topic routing via reply fallback** (salvage #22053) (@kshitijk4poor) ([#22410](https://github.com/NousResearch/hermes-agent/pull/22410))
- **Pass `source.thread_id` explicitly on auto-reset notice** (carve-out of #7404) ([#23440](https://github.com/NousResearch/hermes-agent/pull/23440))
### Discord
- **Render clarify choices as buttons** ([#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
- **Channel history backfill — default on, broadened scope** ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
- **`thread_require_mention` for multi-bot threads** (salvage #25313) ([#25445](https://github.com/NousResearch/hermes-agent/pull/25445))
### Slack
- **Support `!cmd` as alternate prefix for slash commands in threads** ([#25355](https://github.com/NousResearch/hermes-agent/pull/25355))
### WhatsApp
- **Surface quoted reply metadata from Baileys** (#25398) ([#25489](https://github.com/NousResearch/hermes-agent/pull/25489))
### Feishu / Google Chat / others
- **Feishu: native update prompt cards** (@kshitijk4poor) ([#22448](https://github.com/NousResearch/hermes-agent/pull/22448))
- **Google Chat: repair setup prompt imports** (@helix4u) ([#22038](https://github.com/NousResearch/hermes-agent/pull/22038))
- **Google Chat: honor relay-declared sender_type** (salvage of #22107) (@kshitijk4poor) ([#22432](https://github.com/NousResearch/hermes-agent/pull/22432))
- **LINE: use `build_source` instead of nonexistent `create_source`** ([#24717](https://github.com/NousResearch/hermes-agent/pull/24717))
- **Add `weixin, and more` to gateway docs** (salvage of #21063 by @wuwuzhijing)
---
## 🖥️ CLI & TUI
### CLI
- **Show YOLO mode warning in banner and status bar** ([#26238](https://github.com/NousResearch/hermes-agent/pull/26238))
- **Confirm prompt for destructive slash commands** (#4069) ([#22687](https://github.com/NousResearch/hermes-agent/pull/22687))
- **`docker_extra_args` + `display.timestamps`** ([#23599](https://github.com/NousResearch/hermes-agent/pull/23599))
- **Delegate tool: show user's actual concurrency / spawn-depth limits in description** ([#22694](https://github.com/NousResearch/hermes-agent/pull/22694))
### TUI
- **`/sessions` slash command for browsing and resuming previous sessions** (@austinpickett) ([#20805](https://github.com/NousResearch/hermes-agent/pull/20805))
- **Segment turns with rule above non-first user msgs; trim ticker dead space** (@OutThisLife) ([#21846](https://github.com/NousResearch/hermes-agent/pull/21846))
- **Support attaching to an existing gateway** (@OutThisLife) ([#21978](https://github.com/NousResearch/hermes-agent/pull/21978))
- **Resolve markdown links to readable page titles** (@OutThisLife) ([#24013](https://github.com/NousResearch/hermes-agent/pull/24013))
- **Width-aware markdown table rendering with vertical fallback** (@alt-glitch) ([#26195](https://github.com/NousResearch/hermes-agent/pull/26195))
- **Keep Ink displayCursor in sync with fast-echo writes so cursor stops drifting** (@OutThisLife) ([#26717](https://github.com/NousResearch/hermes-agent/pull/26717))
- **Allow transcript scroll + Esc during approval/clarify/confirm prompts** (@OutThisLife) ([#26414](https://github.com/NousResearch/hermes-agent/pull/26414))
- **Preserve session when switching personality** (@austinpickett) ([#20942](https://github.com/NousResearch/hermes-agent/pull/20942))
- **Skip native safety net on OSC52-capable terminals** (@benbarclay) ([#20954](https://github.com/NousResearch/hermes-agent/pull/20954))
### Dashboard / GUI
- **Route embedded TUI through dashboard gateway** (@OutThisLife) ([#21979](https://github.com/NousResearch/hermes-agent/pull/21979))
- **Hide token/cost analytics behind config flag (default off)** ([#25438](https://github.com/NousResearch/hermes-agent/pull/25438))
- **Fix Langfuse observability — trace I/O, tool outputs, placeholder credentials** (closes #22342, #22763) (@kshitijk4poor) ([#26320](https://github.com/NousResearch/hermes-agent/pull/26320))
- **MiniMax 'Login' button launched Claude OAuth** (salvage #22849) ([#24058](https://github.com/NousResearch/hermes-agent/pull/24058))
- **Update cron modals** (@austinpickett) ([#25985](https://github.com/NousResearch/hermes-agent/pull/25985))
- **Analytics: prevent silent token loss and add Claude 4.5–4.7 pricing** (@austinpickett) ([#21455](https://github.com/NousResearch/hermes-agent/pull/21455))
---
## 🔧 Tools & Capabilities
### Vision & video
- **`vision_analyze` returns pixels to vision-capable models** ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
- **Unified `video_generate` with pluggable provider backends** ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
- **`image_gen`: actionable setup message when no FAL backend is reachable** ([#26222](https://github.com/NousResearch/hermes-agent/pull/26222))
### Computer use
- **`computer_use` cua-driver backend + focus-safe ops + non-Anthropic provider fix** (re-salvage #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967))
- **Refresh cua-driver on `hermes update` + add `install --upgrade`** ([#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
### LSP & write-time diagnostics
- **Semantic diagnostics from real language servers in `write_file`/`patch`** ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168))
- **Shift baseline diagnostics into post-edit coordinates** ([#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
### Search & web
- **Brave Search (free tier) and DDGS search providers** ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
- **Bearer auth header for Tavily `/crawl` endpoint** ([#24658](https://github.com/NousResearch/hermes-agent/pull/24658))
### X (Twitter)
- **Gated `x_search` tool with OAuth-or-API-key auth** ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
### Browser
- **Route `browser_console` eval through supervisor's persistent CDP WS (180x faster)** ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
- **Support externally managed Camofox sessions** ([#24499](https://github.com/NousResearch/hermes-agent/pull/24499))
### MCP
- **`supports_parallel_tool_calls` for MCP servers** (salvage of #9944) ([#26825](https://github.com/NousResearch/hermes-agent/pull/26825))
- **Codex preset for Codex CLI MCP server** (salvage #22663) ([#22679](https://github.com/NousResearch/hermes-agent/pull/22679))
- **Stop retrying initial MCP auth failures** (#25624) ([#25776](https://github.com/NousResearch/hermes-agent/pull/25776))
### Google Workspace
- **Drive write ops + Docs/Sheets create/append** ([#21895](https://github.com/NousResearch/hermes-agent/pull/21895))
### Per-turn verifier
- **Per-turn file-mutation verifier footer** ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
---
## 🧩 Kanban (Multi-Agent)
- **`specify` — auxiliary LLM fleshes out triage tasks** ([#21435](https://github.com/NousResearch/hermes-agent/pull/21435))
- **Orchestrator board tools — `kanban_list` + `kanban_unblock`** (carve-out of #20568) ([#23012](https://github.com/NousResearch/hermes-agent/pull/23012))
- **`stranded_in_ready` diagnostic for unclaimed tasks** ([#23578](https://github.com/NousResearch/hermes-agent/pull/23578))
- **Dashboard batch QOL upgrade** (salvage of #23240) ([#23550](https://github.com/NousResearch/hermes-agent/pull/23550))
- **Tooltips and docs link across dashboard** ([#21541](https://github.com/NousResearch/hermes-agent/pull/21541))
- **Dedupe notifier delivery via atomic claim + rewind on failure** (salvage #22558) ([#23401](https://github.com/NousResearch/hermes-agent/pull/23401))
- **Keep notifier subscriptions alive across retry cycles** (salvage #21398) ([#23423](https://github.com/NousResearch/hermes-agent/pull/23423))
- **Drop caller-controlled author override in `kanban_comment`** (salvage of #22109) (@kshitijk4poor) ([#22435](https://github.com/NousResearch/hermes-agent/pull/22435))
- **Sanitize comment author rendering in `build_worker_context`** ([#22769](https://github.com/NousResearch/hermes-agent/pull/22769))
---
## 🧠 Plugins & Extension
### Plugin surface
- **Run any LLM call from inside a plugin via `ctx.llm`** ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194))
- **`tool_override` flag for replacing built-in tools** (closes #11049) ([#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
- **`standalone_sender_fn` for out-of-process cron delivery** (@kshitijk4poor) ([#22461](https://github.com/NousResearch/hermes-agent/pull/22461))
- **`HERMES_PLUGINS_DEBUG=1` surfaces plugin discovery logs** ([#22684](https://github.com/NousResearch/hermes-agent/pull/22684))
- **Hindsight-client as optional dependency** (@alt-glitch) ([#21818](https://github.com/NousResearch/hermes-agent/pull/21818))
### Profile & distribution
- **Shareable profile distributions via git** ([#20831](https://github.com/NousResearch/hermes-agent/pull/20831))
---
## ⏰ Cron
- **Routing intent — `deliver=all` fans out to every connected channel** ([#21495](https://github.com/NousResearch/hermes-agent/pull/21495))
- **Support name-based lookup for job operations** ([#26231](https://github.com/NousResearch/hermes-agent/pull/26231))
- **Blank Cron dashboard tab + partial-record crashes** (salvage #21042 + #22330) (@kshitijk4poor) ([#22389](https://github.com/NousResearch/hermes-agent/pull/22389))
- **Do not seed `HERMES_SESSION_*` contextvars from cron origin** (salvage of #22356) (@kshitijk4poor) ([#22382](https://github.com/NousResearch/hermes-agent/pull/22382))
- **Scan assembled prompt including skill content for prompt injection** (#3968)
---
## 🧩 Skills Ecosystem
### Skills Hub
- **`hermes-skills/huggingface` as a trusted default tap** (closes #2549) ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
- **Show per-skill pages in the left sidebar** ([#26646](https://github.com/NousResearch/hermes-agent/pull/26646))
- **Richer info panels on the Skills Hub** ([#22905](https://github.com/NousResearch/hermes-agent/pull/22905))
- **Refuse `skill_view` name collisions instead of guessing** (closes #6136 @polkn)
### Curator
- **Show rename map in user-visible summary** ([#22910](https://github.com/NousResearch/hermes-agent/pull/22910))
- **Hint at `hermes curator pin` in the rename block** ([#23212](https://github.com/NousResearch/hermes-agent/pull/23212))
### New optional skills
- **Hyperliquid** — perp/spot trading via SDK + REST (salvage of #1952) ([#23583](https://github.com/NousResearch/hermes-agent/pull/23583))
- **Yahoo Finance** market data ([#23590](https://github.com/NousResearch/hermes-agent/pull/23590))
- **api-testing** (REST/GraphQL debug, salvages #1800) ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582))
- **Unified EVM multi-chain skill** (salvages #25291 + #2010 + folds in base/) ([#25299](https://github.com/NousResearch/hermes-agent/pull/25299))
- **darwinian-evolver** ([#26760](https://github.com/NousResearch/hermes-agent/pull/26760))
- **osint-investigation** (closes #355) ([#26729](https://github.com/NousResearch/hermes-agent/pull/26729))
- **pinggy-tunnel** ([#26765](https://github.com/NousResearch/hermes-agent/pull/26765))
- **watchers** — RSS / HTTP JSON / GitHub polling via cron no-agent ([#21881](https://github.com/NousResearch/hermes-agent/pull/21881))
- **Notion overhaul for the Developer Platform** (May 2026) ([#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
---
## 🔒 Security & Reliability
### Security hardening
- **Sudo brute-force block + sudo-stdin/askpass DANGEROUS** (salvage of #22194 + #21128) (@kshitijk4poor) ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736))
- **Drop caller-controlled author override in `kanban_comment`** (salvage of #22109) (@kshitijk4poor) ([#22435](https://github.com/NousResearch/hermes-agent/pull/22435))
- **Cover remaining SSRF fetch paths in skills-hub** (salvage #22804) ([#22843](https://github.com/NousResearch/hermes-agent/pull/22843))
- **Use credential_pool for custom endpoint model listing probes** (salvage #22810) ([#22842](https://github.com/NousResearch/hermes-agent/pull/22842))
- **Require dashboard auth for plugin API routes** (salvage #19541) ([#23220](https://github.com/NousResearch/hermes-agent/pull/23220))
- **Sanitize env and redact output in quick commands + remove write-only `_pending_messages`** ([#23584](https://github.com/NousResearch/hermes-agent/pull/23584))
- **Reduce unnecessary `shell=True` in subprocess calls** ([#25149](https://github.com/NousResearch/hermes-agent/pull/25149))
- **Sanitize Google Chat sender_type from relay** (salvage of #22107) (@kshitijk4poor) ([#22432](https://github.com/NousResearch/hermes-agent/pull/22432))
- **Supply-chain advisory checker** ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220))
- **Rewrite security policy around OS-level isolation as the boundary** (@jquesnelle) ([#20317](https://github.com/NousResearch/hermes-agent/pull/20317))
- **Remove public security advisory page** ([#24253](https://github.com/NousResearch/hermes-agent/pull/24253))
### Reliability — notable bug closures
- **SQLite: fall back to `journal_mode=DELETE` on NFS/SMB/FUSE** (fixes `/resume` on network mounts) (@kshitijk4poor) ([#22043](https://github.com/NousResearch/hermes-agent/pull/22043))
- **Codex-runtime: retire wedged sessions + post-tool watchdog + OAuth refresh classify** ([#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
- **Codex-runtime: de-dup `[plugins.X]` tables and stop leaking HERMES_HOME** (#26250) (@kshitijk4poor) ([#26260](https://github.com/NousResearch/hermes-agent/pull/26260))
- **Daytona: migrate legacy-sandbox lookup to cursor-based `list()`** ([#24587](https://github.com/NousResearch/hermes-agent/pull/24587))
- **MCP: stop retrying initial MCP auth failures** (#25624) ([#25776](https://github.com/NousResearch/hermes-agent/pull/25776))
- **Gateway: enable text-intercept for multi-choice clarify fallback** (#25587) ([#25778](https://github.com/NousResearch/hermes-agent/pull/25778))
- **Gateway: keep running when platforms fail; per-platform circuit breaker + `/platform`** ([#26600](https://github.com/NousResearch/hermes-agent/pull/26600))
- **Delegate: salvage #21933 JSON-string batch + diagnostic logging** (@kshitijk4poor) ([#22436](https://github.com/NousResearch/hermes-agent/pull/22436))
- **Profiles+banner: exclude infrastructure from `--clone-all` + fix stale update-check repo resolution** (@kshitijk4poor) ([#22475](https://github.com/NousResearch/hermes-agent/pull/22475))
- **ACP: inline file attachment resources** (salvage #21400 + image support) ([#21407](https://github.com/NousResearch/hermes-agent/pull/21407))
- **CI: unblock shared PR checks** (@stephenschoettler) ([#21012](https://github.com/NousResearch/hermes-agent/pull/21012), [#25957](https://github.com/NousResearch/hermes-agent/pull/25957))
### Notable reverts in window
- **`/goal` checklist + /subgoal feature stack** — rolled back ([#23813](https://github.com/NousResearch/hermes-agent/pull/23813)); `/subgoal` returned in simpler form via [#25449](https://github.com/NousResearch/hermes-agent/pull/25449)
- **Scrollback box width clamp** (#25975) rolled back to restore full-width borders ([#26163](https://github.com/NousResearch/hermes-agent/pull/26163))
- **`fix(cli): tolerate unreadable dirs when building systemd PATH`** rolled back
---
## 🌍 i18n
- **Localize all gateway commands + web dashboard, add 8 new locales (16 total)** ([#22914](https://github.com/NousResearch/hermes-agent/pull/22914))
---
## 📚 Documentation
- **Repair Voice & TTS provider table** (@nightcityblade, fixes #24101) ([#24138](https://github.com/NousResearch/hermes-agent/pull/24138))
- **Show per-skill pages in the left sidebar** ([#26646](https://github.com/NousResearch/hermes-agent/pull/26646))
- **Mention Weixin in gateway help and docstrings** (salvage of #21063 by @wuwuzhijing)
- **Richer info panels on the Skills Hub** ([#22905](https://github.com/NousResearch/hermes-agent/pull/22905))
- Many more doc updates across providers, platforms, skills, Windows install paths, and dashboard.
---
## 🧪 Testing & CI
- **Unblock shared PR checks** (@stephenschoettler) ([#21012](https://github.com/NousResearch/hermes-agent/pull/21012))
- **Stabilize shared test state after 21012** (@stephenschoettler) ([#25957](https://github.com/NousResearch/hermes-agent/pull/25957))
- A long tail of test additions for platforms, providers, plugins, and edge cases — 8 explicit `test:` PRs plus ~250 fix PRs that also added regression coverage.
---
## 👥 Contributors
### Core
- @teknium1 — release lead, architecture, ~406 PRs merged in window
### Top community contributors
- **@kshitijk4poor** — 38 PRs · Telegram cadence/streaming/topic routing, security hardening (sudo, SSRF, kanban_comment, dashboard auth), codex-runtime hygiene, NovitaAI provider, profile/banner fixes, Feishu update cards, gateway QOL across the board
- **@alt-glitch** — 13 PRs · Markdown-table TUI rendering, `HERMES_SESSION_ID` env var, hindsight-client optional dep, Nix `extraDependencyGroups`
- **@OutThisLife** (Brooklyn Nicholson) — 12 PRs · TUI turn segmentation, attach-to-gateway, markdown link titles, embedded TUI via dashboard gateway, Ink cursor sync, scroll/Esc during prompts
- **@austinpickett** — 8 PRs · `/sessions` slash command, personality switching preserves session, cron modals, dashboard analytics
- **@helix4u** — 5 PRs · Google Chat setup, browser install skip on system chromium, Windows Ctrl+C preservation
- **@rob-maron** — 4 PRs · Nous Portal as model metadata authority, provider polish
- **@stephenschoettler** — 3 PRs · CI stabilization
- **@ethernet8023** — 3 PRs · platform/gateway work
### All contributors (alphabetical)
@02356abc, @0xbyt4, @0xharryriddle, @1000Delta, @1RB, @29206394, @A-kamal, @aashizpoudel, @Abd0r,
@adybag14-cyber, @AgentArcLab, @ahmedbadr3, @AhmetArif0, @alblez, @Alex-yang00, @ALIYILD, @AllynSheep,
@alt-glitch, @am423, @amathxbt, @amethystani, @ArecaNon, @Arkmusn, @askclaw-vesper, @AsoTora, @austinpickett,
@aydnOktay, @ayushere, @baocin, @Bartok9, @benbarclay, @BennetYrWang, @Bihruze, @binhnt92, @briandevans,
@brooklynnicholson, @btorresgil, @buntingszn, @CalmProton, @chrisworksai, @CoinTheHat, @dandacompany, @Dangooy,
@DanielLSM, @David-0x221Eight, @ddupont808, @dhruv-saxena, @diablozzc, @dlkakbs, @dmahan93, @dmnkhorvath,
@domtriola, @donrhmexe, @Dusk1e, @eloklam, @emozilla, @ephron-ren, @erenkarakus, @EthanGuo-coder,
@ethernet8023, @evgyur, @explainanalyze, @fahdad, @fr33d3m0n, @Freeman-Consulting, @freqyfreqy, @Frowtek,
@fu576, @github-actions[bot], @gnanirahulnutakki, @GodsBoy, @guglielmofonda, @Gutslabs, @hanzckernel,
@heathley, @hekaru-agent, @helix4u, @HenkDz, @HiddenPuppy, @hllqkb, @hrygo, @HuangYuChuh, @Hugo-SEQUIER, @HxT9,
@iacker, @InB4DevOps, @isaachuangGMICLOUD, @iuyup, @Jaaneek, @jackey8616, @jackjin1997, @Jaggia, @jak983464779,
@jelrod27, @jethac, @JithendraNara, @johnisag, @Julientalbot, @Jwd-gity, @kallidean, @keyuyuan, @kfa-ai,
@kidonng, @KiraKatana, @kjames2001, @konsisumer, @Korkyzer, @kshitijk4poor, @KvnGz, @lars-hagen, @leehack,
@leepoweii, @LeonSGP43, @li0near, @libo1106, @liquidchen, @littlewwwhite, @liuhao1024, @liyoungc, @luandiasrj,
@luoyuctl, @luyao618, @magic524, @mbac, @McClean, @memosr, @Mibayy, @ming1523, @mizgyo, @mrshu, @ms-alan,
@MustafaKara7, @nederev, @nicoechaniz, @nidhi-singh02, @nightcityblade, @nik1t7n, @Ninso112, @NivOO5,
@novax635, @nv-kasikritc, @oferlaor, @oswaldb22, @outdoorsea, @oxngon, @PaTTeeL, @pearjelly, @pefontana,
@perng, @PhilipAD, @phuongvm, @polkn, @Prasanna28Devadiga, @princepal9120, @pty819, @purzbeats, @Quarkex,
@quocanh261997, @qWaitCrypto, @Qwinty, @rahimsais, @raymaylee, @ReqX, @rewbs, @RhombusMaximus, @rob-maron,
@Ruzzgar, @ryptotalent, @Sanjays2402, @shannonsands, @shaun0927, @SiliconID, @silv-mt-holdings, @simpolism,
@smwbev, @soichiyo, @sprmn24, @steezkelly, @stephenschoettler, @Sylw3ster, @szymonclawd, @teyrebaz33,
@Tianyu199509, @Tranquil-Flow, @TreyDong, @TurgutKural, @tw2818, @tymrtn, @uzunkuyruk, @v1b3coder,
@vanthinh6886, @VinceZcrikl, @vKongv, @vominh1919, @voteblake, @VTRiot, @wali-reheman, @wesleysimplicio,
@wilsen0, @WorldWriter, @worlldz, @wuli666, @wuwuzhijing, @Wysie, @XiaoXiao0221, @xieNniu, @xxxigm, @yehuosi,
@ygd58, @yifengingit, @yuga-hashimoto, @zccyman, @ZeterMordio, @Zhekinmaksim, @zhengyn0001
Also: @Nagatha (Claude Opus 4.7).
---
**Full Changelog**: [v2026.5.7...v2026.5.16](https://github.com/NousResearch/hermes-agent/compare/v2026.5.7...v2026.5.16)

View file

View file

@ -0,0 +1,288 @@
# bootstrap_browser_tools.ps1 — install agent-browser + Playwright Chromium
# into ~/.hermes/node/ for use by Hermes Agent's browser tools on Windows.
#
# Targets the registry-install path: users who got Hermes via
# `uvx --from 'hermes-agent[acp]==X' hermes-acp` don't have a repo clone,
# so the install.ps1 `npm install`-in-repo flow doesn't apply. This script
# is a self-contained, idempotent slice of install.ps1's browser block.
#
# Usage:
# .\bootstrap_browser_tools.ps1 # use defaults
# .\bootstrap_browser_tools.ps1 -Yes # accept Chromium download
# .\bootstrap_browser_tools.ps1 -SkipChromium # Node + agent-browser only
#
# Idempotent: re-running this is safe and fast.
# Script parameters:
#   -Yes           answer "yes" to the Chromium download prompt up front
#   -SkipChromium  stop after Node + agent-browser; do not touch Chromium
[CmdletBinding()]
param(
    [switch]$Yes,
    [switch]$SkipChromium
)

# Promote cmdlet errors to terminating errors so a failed step aborts the run.
$ErrorActionPreference = 'Stop'

# Node.js major release line that gets installed when no usable Node is found.
$NodeVersion = '22'
# ─────────────────────────────────────────────────────────────────────────
# Logging
# ─────────────────────────────────────────────────────────────────────────
# Console log helpers. Each one prints the message behind a fixed tag,
# in a fixed colour: [*] info, [+] success, [!] warning, [x] error.
function Write-Info {
    param([string]$msg)
    Write-Host ('[*] ' + $msg) -ForegroundColor Cyan
}

function Write-Success {
    param([string]$msg)
    Write-Host ('[+] ' + $msg) -ForegroundColor Green
}

function Write-Warn {
    param([string]$msg)
    Write-Host ('[!] ' + $msg) -ForegroundColor Yellow
}

function Write-Err {
    param([string]$msg)
    Write-Host ('[x] ' + $msg) -ForegroundColor Red
}
# ─────────────────────────────────────────────────────────────────────────
# Paths
# ─────────────────────────────────────────────────────────────────────────
# HERMES_HOME wins when it is set and non-empty; otherwise the install root
# is the .hermes folder under the user's profile directory.
$HermesHome = if ($env:HERMES_HOME) {
    $env:HERMES_HOME
} else {
    Join-Path $env:USERPROFILE ".hermes"
}

# Directory that holds the Hermes-managed Node runtime and npm-installed shims.
$NodePrefix = Join-Path $HermesHome "node"
# ─────────────────────────────────────────────────────────────────────────
# Step 1: Node.js
# ─────────────────────────────────────────────────────────────────────────
function Resolve-NpmExe {
    # Returns the path of the npm launcher to invoke, or $null when npm is
    # not discoverable. If PowerShell resolved `npm` to the npm.ps1 wrapper,
    # the npm.cmd next to it is preferred (when present) so the execution
    # policy cannot block the call -- same gotcha as install.ps1.
    $found = Get-Command npm -ErrorAction SilentlyContinue
    if (-not $found) { return $null }

    $launcher = $found.Source
    if ($launcher -notlike "*.ps1") { return $launcher }

    $cmdShim = Join-Path (Split-Path $launcher -Parent) "npm.cmd"
    if (Test-Path $cmdShim) { return $cmdShim }

    # No .cmd sibling: fall back to whatever Get-Command resolved.
    return $launcher
}
function Resolve-NpxExe {
    # Returns the path of the npx launcher to invoke, or $null when npx is
    # not discoverable. When `npx` resolves to the npx.ps1 wrapper, the
    # npx.cmd next to it is returned instead if it exists.
    $found = Get-Command npx -ErrorAction SilentlyContinue
    if (-not $found) { return $null }

    $launcher = $found.Source
    if ($launcher -notlike "*.ps1") { return $launcher }

    $cmdShim = Join-Path (Split-Path $launcher -Parent) "npx.cmd"
    if (Test-Path $cmdShim) { return $cmdShim }

    # No .cmd sibling: fall back to whatever Get-Command resolved.
    return $launcher
}
function Ensure-Node {
    # Makes a Node.js runtime available to the rest of the script.
    #
    # Resolution order:
    #   1. A `node` already on PATH whose major version is >= 20 -> use as-is.
    #   2. A Hermes-managed node.exe under $NodePrefix -> prepend to PATH.
    #   3. Otherwise download the latest Node $NodeVersion.x Windows zip from
    #      nodejs.org and unpack it into $NodePrefix.
    #
    # Returns nothing. Throws (after logging a hint) when the download or
    # extraction fails.

    # System Node on PATH?
    $sysNode = Get-Command node -ErrorAction SilentlyContinue
    if ($sysNode) {
        try {
            $v = & $sysNode.Source --version
            # `node --version` prints e.g. "v22.3.0"; keep only the major part.
            $major = [int]($v -replace '^v(\d+).*', '$1')
            if ($major -ge 20) {
                Write-Success "Node.js $v found on PATH"
                return
            }
            Write-Warn "Node.js $v is older than v20 — installing managed Node."
        } catch {
            # Best effort: an unusable system Node just means we fall through
            # to the managed copy below.
            Write-Warn "Failed to query Node version: $_"
        }
    }

    # Hermes-managed Node?
    $managedNode = Join-Path $NodePrefix "node.exe"
    if (Test-Path $managedNode) {
        $v = & $managedNode --version
        Write-Success "Node.js $v found (Hermes-managed at $NodePrefix)"
        # Prepend to current-process PATH so subsequent npm/npx calls find it.
        $env:PATH = "$NodePrefix;$env:PATH"
        return
    }

    Write-Info "Installing Node.js $NodeVersion LTS into $NodePrefix ..."
    $arch = if ([Environment]::Is64BitOperatingSystem) { "x64" } else { "x86" }
    $indexUrl = "https://nodejs.org/dist/latest-v${NodeVersion}.x/"

    # Declared outside the try so the finally block can always clean it up.
    $tmpDir = $null
    try {
        $indexPage = Invoke-WebRequest -Uri $indexUrl -UseBasicParsing
        # Named $zipMatches rather than $matches: $Matches is an automatic
        # variable that the -match operator owns, and should not be assigned.
        $zipMatches = [regex]::Matches($indexPage.Content, "node-v${NodeVersion}\.\d+\.\d+-win-${arch}\.zip")
        if ($zipMatches.Count -eq 0) {
            Write-Err "Could not locate Node.js $NodeVersion zip for win-$arch"
            throw "no tarball"
        }
        $zipName = $zipMatches[0].Value
        $zipUrl = "$indexUrl$zipName"

        $tmpDir = Join-Path $env:TEMP "hermes-node-$([guid]::NewGuid().ToString('N'))"
        New-Item -ItemType Directory -Force -Path $tmpDir | Out-Null
        $zipPath = Join-Path $tmpDir $zipName

        Write-Info "Downloading $zipName ..."
        Invoke-WebRequest -Uri $zipUrl -OutFile $zipPath -UseBasicParsing
        Expand-Archive -Path $zipPath -DestinationPath $tmpDir -Force

        # The zip unpacks into a single node-v<version>-win-<arch> folder.
        $extracted = Get-ChildItem -Path $tmpDir -Directory | Where-Object { $_.Name -like "node-v*" } | Select-Object -First 1
        if (-not $extracted) { Write-Err "Node.js extraction failed"; throw "extract" }

        # Replace any previous (incomplete) managed install wholesale.
        if (Test-Path $NodePrefix) { Remove-Item -Recurse -Force $NodePrefix }
        New-Item -ItemType Directory -Force -Path $HermesHome | Out-Null
        Move-Item -Path $extracted.FullName -Destination $NodePrefix

        $env:PATH = "$NodePrefix;$env:PATH"
        $v = & "$NodePrefix\node.exe" --version
        Write-Success "Node.js $v installed to $NodePrefix"
    } catch {
        Write-Err "Node.js install failed: $_"
        Write-Info "Install Node 20+ manually from https://nodejs.org/en/download/ and re-run."
        throw
    } finally {
        # Remove the scratch directory on success AND on failure, so a failed
        # download/extract does not leave a partial zip behind in %TEMP%.
        if ($tmpDir -and (Test-Path $tmpDir)) {
            Remove-Item -Recurse -Force $tmpDir -ErrorAction SilentlyContinue
        }
    }
}
# ─────────────────────────────────────────────────────────────────────────
# Step 2: agent-browser
# ─────────────────────────────────────────────────────────────────────────
function Ensure-AgentBrowser {
    # Installs the agent-browser and @askjo/camofox-browser npm packages
    # into $NodePrefix unless agent-browser is already available.
    #
    # Returns nothing. Throws "npm missing" when npm cannot be resolved and
    # "npm install" when the npm invocation exits non-zero.
    $npmExe = Resolve-NpmExe
    if (-not $npmExe) {
        Write-Err "npm not on PATH after Node install — aborting"
        throw "npm missing"
    }

    # Already installed and visible on PATH?
    $existing = Get-Command agent-browser -ErrorAction SilentlyContinue
    if ($existing) {
        Write-Success "agent-browser already installed at $($existing.Source)"
        return
    }

    # Already installed under the Hermes prefix by an earlier run? When a
    # system Node is in use, $NodePrefix is not on PATH at this point, so the
    # Get-Command probe above cannot see the shim. Checking the prefix
    # directly keeps re-runs fast instead of reinstalling every time.
    $prefixShim = Join-Path $NodePrefix "agent-browser.cmd"
    if (Test-Path $prefixShim) {
        $env:PATH = "$NodePrefix;$env:PATH"
        Write-Success "agent-browser already installed at $prefixShim"
        return
    }

    # When the user has system Node (winget / installer-based), `npm install
    # -g` writes to a directory that may require admin rights. Force the
    # prefix to the user-writable Hermes-managed Node directory so we never
    # need elevation and the agent can always find the result. Mirrors the
    # bash bootstrap's `--prefix $NODE_PREFIX` strategy.
    New-Item -ItemType Directory -Force -Path $NodePrefix | Out-Null
    Write-Info "Installing agent-browser (npm, prefix=$NodePrefix)..."
    # NOTE(review): when $npmExe is npm.cmd the arguments pass through
    # cmd.exe, which may treat '^' as an escape character and drop it from
    # the version specs below -- confirm the range that actually gets installed.
    & $npmExe install -g --prefix $NodePrefix --silent `
        "agent-browser@^0.26.0" "@askjo/camofox-browser@^1.5.2"
    if ($LASTEXITCODE -ne 0) {
        Write-Err "npm install -g agent-browser failed (exit $LASTEXITCODE)"
        throw "npm install"
    }

    # Windows npm global installs drop shims at $NodePrefix\ root (not bin/).
    # Prepend to PATH so any subsequent npx call resolves them.
    $env:PATH = "$NodePrefix;$env:PATH"
    Write-Success "agent-browser installed to $NodePrefix"
}
# ─────────────────────────────────────────────────────────────────────────
# Step 3: Playwright Chromium
# ─────────────────────────────────────────────────────────────────────────
function Find-SystemBrowser {
    # Returns the path of the first installed Chromium-family browser found
    # at a well-known location, or $null when none exists. The list is in
    # priority order: Chrome / Chromium first, then Edge (Chromium-based,
    # so agent-browser can use it).
    $searchOrder = @(
        "C:\Program Files\Google\Chrome\Application\chrome.exe",
        "C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
        "C:\Program Files\Chromium\Application\chromium.exe",
        "${env:LOCALAPPDATA}\Google\Chrome\Application\chrome.exe",
        "${env:LOCALAPPDATA}\Chromium\Application\chromium.exe",
        "C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
        "C:\Program Files\Microsoft\Edge\Application\msedge.exe"
    )

    foreach ($browserPath in $searchOrder) {
        if (Test-Path $browserPath) {
            return $browserPath
        }
    }

    return $null
}
function Write-BrowserEnv {
    # Records $BrowserPath as AGENT_BROWSER_EXECUTABLE_PATH in
    # <HermesHome>\.env. Does nothing when the file already defines that
    # variable, so an existing setting is never overwritten or duplicated.
    param([string]$BrowserPath)

    $envFile = Join-Path $HermesHome ".env"
    New-Item -ItemType Directory -Force -Path $HermesHome | Out-Null

    $alreadySet = $false
    if (Test-Path $envFile) {
        # Read as one string; (?m) lets ^ match at the start of every line.
        $content = Get-Content $envFile -Raw -ErrorAction SilentlyContinue
        if ($content -and ($content -match "(?m)^AGENT_BROWSER_EXECUTABLE_PATH=")) {
            $alreadySet = $true
        }
    }
    if ($alreadySet) { return }

    # Leading blank line separates the new entry from existing content.
    $newLines = @(
        "",
        "# Hermes Agent browser tools — use the system Chrome/Chromium/Edge binary.",
        "AGENT_BROWSER_EXECUTABLE_PATH=$BrowserPath"
    )
    Add-Content -Path $envFile -Value $newLines

    Write-Success "Configured browser tools to use $BrowserPath"
}
function Confirm-ChromiumDownload {
    # Decides whether the Playwright Chromium download should go ahead.
    # Returns $true when -Yes was passed or the user answers y/yes at the
    # prompt; returns $false otherwise, including when the process is not
    # user-interactive (no prompt is shown in that case).
    if ($Yes) { return $true }

    if ([Environment]::UserInteractive) {
        $answer = Read-Host "Install Playwright Chromium (~400 MB download)? [y/N]"
        # -match is case-insensitive; anything but y / yes means "no".
        return ($answer -match "^(y|yes)$")
    }

    Write-Warn "Non-interactive shell — skipping Chromium prompt."
    Write-Info "Re-run with -Yes to install Chromium (~400 MB download)."
    return $false
}
function Ensure-Chromium {
    # Installs Playwright's Chromium build, or wires up a system browser as
    # a fallback when the user declines the download.
    #
    # Returns nothing. Throws "npx missing" when npx cannot be resolved and
    # "playwright" when the Playwright install exits non-zero.
    if ($SkipChromium) {
        Write-Info "Skipping Chromium install (-SkipChromium)"
        return
    }

    if (Confirm-ChromiumDownload) {
        $npxExe = Resolve-NpxExe
        if (-not $npxExe) {
            Write-Err "npx not on PATH — cannot install Playwright Chromium"
            throw "npx missing"
        }

        Write-Info "Installing Playwright Chromium (~400 MB) ..."
        & $npxExe --yes playwright install chromium
        if ($LASTEXITCODE -ne 0) {
            Write-Err "Playwright Chromium install failed (exit $LASTEXITCODE)"
            Write-Info "Try again later: npx --yes playwright install chromium"
            throw "playwright"
        }

        Write-Success "Playwright Chromium installed"
        return
    }

    # Download declined. agent-browser on Windows expects a Playwright-managed
    # Chromium under %LOCALAPPDATA%\ms-playwright, and Playwright's default
    # launch path won't pick up a stock Chrome install on its own -- so the
    # system browser only works via an explicit AGENT_BROWSER_EXECUTABLE_PATH,
    # which is what the fallback below writes.
    $systemBrowser = Find-SystemBrowser
    if (-not $systemBrowser) {
        Write-Info "Chromium install skipped. Browser tools won't launch until"
        Write-Info "Chromium is installed or AGENT_BROWSER_EXECUTABLE_PATH is set."
        return
    }

    Write-Info "Using system browser at $systemBrowser (Chromium download skipped)."
    Write-BrowserEnv -BrowserPath $systemBrowser
}
# ─────────────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────────────
# Announce the run and the resolved install root.
foreach ($bannerLine in @(
        "Hermes Agent: bootstrapping browser tools",
        " HERMES_HOME = $HermesHome",
        " OS = Windows"
    )) {
    Write-Info $bannerLine
}

# Steps run in dependency order: Node provides npm/npx, npm installs
# agent-browser, npx installs Chromium. Any step that throws ends the run.
Ensure-Node
Ensure-AgentBrowser
Ensure-Chromium

Write-Success "Browser tools setup complete."
Write-Info "Hermes Agent will pick up agent-browser from $NodePrefix on next launch."

View file

@ -0,0 +1,399 @@
#!/usr/bin/env bash
#
# bootstrap_browser_tools.sh — install agent-browser + Playwright Chromium
# into ~/.hermes/node/ for use by Hermes Agent's browser tools.
#
# Targets the registry-install path: users who got Hermes via
# `uvx --from 'hermes-agent[acp]==X' hermes-acp` don't have a repo clone,
# so the install.sh `npm install`-in-repo flow doesn't apply. This script
# is a self-contained, idempotent slice of install.sh's browser block —
# safe to run from `hermes-acp --setup-browser`, from a fresh terminal,
# or from install.sh itself (it's a no-op when everything is already in place).
#
# Usage:
# bootstrap_browser_tools.sh # use defaults
# bootstrap_browser_tools.sh --yes # accept the ~400MB Chromium download
# bootstrap_browser_tools.sh --skip-chromium # only install Node + agent-browser
# HERMES_HOME=/custom/path bootstrap_browser_tools.sh
#
# Idempotent: re-running this is safe and fast. Each step checks whether
# the work is already done.
set -euo pipefail
# ─────────────────────────────────────────────────────────────────────────
# Config
# ─────────────────────────────────────────────────────────────────────────
NODE_VERSION="22"
HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
NODE_PREFIX="$HERMES_HOME/node"
SKIP_CHROMIUM=false
ASSUME_YES=false
# ─────────────────────────────────────────────────────────────────────────
# Logging
# ─────────────────────────────────────────────────────────────────────────
# Colour escapes are only populated when stdout is a terminal, so piped or
# captured output stays free of control sequences.
C_GREEN='' ; C_YELLOW='' ; C_BLUE='' ; C_RED='' ; C_RESET=''
if [ -t 1 ]; then
    C_GREEN='\033[0;32m'
    C_YELLOW='\033[0;33m'
    C_BLUE='\033[0;34m'
    C_RED='\033[0;31m'
    C_RESET='\033[0m'
fi

# _log_line COLOR TAG [MESSAGE...]
# Prints "[TAG] MESSAGE" with the tag wrapped in COLOR. The colour strings
# sit in the printf format so their \033 escapes are interpreted.
_log_line() {
    local color="$1" tag="$2"
    shift 2
    printf "${color}[${tag}]${C_RESET} %s\n" "$*"
}

# Informational and success lines go to stdout; warnings and errors to stderr.
log_info()    { _log_line "$C_BLUE" '*' "$@"; }
log_success() { _log_line "$C_GREEN" '✓' "$@"; }
log_warn()    { _log_line "$C_YELLOW" '!' "$@" >&2; }
log_error()   { _log_line "$C_RED" '✗' "$@" >&2; }
# ─────────────────────────────────────────────────────────────────────────
# Arg parsing
# ─────────────────────────────────────────────────────────────────────────
# Print the --help text. \$HOME is escaped so the literal variable name is
# shown rather than the caller's home directory.
print_usage() {
cat <<EOF
Bootstrap Hermes Agent browser tools.
Installs Node.js (into ~/.hermes/node/), the agent-browser npm package,
and the Playwright Chromium browser engine.
Options:
--skip-chromium Install Node + agent-browser but skip Chromium download
--yes, -y Accept the ~400 MB Chromium download without prompting
-h, --help Show this help
Environment:
HERMES_HOME Override Hermes data dir (default: \$HOME/.hermes)
EOF
}

# Consume the command line one flag at a time. Unknown flags are a usage
# error (exit 2); --help prints the usage text and exits 0.
while [ "$#" -gt 0 ]; do
    case "$1" in
        -y|--yes)
            ASSUME_YES=true
            ;;
        --skip-chromium)
            SKIP_CHROMIUM=true
            ;;
        -h|--help)
            print_usage
            exit 0
            ;;
        *)
            log_error "Unknown option: $1"
            exit 2
            ;;
    esac
    shift
done
# ─────────────────────────────────────────────────────────────────────────
# OS / arch detection
# ─────────────────────────────────────────────────────────────────────────
# Map `uname -s` onto the script's OS label and onto the OS component used
# in Node.js tarball names. Anything other than Linux/macOS is rejected.
OS="unknown"
NODE_OS=""
case "$(uname -s)" in
    Linux*)
        OS="linux"
        NODE_OS="linux"
        ;;
    Darwin*)
        OS="macos"
        NODE_OS="darwin"
        ;;
    *)
        log_error "Unsupported OS: $(uname -s)"
        log_info "Windows users: run scripts/bootstrap_browser_tools.ps1 in PowerShell."
        exit 1
        ;;
esac

# Map `uname -m` onto the architecture component of Node.js tarball names.
NODE_ARCH=""
case "$(uname -m)" in
    x86_64)
        NODE_ARCH="x64"
        ;;
    aarch64|arm64)
        NODE_ARCH="arm64"
        ;;
    armv7l)
        NODE_ARCH="armv7l"
        ;;
    *)
        log_error "Unsupported architecture: $(uname -m)"
        exit 1
        ;;
esac

# Distro ID (e.g. "ubuntu") from os-release; stays empty when the file is absent.
DISTRO=""
if [ -f /etc/os-release ]; then
    # shellcheck disable=SC1091
    . /etc/os-release
    DISTRO="${ID:-}"
fi
# ─────────────────────────────────────────────────────────────────────────
# Step 1: Node.js
# ─────────────────────────────────────────────────────────────────────────
# ensure_node — make a Node.js >= 20 runtime available on PATH.
#
# Resolution order:
#   1. A `node` already on PATH whose major version is >= 20.
#   2. A Hermes-managed Node at $NODE_PREFIX/bin/node (prepended to PATH).
#   3. A fresh download of the latest Node $NODE_VERSION release from
#      nodejs.org, unpacked into $NODE_PREFIX (replacing whatever was there).
#
# Reads:   NODE_VERSION, NODE_OS, NODE_ARCH, NODE_PREFIX, HERMES_HOME
# Exports: PATH (with $NODE_PREFIX/bin prepended) for cases 2 and 3
# Returns: 0 on success, 1 with a logged error on any failure.
#
# The script runs under `set -euo pipefail`, so every command that may
# legitimately fail is guarded; otherwise the shell would abort silently
# before the diagnostics below are printed.
ensure_node() {
    local found_ver major

    # Already on PATH and recent enough?
    if command -v node >/dev/null 2>&1; then
        # A broken `node` shim must not abort the whole script.
        found_ver=$(node --version 2>/dev/null || true)
        major=$(printf '%s\n' "$found_ver" | sed -E 's/^v([0-9]+).*/\1/')
        # Treat an unparsable version as "too old" instead of letting
        # `[ ... -ge ... ]` complain about a non-integer operand.
        case "$major" in
            ''|*[!0-9]*) major=0 ;;
        esac
        if [ "$major" -ge 20 ]; then
            log_success "Node.js $found_ver found on PATH"
            return 0
        fi
        log_warn "Node.js $found_ver is older than v20 — installing managed Node."
    fi

    if [ -x "$NODE_PREFIX/bin/node" ]; then
        found_ver=$("$NODE_PREFIX/bin/node" --version 2>/dev/null || echo "?")
        export PATH="$NODE_PREFIX/bin:$PATH"
        log_success "Node.js $found_ver found (Hermes-managed at $NODE_PREFIX)"
        return 0
    fi

    log_info "Installing Node.js $NODE_VERSION LTS into $NODE_PREFIX ..."
    local index_url="https://nodejs.org/dist/latest-v${NODE_VERSION}.x/"

    # Fetch the directory listing once and search it for both archive types.
    local index_html
    if ! index_html=$(curl -fsSL "$index_url"); then
        log_error "Could not fetch the Node.js release index at $index_url"
        log_info "Install Node 20+ manually: https://nodejs.org/en/download/"
        return 1
    fi

    # Prefer .tar.xz, fall back to .tar.gz. `|| true` is required: with
    # pipefail a non-matching grep (or a SIGPIPE from `head`) makes the
    # pipeline fail, and errexit would kill the script before the fallback.
    local tarball_name="" ext
    for ext in xz gz; do
        tarball_name=$(printf '%s\n' "$index_html" \
            | grep -oE "node-v${NODE_VERSION}\.[0-9]+\.[0-9]+-${NODE_OS}-${NODE_ARCH}\.tar\.${ext}" \
            | head -1 || true)
        if [ -n "$tarball_name" ]; then
            break
        fi
    done
    if [ -z "$tarball_name" ]; then
        log_error "Could not locate Node.js $NODE_VERSION tarball for $NODE_OS-$NODE_ARCH"
        log_info "Install Node 20+ manually: https://nodejs.org/en/download/"
        return 1
    fi

    local tmp_dir
    tmp_dir=$(mktemp -d)
    # Clean up when this function returns. The trap removes itself so it
    # cannot fire again when the caller returns, where the local $tmp_dir
    # no longer exists and `set -u` would abort.
    trap 'rm -rf "$tmp_dir"; trap - RETURN' RETURN

    log_info "Downloading $tarball_name ..."
    if ! curl -fsSL "${index_url}${tarball_name}" -o "$tmp_dir/$tarball_name"; then
        log_error "Node.js download failed"
        return 1
    fi

    # Guard tar so a corrupt archive yields an error message and a normal
    # return (which runs the cleanup trap) rather than an errexit abort.
    local -a tar_flags=(xf)
    if [[ "$tarball_name" != *.tar.xz ]]; then
        tar_flags=(xzf)
    fi
    if ! tar "${tar_flags[@]}" "$tmp_dir/$tarball_name" -C "$tmp_dir"; then
        log_error "Node.js extraction failed"
        return 1
    fi

    # Find the unpacked directory with a directory-only glob; the tarball
    # itself also matches node-v*, so plain `ls | head` relied on sort order.
    local extracted_dir="" candidate
    for candidate in "$tmp_dir"/node-v*/; do
        if [ -d "$candidate" ]; then
            extracted_dir="${candidate%/}"
            break
        fi
    done
    if [ -z "$extracted_dir" ]; then
        log_error "Node.js extraction failed"
        return 1
    fi

    mkdir -p "$HERMES_HOME"
    rm -rf "$NODE_PREFIX"
    mv "$extracted_dir" "$NODE_PREFIX"
    export PATH="$NODE_PREFIX/bin:$PATH"

    local installed_ver
    installed_ver=$("$NODE_PREFIX/bin/node" --version 2>/dev/null || echo "?")
    log_success "Node.js $installed_ver installed to $NODE_PREFIX"
}
# ─────────────────────────────────────────────────────────────────────────
# Step 2: agent-browser + @askjo/camofox-browser via global npm install
# ─────────────────────────────────────────────────────────────────────────
# ensure_agent_browser — install the agent-browser and camofox-browser npm
# packages into the Hermes-managed prefix unless agent-browser is already
# available. Returns 1 when npm is missing or the install fails.
ensure_agent_browser() {
    command -v npm >/dev/null 2>&1 || {
        log_error "npm not on PATH after Node install — aborting"
        return 1
    }

    # _find_agent_browser() in tools/browser_tool.py walks ~/.hermes/node/bin
    # plus a few standard prefixes, so a shim in the managed prefix (or
    # anywhere on PATH) means there is nothing left to do.
    local managed_shim="$NODE_PREFIX/bin/agent-browser"
    if [ -x "$managed_shim" ] || command -v agent-browser >/dev/null 2>&1; then
        log_success "agent-browser already installed"
        return 0
    fi

    # A system `npm` often points at a root-owned prefix (e.g.
    # /usr/lib/node_modules), where `npm install -g` fails with EACCES
    # without sudo. Forcing --prefix to the user-writable managed directory
    # avoids sudo entirely and lands the shim under $NODE_PREFIX/bin/,
    # which is where _browser_candidate_path_dirs() looks first. With a
    # Hermes-installed Node the prefix is already $NODE_PREFIX.
    mkdir -p "$NODE_PREFIX"
    log_info "Installing agent-browser (npm, prefix=$NODE_PREFIX)..."
    local -a packages=(
        'agent-browser@^0.26.0'
        '@askjo/camofox-browser@^1.5.2'
    )
    if ! npm install -g --prefix "$NODE_PREFIX" --silent "${packages[@]}"; then
        log_error "npm install -g agent-browser failed"
        return 1
    fi

    # Global installs on macOS/Linux put the shim in $NODE_PREFIX/bin/;
    # expose it to the remaining steps (npx playwright).
    export PATH="$NODE_PREFIX/bin:$PATH"
    log_success "agent-browser installed to $NODE_PREFIX/bin/"
}
# ─────────────────────────────────────────────────────────────────────────
# Step 3: Playwright Chromium
# ─────────────────────────────────────────────────────────────────────────
# confirm_chromium_download — decide whether the ~400 MB Chromium download
# may proceed. Returns 0 for yes, 1 for no.
#   * --yes / -y (ASSUME_YES=true) answers yes without prompting.
#   * With no terminal on stdin the answer is no, with a hint to use --yes.
#   * Otherwise the user is prompted; only y / Y / yes / YES count as yes.
confirm_chromium_download() {
    if [ "$ASSUME_YES" = true ]; then
        return 0
    fi

    if ! [ -t 0 ]; then
        log_warn "Non-interactive shell — skipping Chromium prompt."
        log_info "Re-run with --yes to install Chromium (~400 MB download)."
        return 1
    fi

    local answer=""
    local affirmative='^(y|Y|yes|YES)$'
    printf "Install Playwright Chromium (~400 MB download)? [y/N] "
    # EOF on stdin (Ctrl-D) is treated as an empty, i.e. negative, answer.
    read -r answer || answer=""
    if [[ "$answer" =~ $affirmative ]]; then
        return 0
    fi
    return 1
}
# Detect a usable system Chrome/Chromium. agent-browser's Chrome engine can
# use it instead of downloading Playwright's bundled Chromium, saving the
# download cost. Returns the path or empty string.
# find_system_browser — print the first usable Chrome/Chromium it can find
# and return 0; print nothing and return 1 when none exists. Well-known
# command names are tried first, then (macOS only) the standard app bundles.
find_system_browser() {
    local name
    for name in google-chrome google-chrome-stable chromium chromium-browser chrome; do
        command -v "$name" >/dev/null 2>&1 || continue
        command -v "$name"
        return 0
    done

    # App-bundle locations only exist on macOS.
    [ "$OS" = "macos" ] || return 1

    local bundle
    for bundle in \
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \
        "/Applications/Chromium.app/Contents/MacOS/Chromium"
    do
        if [ -x "$bundle" ]; then
            echo "$bundle"
            return 0
        fi
    done
    return 1
}
# write_browser_env BROWSER_PATH — record BROWSER_PATH as
# AGENT_BROWSER_EXECUTABLE_PATH in $HERMES_HOME/.env. An existing
# AGENT_BROWSER_EXECUTABLE_PATH line is left untouched, so a value the user
# already configured is never overwritten or duplicated.
write_browser_env() {
    local browser_path="$1"
    local env_file="$HERMES_HOME/.env"
    local key="AGENT_BROWSER_EXECUTABLE_PATH"

    mkdir -p "$HERMES_HOME"
    if [ -f "$env_file" ]; then
        if grep -q "^${key}=" "$env_file"; then
            return 0
        fi
    fi

    # Leading blank line separates the new entry from earlier content.
    printf '\n%s\n%s\n' \
        "# Hermes Agent browser tools — use the system Chrome/Chromium binary." \
        "${key}=$browser_path" >> "$env_file"
    log_success "Configured browser tools to use $browser_path"
}
# ensure_chromium — make a Chromium-family browser available to agent-browser.
#
# In order: honour --skip-chromium; reuse a system Chrome/Chromium when one
# is found (recording it in .env); otherwise ask for confirmation and
# install Playwright's Chromium through npx. Returns 1 only when npx is
# missing or every install attempt fails; declining the download is not an
# error.
ensure_chromium() {
    if [ "$SKIP_CHROMIUM" = true ]; then
        log_info "Skipping Chromium install (--skip-chromium)"
        return 0
    fi

    local existing_browser
    existing_browser="$(find_system_browser 2>/dev/null || true)"
    if [ -n "$existing_browser" ]; then
        log_success "Found system browser: $existing_browser"
        log_info "Skipping Playwright Chromium download; agent-browser will use it."
        write_browser_env "$existing_browser"
        return 0
    fi

    confirm_chromium_download || {
        log_info "Chromium install skipped. Browser tools will only work if you"
        log_info "set AGENT_BROWSER_EXECUTABLE_PATH or install Chromium later."
        return 0
    }

    command -v npx >/dev/null 2>&1 || {
        log_error "npx not on PATH — cannot install Playwright Chromium"
        return 1
    }

    log_info "Installing Playwright Chromium (~400 MB) ..."

    # On apt-based distros --with-deps needs root. Elevation is only probed
    # non-interactively (never prompting); if it is unavailable or the
    # --with-deps install fails, the browser-only install below still runs.
    local chromium_ready=false
    if [ "$OS" = "linux" ]; then
        case "$DISTRO" in
            ubuntu|debian|raspbian|pop|linuxmint|elementary|zorin|kali|parrot)
                local can_elevate=false
                if [ "$(id -u)" -eq 0 ]; then
                    can_elevate=true
                elif command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null; then
                    can_elevate=true
                fi
                if [ "$can_elevate" = true ]; then
                    log_info "Installing system deps with --with-deps (sudo available)"
                    if npx --yes playwright install --with-deps chromium; then
                        chromium_ready=true
                    fi
                else
                    log_warn "sudo not available non-interactively — installing Chromium without system deps."
                    log_info "If browser tools fail to launch, an administrator should run:"
                    log_info " sudo npx playwright install-deps chromium"
                fi
                ;;
            arch|manjaro|cachyos|endeavouros|garuda)
                log_info "Arch-family system dependencies are not auto-installed."
                log_info "If launch fails, run: sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib"
                ;;
            fedora|rhel|centos|rocky|alma)
                log_info "Fedora/RHEL system dependencies are not auto-installed."
                log_info "If launch fails, run: sudo dnf install nss atk at-spi2-core cups-libs libdrm libxkbcommon mesa-libgbm pango cairo alsa-lib"
                ;;
            opensuse*|sles)
                log_info "openSUSE system dependencies are not auto-installed."
                ;;
        esac
    fi

    # Browser-only install: the default path, and the fallback after a
    # failed --with-deps attempt.
    if [ "$chromium_ready" = false ] && npx --yes playwright install chromium; then
        chromium_ready=true
    fi

    if [ "$chromium_ready" != true ]; then
        log_error "Playwright Chromium install failed"
        log_info "Try again later: npx --yes playwright install chromium"
        return 1
    fi
    log_success "Playwright Chromium installed"
}
# ─────────────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────────────
# main — print the resolved configuration, then run the three bootstrap
# steps in order. Under `set -e` a failing step aborts the script before
# the completion message is printed.
main() {
    log_info "Hermes Agent: bootstrapping browser tools"
    log_info " HERMES_HOME = $HERMES_HOME"
    log_info " OS / arch = $NODE_OS-$NODE_ARCH ${DISTRO:+($DISTRO)}"

    local step
    for step in ensure_node ensure_agent_browser ensure_chromium; do
        "$step"
    done

    log_success "Browser tools setup complete."
    log_info "Hermes Agent will pick up agent-browser from $NODE_PREFIX/bin/ on next launch."
}
main

View file

@ -124,6 +124,20 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
action="store_true",
help="Run interactive Hermes provider/model setup for ACP terminal auth",
)
parser.add_argument(
"--setup-browser",
action="store_true",
help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ "
"for browser tool support. Idempotent.",
)
parser.add_argument(
"--yes",
"-y",
action="store_true",
dest="assume_yes",
help="Accept all prompts (currently used by --setup-browser to skip the "
"~400 MB Chromium download confirmation).",
)
return parser.parse_args(argv)
@ -150,6 +164,75 @@ def _run_setup() -> None:
finally:
sys.argv = old_argv
# Offer browser-tools install as a follow-up. The terminal auth method
# is the one supported first-run UX for registry installs, so this is
# the natural moment to ask. Skip silently if stdin isn't a TTY (the
# answer can't be collected anyway).
if not sys.stdin.isatty():
return
try:
reply = input(
"\nInstall browser tools? Downloads agent-browser (npm) and "
"optionally Playwright Chromium (~400 MB). [y/N] "
).strip().lower()
except (EOFError, KeyboardInterrupt):
return
if reply in {"y", "yes"}:
_run_setup_browser(assume_yes=False)
def _run_setup_browser(assume_yes: bool = False) -> int:
"""Bootstrap agent-browser + Playwright Chromium for the registry-install path.
Shells out to the bundled platform-specific bootstrap script
(acp_adapter/bootstrap/bootstrap_browser_tools.{sh,ps1}) so the install
logic lives in one place readable, debuggable, and shareable with
install.sh / install.ps1 if we ever want to call it from there too.
Returns the script's exit code (0 on success).
"""
import platform
import subprocess
bootstrap_dir = Path(__file__).resolve().parent / "bootstrap"
if platform.system() == "Windows":
script = bootstrap_dir / "bootstrap_browser_tools.ps1"
if not script.is_file():
print(
f"Bootstrap script not found at {script} — wheel may be incomplete.",
file=sys.stderr,
)
return 1
cmd = [
"powershell.exe",
"-NoProfile",
"-ExecutionPolicy", "Bypass",
"-File", str(script),
]
if assume_yes:
cmd.append("-Yes")
else:
script = bootstrap_dir / "bootstrap_browser_tools.sh"
if not script.is_file():
print(
f"Bootstrap script not found at {script} — wheel may be incomplete.",
file=sys.stderr,
)
return 1
cmd = ["bash", str(script)]
if assume_yes:
cmd.append("--yes")
# stdio is inherited so the user sees the bootstrap's progress live.
try:
result = subprocess.run(cmd, check=False)
except FileNotFoundError as exc:
# bash / powershell.exe not on PATH
print(f"Could not launch browser bootstrap: {exc}", file=sys.stderr)
return 1
return result.returncode
def main(argv: list[str] | None = None) -> None:
"""Entry point: load env, configure logging, run the ACP agent."""
@ -163,6 +246,11 @@ def main(argv: list[str] | None = None) -> None:
if args.setup:
_run_setup()
return
if args.setup_browser:
rc = _run_setup_browser(assume_yes=args.assume_yes)
if rc != 0:
sys.exit(rc)
return
_setup_logging()
_load_env()

View file

@ -14,6 +14,7 @@ from collections import deque
from typing import Any, Callable, Deque, Dict
import acp
from acp.schema import AgentPlanUpdate, PlanEntry
from .tools import (
build_tool_complete,
@ -24,6 +25,65 @@ from .tools import (
logger = logging.getLogger(__name__)
def _json_loads_maybe_prefix(value: str) -> Any:
"""Parse a JSON object even when Hermes appended a human hint after it."""
text = value.strip()
try:
return json.loads(text)
except Exception:
decoder = json.JSONDecoder()
data, _ = decoder.raw_decode(text)
return data
def _build_plan_update_from_todo_result(result: Any) -> AgentPlanUpdate | None:
    """Convert a Hermes ``todo`` tool result into an ACP plan update.

    Zed renders ``sessionUpdate: plan`` as its first-class task/todo panel,
    so the adapter surfaces Hermes' todo state natively instead of only as a
    generic tool-call transcript block.

    Args:
        result: Raw tool result. Only a non-blank string whose leading JSON
            value is an object with a list under ``"todos"`` is accepted.

    Returns:
        An ``AgentPlanUpdate`` with one entry per usable todo item (possibly
        none), or ``None`` when ``result`` is not a recognisable todo payload.
    """
    if not (isinstance(result, str) and result.strip()):
        return None

    try:
        payload = _json_loads_maybe_prefix(result)
    except Exception:
        return None

    todos = payload.get("todos") if isinstance(payload, dict) else None
    if not isinstance(todos, list):
        return None

    acp_status_for = {
        "pending": "pending",
        "in_progress": "in_progress",
        "completed": "completed",
        # ACP plans only support pending/in_progress/completed. Cancelled
        # tasks are kept as terminal entries rather than dropped, so the
        # client's full-list replacement does not lose visible context.
        "cancelled": "completed",
    }

    entries: list[PlanEntry] = []
    for todo in todos:
        if not isinstance(todo, dict):
            continue
        # Fall back to the item's id when it carries no content text.
        label = str(todo.get("content") or todo.get("id") or "").strip()
        if not label:
            continue
        hermes_status = str(todo.get("status") or "pending").strip()
        if hermes_status == "cancelled":
            label = f"[cancelled] {label}"
        entries.append(
            PlanEntry(
                content=label,
                priority="medium",
                # Unknown statuses are shown as pending.
                status=acp_status_for.get(hermes_status, "pending"),
            )
        )
    return AgentPlanUpdate(session_update="plan", entries=entries)
def _send_update(
conn: acp.Client,
session_id: str,
@ -31,10 +91,17 @@ def _send_update(
update: Any,
) -> None:
"""Fire-and-forget an ACP session update from a worker thread."""
from agent.async_utils import safe_schedule_threadsafe
future = safe_schedule_threadsafe(
conn.session_update(session_id, update),
loop,
logger=logger,
log_message="Failed to send ACP update",
)
if future is None:
return
try:
future = asyncio.run_coroutine_threadsafe(
conn.session_update(session_id, update), loop
)
future.result(timeout=5)
except Exception:
logger.debug("Failed to send ACP update", exc_info=True)
@ -168,6 +235,10 @@ def make_step_cb(
snapshot=meta.get("snapshot"),
)
_send_update(conn, session_id, loop, update)
if tool_name == "todo":
plan_update = _build_plan_update_from_todo_result(result)
if plan_update is not None:
_send_update(conn, session_id, loop, plan_update)
if not queue:
tool_call_ids.pop(tool_name, None)

View file

@ -111,21 +111,28 @@ def make_approval_callback(
allow_permanent: bool = True,
**_: object,
) -> str:
from agent.async_utils import safe_schedule_threadsafe
options = _build_permission_options(allow_permanent=allow_permanent)
future = None
tool_call = _build_permission_tool_call(command, description)
coro = request_permission_fn(
session_id=session_id,
tool_call=tool_call,
options=options,
)
future = safe_schedule_threadsafe(
coro, loop,
logger=logger,
log_message="Permission request: failed to schedule on loop",
)
if future is None:
return "deny"
try:
tool_call = _build_permission_tool_call(command, description)
coro = request_permission_fn(
session_id=session_id,
tool_call=tool_call,
options=options,
)
future = asyncio.run_coroutine_threadsafe(coro, loop)
response = future.result(timeout=timeout)
except (FutureTimeout, Exception) as exc:
if future is not None:
future.cancel()
future.cancel()
logger.warning("Permission request timed out or failed: %s", exc)
return "deny"

View file

@ -18,6 +18,7 @@ import acp
from acp.schema import (
AgentCapabilities,
AgentMessageChunk,
AgentThoughtChunk,
AuthenticateResponse,
AvailableCommand,
AvailableCommandsUpdate,
@ -59,6 +60,7 @@ from acp.schema import (
from acp_adapter.auth import TERMINAL_SETUP_AUTH_METHOD_ID, build_auth_methods, detect_provider
from acp_adapter.events import (
_build_plan_update_from_todo_result,
make_message_cb,
make_step_cb,
make_thinking_cb,
@ -787,14 +789,20 @@ class HermesACPAgent(acp.Agent):
# ---- Session management -------------------------------------------------
@staticmethod
def _history_message_text(message: dict[str, Any]) -> str:
"""Extract displayable text from a persisted OpenAI-style message."""
content = message.get("content")
if isinstance(content, str):
return content.strip()
if isinstance(content, list):
def _flatten_history_text(value: Any) -> str:
"""Normalize a persisted text-or-text-parts value into a single string.
OpenAI-style assistant content (and provider reasoning fields) can arrive
as either a scalar string or a list of ``{"text": ...}`` /
``{"type": "text", "content": ...}`` parts. Whitespace-only inputs
collapse to an empty string so callers can treat ``""`` as "nothing to
emit".
"""
if isinstance(value, str):
return value.strip()
if isinstance(value, list):
parts: list[str] = []
for item in content:
for item in value:
if isinstance(item, dict):
text = item.get("text")
if isinstance(text, str):
@ -806,6 +814,29 @@ class HermesACPAgent(acp.Agent):
return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
return ""
@classmethod
def _history_message_text(cls, message: dict[str, Any]) -> str:
"""Extract displayable text from a persisted OpenAI-style message."""
return cls._flatten_history_text(message.get("content"))
@classmethod
def _history_reasoning_text(cls, message: dict[str, Any]) -> str:
    """Return the reasoning/thought text stored on a persisted assistant message.

    ``reasoning_content`` is checked first, then ``reasoning``; the first
    one that flattens to non-empty text wins. Both keys are written by live
    code paths for different transports, so neither is a legacy fallback.
    Returns ``""`` when neither key holds any text.
    """
    flattened = (
        cls._flatten_history_text(message.get(key))
        for key in ("reasoning_content", "reasoning")
    )
    # The generator is lazy, so ``reasoning`` is only flattened when
    # ``reasoning_content`` produced nothing.
    return next((text for text in flattened if text), "")
@staticmethod
def _history_message_update(
*,
@ -826,6 +857,11 @@ class HermesACPAgent(acp.Agent):
)
return None
@staticmethod
def _history_thought_update(text: str) -> AgentThoughtChunk:
"""Build an ACP history replay update for an assistant thought."""
return acp.update_agent_thought_text(text)
@staticmethod
def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
"""Extract function name/arguments from an OpenAI-style tool_call."""
@ -853,13 +889,17 @@ class HermesACPAgent(acp.Agent):
).strip()
async def _replay_session_history(self, state: SessionState) -> None:
"""Send persisted user/assistant history to clients during session/load.
"""Replay persisted user/assistant history during session/load or session/resume.
Zed's ACP history UI calls ``session/load`` after the user picks an item
from the Agents sidebar. The agent must then replay the full conversation
as user/assistant chunks plus reconstructed tool-call start/completion
notifications; merely restoring server-side state makes Hermes remember
context, but leaves the editor looking like a clean thread.
Invoked inline (``await``) from both ``load_session`` and
``resume_session`` so that spec-compliant ACP clients receive the
full transcript within the request's lifetime — see the comment at
the call sites for the rationale and prior-art citations.
Replays the conversation as user/assistant chunks, thinking-mode
thought chunks, plus reconstructed tool-call start/completion
notifications. Merely restoring server-side state makes Hermes
remember context, but leaves the editor looking like a clean thread.
"""
if not self._conn or not state.history:
return
@ -881,24 +921,37 @@ class HermesACPAgent(acp.Agent):
for message in state.history:
role = str(message.get("role") or "")
if role in {"user", "assistant"}:
if role == "user":
text = self._history_message_text(message)
if text:
update = self._history_message_update(role=role, text=text)
if update is not None and not await _send(update):
return
continue
if role == "assistant":
thought = self._history_reasoning_text(message)
if thought and not await _send(self._history_thought_update(thought)):
return
text = self._history_message_text(message)
if text:
update = self._history_message_update(role=role, text=text)
if update is not None and not await _send(update):
return
if role == "assistant" and isinstance(message.get("tool_calls"), list):
for tool_call in message["tool_calls"]:
if not isinstance(tool_call, dict):
continue
tool_call_id = self._history_tool_call_id(tool_call)
if not tool_call_id:
continue
tool_name, args = self._history_tool_call_name_args(tool_call)
active_tool_calls[tool_call_id] = (tool_name, args)
if not await _send(build_tool_start(tool_call_id, tool_name, args)):
return
tool_calls = message.get("tool_calls")
if isinstance(tool_calls, list):
for tool_call in tool_calls:
if not isinstance(tool_call, dict):
continue
tool_call_id = self._history_tool_call_id(tool_call)
if not tool_call_id:
continue
tool_name, args = self._history_tool_call_name_args(tool_call)
active_tool_calls[tool_call_id] = (tool_name, args)
if not await _send(build_tool_start(tool_call_id, tool_name, args)):
return
continue
if role == "tool":
@ -910,15 +963,20 @@ class HermesACPAgent(acp.Agent):
if not tool_call_id or not tool_name:
continue
result = message.get("content")
result_text = result if isinstance(result, str) else None
if not await _send(
build_tool_complete(
tool_call_id,
tool_name,
result=result if isinstance(result, str) else None,
result=result_text,
function_args=function_args,
)
):
return
if tool_name == "todo":
plan_update = _build_plan_update_from_todo_result(result_text)
if plan_update is not None and not await _send(plan_update):
return
async def new_session(
self,
@ -936,18 +994,6 @@ class HermesACPAgent(acp.Agent):
models=self._build_model_state(state),
)
def _schedule_history_replay(self, state: SessionState) -> None:
"""Replay persisted history after session/load or session/resume returns.
Zed only attaches streamed transcript/tool updates once the load/resume
response has completed. Sending replay notifications while the request is
still in-flight can make the server look correct in logs while the editor
drops or fails to attach the tool-call history.
"""
loop = asyncio.get_running_loop()
replay_coro = self._replay_session_history(state)
loop.call_soon(asyncio.create_task, replay_coro)
async def load_session(
self,
cwd: str,
@ -961,7 +1007,30 @@ class HermesACPAgent(acp.Agent):
return None
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Loaded session %s", session_id)
self._schedule_history_replay(state)
# Per ACP spec, `session/load` must stream the prior conversation back
# to the client via `session/update` notifications BEFORE responding,
# so the client receives the full transcript within the load request's
# lifetime. Awaiting the replay here matches Codex / Claude Code /
# OpenCode / Pi and the Zed client (which registers the session-update
# routing entry before awaiting the loadSession RPC specifically so
# in-call history replay updates can find the thread). Deferring this
# via `loop.call_soon` (as we did briefly in May 2026) broke every
# spec-compliant ACP client that measures notifications synchronously
# against the load response — see #12285 follow-up.
try:
await self._replay_session_history(state)
except Exception:
# Replay is best-effort — a corrupted or unexpected message shape
# must not turn a successful session/load into a JSON-RPC error
# response. Per-notification failures are already caught inside
# ``_replay_session_history``; this outer guard covers anything
# raised by the helpers themselves before reaching ``_send``.
logger.warning(
"ACP history replay raised during session/load for %s"
"load will still succeed, partial transcript may be missing",
session_id,
exc_info=True,
)
self._schedule_available_commands_update(session_id)
self._schedule_usage_update(state)
return LoadSessionResponse(models=self._build_model_state(state))
@ -979,7 +1048,18 @@ class HermesACPAgent(acp.Agent):
state = self.session_manager.create_session(cwd=cwd)
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Resumed session %s", state.session_id)
self._schedule_history_replay(state)
# See `load_session` above for the spec rationale — replay must
# complete before the response so clients receive the full transcript
# within the request's lifetime.
try:
await self._replay_session_history(state)
except Exception:
logger.warning(
"ACP history replay raised during session/resume for %s"
"resume will still succeed, partial transcript may be missing",
state.session_id,
exc_info=True,
)
self._schedule_available_commands_update(state.session_id)
self._schedule_usage_update(state)
return ResumeSessionResponse(models=self._build_model_state(state))

View file

@ -1060,10 +1060,12 @@ def _generate_pkce() -> tuple:
def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
"""Run Hermes-native OAuth PKCE flow and return credential state."""
import secrets
import time
import webbrowser
verifier, challenge = _generate_pkce()
oauth_state = secrets.token_urlsafe(32)
params = {
"code": "true",
@ -1073,7 +1075,7 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
"scope": _OAUTH_SCOPES,
"code_challenge": challenge,
"code_challenge_method": "S256",
"state": verifier,
"state": oauth_state,
}
from urllib.parse import urlencode
@ -1110,7 +1112,12 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
splits = auth_code.split("#")
code = splits[0]
state = splits[1] if len(splits) > 1 else ""
received_state = splits[1] if len(splits) > 1 else ""
# Validate state to prevent CSRF (RFC 6749 §10.12)
if received_state != oauth_state:
logger.warning("OAuth state mismatch — possible CSRF, aborting")
return None
try:
import urllib.request
@ -1119,7 +1126,7 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
"grant_type": "authorization_code",
"client_id": _OAUTH_CLIENT_ID,
"code": code,
"state": state,
"state": received_state,
"redirect_uri": _OAUTH_REDIRECT_URI,
"code_verifier": verifier,
}).encode()

68
agent/async_utils.py Normal file
View file

@ -0,0 +1,68 @@
"""Async/sync bridging helpers.
The codebase has ~30 sites that schedule a coroutine onto an event loop from a
worker thread via :func:`asyncio.run_coroutine_threadsafe`. That function can
raise :class:`RuntimeError` (e.g. the loop was closed during a shutdown race),
and when it does the coroutine object is never awaited and never closed,
which triggers a ``"coroutine '<name>' was never awaited"`` RuntimeWarning and
leaks the coroutine's frame until GC.
:func:`safe_schedule_threadsafe` wraps the call, closes the coroutine on
scheduling failure, and returns ``None`` (instead of a half-formed future) so
callers can branch cleanly:
fut = safe_schedule_threadsafe(coro, loop)
if fut is None:
return # or fallback behavior
fut.result(timeout=5)
The helper deliberately does NOT also handle ``future.result()`` failures;
that is a separate concern. Once the loop has accepted the coroutine, its
lifecycle belongs to the loop, not the scheduling thread.
"""
from __future__ import annotations
import asyncio
import logging
from concurrent.futures import Future
from typing import Any, Coroutine, Optional
_DEFAULT_LOGGER = logging.getLogger(__name__)
def safe_schedule_threadsafe(
    coro: Coroutine[Any, Any, Any],
    loop: Optional[asyncio.AbstractEventLoop],
    *,
    logger: Optional[logging.Logger] = None,
    log_message: str = "Failed to schedule coroutine on loop",
    log_level: int = logging.DEBUG,
) -> Optional[Future]:
    """Hand ``coro`` to ``loop`` from synchronous code without leaking it.

    Args:
        coro: Coroutine object to schedule.
        loop: Target event loop; ``None`` counts as a scheduling failure.
        logger: Logger for failure messages (module logger when omitted).
        log_message: Prefix of the failure log line.
        log_level: Level at which failures are logged.

    Returns:
        The :class:`concurrent.futures.Future` from
        :func:`asyncio.run_coroutine_threadsafe`, or ``None`` when the loop
        is missing or scheduling raised (e.g. the loop was closed during a
        shutdown race). On every failure path the coroutine is closed so it
        cannot emit a "coroutine was never awaited" warning.

    What happens to the returned future (``.result(timeout=...)``, done
    callbacks, fire-and-forget) is left entirely to the caller.
    """
    log = logger if logger is not None else _DEFAULT_LOGGER

    def _discard() -> None:
        # Closing is only meaningful for real coroutine objects.
        if asyncio.iscoroutine(coro):
            coro.close()

    if loop is None:
        _discard()
        log.log(log_level, "%s: loop is None", log_message)
        return None

    try:
        scheduled = asyncio.run_coroutine_threadsafe(coro, loop)
    except Exception as exc:
        _discard()
        log.log(log_level, "%s: %s", log_message, exc)
        return None
    return scheduled

View file

@ -369,6 +369,21 @@ def build_or_headers(or_config: dict | None = None) -> dict:
return headers
# NVIDIA NIM cloud billing attribution. Keep this host-gated because the
# nvidia provider also supports local/on-prem NIM endpoints via NVIDIA_BASE_URL.
_NVIDIA_NIM_CLOUD_HEADERS = {
    "X-BILLING-INVOKE-ORIGIN": "HermesAgent",
}


def build_nvidia_nim_headers(base_url: str | None) -> dict:
    """Attribution headers for NVIDIA's hosted NIM endpoint.

    Returns a fresh copy of the cloud attribution headers when ``base_url``
    points at ``integrate.api.nvidia.com``; any other (or missing) URL yields
    an empty dict.
    """
    target = str(base_url or "")
    if not base_url_host_matches(target, "integrate.api.nvidia.com"):
        return {}
    # Copy so callers can mutate the result without touching the constant.
    return dict(_NVIDIA_NIM_CLOUD_HEADERS)
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
# referrerUrl and X-Title maps to appName in the gateway's analytics.
from hermes_cli import __version__ as _HERMES_VERSION
@ -409,7 +424,7 @@ NOUS_EXTRA_BODY = _nous_extra_body()
auxiliary_is_nous: bool = False
# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_OPENROUTER_MODEL = "google/gemini-2.5-flash"
_NOUS_MODEL = "google/gemini-3-flash-preview"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
@ -1254,6 +1269,58 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
return api_key, base_url
def _resolve_xai_oauth_for_aux() -> Optional[Tuple[str, str]]:
    """Find usable xAI OAuth ``(api_key, base_url)`` for auxiliary clients.

    Lookup order:

    1. The ``xai-oauth`` credential pool. Some logins exist only as pool
       entries, so skipping the pool would make auxiliary tasks report
       "no provider configured" while ``hermes auth status`` shows the user
       as logged in. The base URL honours ``HERMES_XAI_BASE_URL`` and then
       ``XAI_BASE_URL`` before the entry's own URL and the built-in default.
    2. The singleton runtime resolver in ``hermes_cli.auth`` (older
       auth-store-only logins).

    Returns ``None`` when neither source yields both a key and a base URL.
    Failures in either source are logged at debug level, never raised.
    """
    try:
        from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL

        pool = load_pool("xai-oauth")
        entry = pool.select() if pool and pool.has_credentials() else None
        if entry is not None:
            raw_key = (
                getattr(entry, "runtime_api_key", None)
                or getattr(entry, "access_token", "")
                or ""
            )
            # Environment overrides win; ``or`` keeps the lookups lazy.
            raw_url = (
                os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/")
                or os.getenv("XAI_BASE_URL", "").strip().rstrip("/")
                or getattr(entry, "runtime_base_url", None)
                or getattr(entry, "base_url", None)
                or DEFAULT_XAI_OAUTH_BASE_URL
            )
            pool_key = str(raw_key).strip()
            pool_url = str(raw_url).strip().rstrip("/")
            if pool_key and pool_url:
                return pool_key, pool_url
    except Exception as exc:
        logger.debug("Auxiliary xAI OAuth pool credential resolution failed: %s", exc)

    # Pool gave nothing usable: consult the auth-store singleton.
    try:
        from hermes_cli.auth import resolve_xai_oauth_runtime_credentials

        creds = resolve_xai_oauth_runtime_credentials()
    except Exception as exc:
        logger.debug("Auxiliary xAI OAuth runtime credential resolution failed: %s", exc)
        return None

    store_key = str(creds.get("api_key") or "").strip()
    store_url = str(creds.get("base_url") or "").strip().rstrip("/")
    if store_key and store_url:
        return store_key, store_url
    return None
def _read_codex_access_token() -> Optional[str]:
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
@ -1348,6 +1415,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
extra["default_headers"] = build_nvidia_nim_headers(base_url)
else:
try:
from providers import get_provider_profile as _gpf_aux
@ -1383,6 +1452,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
extra["default_headers"] = build_nvidia_nim_headers(base_url)
else:
try:
from providers import get_provider_profile as _gpf_aux2
@ -1402,7 +1473,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
pool_present, entry = _select_pool_entry("openrouter")
if pool_present:
or_key = explicit_api_key or _pool_runtime_api_key(entry)
@ -1412,7 +1483,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
logger.debug("Auxiliary client: OpenRouter via pool")
return OpenAI(api_key=or_key, base_url=base_url,
default_headers=build_or_headers()), _OPENROUTER_MODEL
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
if not or_key:
@ -1420,7 +1491,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
return None, None
logger.debug("Auxiliary client: OpenRouter")
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
default_headers=build_or_headers()), _OPENROUTER_MODEL
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
def _describe_openrouter_unavailable() -> str:
@ -1744,6 +1815,32 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
return _fallback_client, model
def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
    """Create an auxiliary client for an xAI Grok OAuth session.

    xAI's ``/v1/responses`` endpoint speaks the OpenAI Responses API, so a
    plain ``OpenAI`` client is wrapped in ``CodexAuxiliaryClient``, which
    turns ``chat.completions.create()`` calls into ``responses.stream()``
    requests.

    The caller must pass an explicit model — pinning a default for Grok
    would silently rot when xAI's allowlist drifts.

    Returns:
        ``(client, model)`` on success, or ``(None, None)`` when ``model`` is
        empty or no xAI Grok OAuth credentials can be resolved.
    """
    unavailable = (None, None)

    if not model:
        logger.warning(
            "Auxiliary client: xai-oauth requested without a model; "
            "pass model explicitly (auxiliary.<task>.model in config.yaml)."
        )
        return unavailable

    credentials = _resolve_xai_oauth_for_aux()
    if credentials is None:
        return unavailable

    token, endpoint = credentials
    logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
    transport = OpenAI(api_key=token, base_url=endpoint)
    return CodexAuxiliaryClient(transport, model), model
def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
"""Build a CodexAuxiliaryClient for an explicitly-requested model.
@ -2640,6 +2737,8 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
)
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(sync_base_url, "integrate.api.nvidia.com"):
async_kwargs["default_headers"] = build_nvidia_nim_headers(sync_base_url)
else:
# Fall back to profile.default_headers for providers that declare
# client-level headers on their ProviderProfile (e.g. attribution
@ -2851,6 +2950,26 @@ def resolve_provider_client(
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
# ── xAI Grok OAuth (loopback PKCE → Responses API) ───────────────
# Without this branch, an xai-oauth main provider falls through to the
# generic ``oauth_external`` arm below and returns ``(None, None)``,
# silently re-routing every auxiliary task (compression, web extract,
# session search, curator, etc.) to whatever Step-2 fallback the user
# has configured. Users on xAI Grok OAuth would then see surprise
# OpenRouter / Nous bills for side tasks they thought were running on
# their xAI subscription.
if provider == "xai-oauth":
client, default = _build_xai_oauth_aux_client(model)
if client is None:
logger.warning(
"resolve_provider_client: xai-oauth requested but no xAI "
"OAuth token found (run: hermes model -> xAI Grok OAuth — SuperGrok Subscription)"
)
return None, None
final_model = _normalize_resolved_model(model or default, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
if provider == "custom":
if explicit_base_url:
@ -2881,6 +3000,8 @@ def resolve_provider_client(
extra["default_headers"] = copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
)
elif base_url_host_matches(custom_base, "integrate.api.nvidia.com"):
extra["default_headers"] = build_nvidia_nim_headers(custom_base)
else:
# Fall back to profile.default_headers for providers that
# declare client-level attribution headers on their profile.
@ -2928,10 +3049,17 @@ def resolve_provider_client(
if custom_entry:
custom_base = custom_entry.get("base_url", "").strip()
custom_key = custom_entry.get("api_key", "").strip()
custom_key_env = custom_entry.get("key_env", "").strip()
custom_key_env = (custom_entry.get("key_env") or custom_entry.get("api_key_env") or "").strip()
if not custom_key and custom_key_env:
custom_key = os.getenv(custom_key_env, "").strip()
custom_key = custom_key or "no-key-required"
if custom_key == "no-key-required":
logger.warning(
"resolve_provider_client: named custom provider %r has no resolvable "
"api_key — request will be sent with placeholder no-key-required "
"and will 401 on auth-required endpoints",
custom_entry.get("name") or provider,
)
# An explicit per-task api_mode override (from _resolve_task_provider_model)
# wins; otherwise fall back to what the provider entry declared.
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
@ -3079,6 +3207,8 @@ def resolve_provider_client(
headers.update(copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
))
elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
headers.update(build_nvidia_nim_headers(base_url))
else:
# Fall back to profile.default_headers for providers that declare
# client-level attribution headers on their profile (e.g. GMI
@ -3201,6 +3331,8 @@ def resolve_provider_client(
return resolve_provider_client("nous", model, async_mode)
if provider == "openai-codex":
return resolve_provider_client("openai-codex", model, async_mode)
if provider == "xai-oauth":
return resolve_provider_client("xai-oauth", model, async_mode)
# Other OAuth providers not directly supported
logger.warning("resolve_provider_client: OAuth provider %s not "
"directly supported, try 'auto'", provider)
@ -3275,7 +3407,7 @@ def _resolve_strict_vision_backend(
if provider == "copilot":
return resolve_provider_client("copilot", model, is_vision=True)
if provider == "openrouter":
return _try_openrouter()
return _try_openrouter(model=model)
if provider == "nous":
return _try_nous(vision=True)
if provider == "openai-codex":

View file

@ -244,8 +244,21 @@ def _normalize_responses_message_status(value: Any, *, default: str = "completed
return default
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items."""
def _chat_messages_to_responses_input(
messages: List[Dict[str, Any]],
*,
is_xai_responses: bool = False,
) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items.
``is_xai_responses=True`` strips ``encrypted_content`` from replayed
reasoning items. xAI's OAuth/SuperGrok ``/v1/responses`` surface
rejects encrypted reasoning blobs minted by prior turns: the request
streams an ``error`` SSE frame before ``response.created`` and the
OpenAI SDK collapses it into a generic stream-ordering error. Native
Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content —
keep the default off.
"""
items: List[Dict[str, Any]] = []
seen_item_ids: set = set()
@ -271,9 +284,17 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
if role == "assistant":
# Replay encrypted reasoning items from previous turns
# so the API can maintain coherent reasoning chains.
#
# xAI OAuth (SuperGrok/Premium) rejects replayed
# ``encrypted_content`` reasoning items minted by prior
# turns — see _chat_messages_to_responses_input docstring.
# When ``is_xai_responses`` is set we drop the replay
# entirely; Grok still reasons on each turn server-side,
# we just don't try to thread the prior turn's encrypted
# blob back in.
codex_reasoning = msg.get("codex_reasoning_items")
has_codex_reasoning = False
if isinstance(codex_reasoning, list):
if isinstance(codex_reasoning, list) and not is_xai_responses:
for ri in codex_reasoning:
if isinstance(ri, dict) and ri.get("encrypted_content"):
item_id = ri.get("id")
@ -726,7 +747,7 @@ def _preflight_codex_api_kwargs(
"model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature",
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
"extra_headers",
"extra_headers", "extra_body",
}
normalized: Dict[str, Any] = {
"model": model,
@ -776,6 +797,19 @@ def _preflight_codex_api_kwargs(
if normalized_headers:
normalized["extra_headers"] = normalized_headers
extra_body = api_kwargs.get("extra_body")
if extra_body is not None:
if not isinstance(extra_body, dict):
raise ValueError("Codex Responses request 'extra_body' must be an object.")
# Pass extra_body through verbatim — used by xAI Responses to
# carry `prompt_cache_key` as a body-level field (the documented
# cache-routing surface on /v1/responses). The openai SDK
# serializes extra_body into the JSON body without per-field
# type checks, so it survives Responses.stream() kwarg-signature
# changes that would otherwise raise TypeError before the wire.
if extra_body:
normalized["extra_body"] = dict(extra_body)
if allow_stream:
stream = api_kwargs.get("stream")
if stream is not None and stream is not True:

View file

@ -221,6 +221,114 @@ def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
return json.dumps(shrunken, ensure_ascii=False)
_IMAGE_PART_TYPES = frozenset({"image_url", "input_image", "image"})
def _is_image_part(part: Any) -> bool:
"""True if ``part`` is a multimodal image content block.
Recognizes all three shapes the agent handles:
- OpenAI chat.completions: ``{"type": "image_url", "image_url": ...}``
- OpenAI Responses API: ``{"type": "input_image", "image_url": "..."}``
- Anthropic native: ``{"type": "image", "source": {...}}``
"""
if not isinstance(part, dict):
return False
return part.get("type") in _IMAGE_PART_TYPES
def _content_has_images(content: Any) -> bool:
    """Report whether ``content`` is a list holding at least one image part.

    Non-list content (plain strings, ``None``, ...) never has images.
    """
    if isinstance(content, list):
        for part in content:
            if _is_image_part(part):
                return True
    return False
def _strip_images_from_content(content: Any) -> Any:
    """Swap every image part of ``content`` for a short text placeholder.

    - Non-list content (including strings) is handed back untouched.
    - A list without image parts is handed back as the same object.
    - Otherwise a new list is built: each image part becomes a fresh
      ``{"type": "text", ...}`` placeholder and every other part is carried
      over as-is.

    The input is never mutated.
    """
    if not isinstance(content, list):
        return content

    image_flags = [_is_image_part(part) for part in content]
    if True not in image_flags:
        return content

    return [
        {
            "type": "text",
            "text": "[Attached image — stripped after compression]",
        }
        if is_image
        else part
        for part, is_image in zip(content, image_flags)
    ]
def _strip_historical_media(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Drop image payloads from messages older than the newest image turn.

    The anchor is the *last* user message whose content carries an image.
    Every message before the anchor has its image parts replaced by a short
    placeholder, so later requests stop re-sending the same multi-MB base-64
    blobs. The anchor itself and everything after it are left alone.

    The original list object is returned when there is nothing to do: the
    list is empty, no user message carries images, the anchor is the very
    first message, or no earlier message holds an image.

    Touched messages are shallow-copied; the input is never mutated.

    Port of Kilo-Org/kilocode#9434 (adapted for the OpenAI-style message
    shape the hermes compressor emits).
    """
    if not messages:
        return messages

    # Scan newest-to-oldest for an image-bearing *user* message. Anchoring on
    # image turns (not on every user turn) means a plain-text follow-up after
    # a big-image turn still lets older images be stripped.
    anchor = next(
        (
            idx
            for idx in range(len(messages) - 1, -1, -1)
            if isinstance(messages[idx], dict)
            and messages[idx].get("role") == "user"
            and _content_has_images(messages[idx].get("content"))
        ),
        -1,
    )
    if anchor <= 0:
        # Either no anchor exists or nothing precedes it.
        return messages

    rebuilt: List[Dict[str, Any]] = list(messages)
    touched = False
    for idx in range(anchor):
        candidate = rebuilt[idx]
        if not isinstance(candidate, dict):
            continue
        body = candidate.get("content")
        if not _content_has_images(body):
            continue
        replacement = candidate.copy()
        replacement["content"] = _strip_images_from_content(body)
        rebuilt[idx] = replacement
        touched = True

    return rebuilt if touched else messages
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
"""Create an informative 1-line summary of a tool call + result.
@ -1559,6 +1667,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = self._sanitize_tool_pairs(compressed)
# Replace image parts in all compressed messages before the newest
# image-bearing user turn with a short text placeholder. Without
# this, tail messages keep their original multi-MB base-64 image
# payloads forever, which can push every subsequent API request
# past the provider's body-size limit and wedge the session.
# Port of Kilo-Org/kilocode#9434.
compressed = _strip_historical_media(compressed)
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate

View file

@ -30,6 +30,28 @@ _DEFAULT_TIMEOUT_SECONDS = 900.0
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
# Stderr fingerprint of the deprecated `gh copilot` CLI extension
# (https://github.blog/changelog/2025-09-25-upcoming-deprecation-of-gh-copilot-cli-extension).
# We require BOTH the literal product name ("gh-copilot") AND a deprecation
# marker, so generic stderr from the NEW `@github/copilot` CLI — whose repo
# is github.com/github/copilot-cli and which legitimately mentions "copilot-cli"
# in its own banners and error messages — doesn't get misclassified as the
# deprecated extension.
_DEPRECATION_REQUIRED = ("gh-copilot",)
_DEPRECATION_MARKERS = (
"has been deprecated",
"no commands will be executed",
)
def _is_gh_copilot_deprecation_message(stderr_text: str) -> bool:
"""True iff stderr looks like the deprecated gh-copilot extension's banner."""
lower = stderr_text.lower()
if not any(req in lower for req in _DEPRECATION_REQUIRED):
return False
return any(marker in lower for marker in _DEPRECATION_MARKERS)
def _resolve_command() -> str:
return (
@ -506,6 +528,21 @@ class CopilotACPClient:
stderr_text = "\n".join(stderr_tail).strip()
if proc.poll() is not None and stderr_text:
if _is_gh_copilot_deprecation_message(stderr_text):
raise RuntimeError(
"Hermes ACP mode requires the NEW GitHub Copilot CLI "
"(github.com/github/copilot-cli), but the binary it just "
"spawned is the deprecated `gh copilot` extension.\n\n"
"Install the new CLI:\n"
" npm install -g @github/copilot\n"
" # then verify with: copilot --help\n\n"
"If `copilot` already resolves to the new CLI but you still see this,\n"
"point Hermes at it explicitly:\n"
" export HERMES_COPILOT_ACP_COMMAND=/path/to/new/copilot\n\n"
"Alternative: use the `copilot` provider (no ACP, hits the Copilot API\n"
"directly with a Copilot subscription token) via `hermes setup`.\n\n"
f"Original error:\n{stderr_text}"
)
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")

View file

@ -29,6 +29,7 @@ from hermes_cli.auth import (
_resolve_zai_base_url,
_save_auth_store,
_save_provider_state,
_store_provider_state,
read_credential_pool,
write_credential_pool,
)
@ -128,6 +129,9 @@ class PooledCredential:
def from_dict(cls, provider: str, payload: Dict[str, Any]) -> "PooledCredential":
field_names = {f.name for f in fields(cls) if f.name != "provider"}
data = {k: payload.get(k) for k in field_names if k in payload}
# Rehydrated last_status_at may be an ISO string from to_dict() — normalize to float epoch
if "last_status_at" in data and isinstance(data["last_status_at"], str):
data["last_status_at"] = _parse_absolute_timestamp(data["last_status_at"])
extra = {k: payload[k] for k in _EXTRA_KEYS if k in payload and payload[k] is not None}
data["extra"] = extra
data.setdefault("id", uuid.uuid4().hex[:6])
@ -539,6 +543,64 @@ class CredentialPool:
logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
return entry
def _sync_xai_oauth_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
"""Sync an xAI OAuth pool entry from auth.json if tokens differ.
xAI OAuth refresh tokens are single-use. When another Hermes process
(or another profile sharing the same auth.json) refreshes the token,
it writes the new pair to ``providers["xai-oauth"]["tokens"]`` under
``_auth_store_lock``. Without this resync, our in-memory pool entry
keeps the consumed refresh_token and the next ``_refresh_entry`` call
would replay it and get a ``refresh_token_reused``-style 4xx.
Only applies to entries seeded from the singleton (``loopback_pkce``);
manually added entries (``manual:xai_pkce``) are independent
credentials with their own refresh-token lifecycle.
Returns the updated entry when tokens were adopted from auth.json;
otherwise returns the ``entry`` object that was passed in. Failures are
logged at debug level and never raised.
"""
# Wrong provider or a non-singleton entry: nothing to sync.
if self.provider != "xai-oauth" or entry.source != "loopback_pkce":
return entry
try:
with _auth_store_lock():
auth_store = _load_auth_store()
state = _load_provider_state(auth_store, "xai-oauth")
if not isinstance(state, dict):
return entry
tokens = state.get("tokens")
if not isinstance(tokens, dict):
return entry
store_access = tokens.get("access_token", "")
store_refresh = tokens.get("refresh_token", "")
entry_access = entry.access_token or ""
entry_refresh = entry.refresh_token or ""
# Adopt only when the store has an access token AND either token differs
# from ours (an empty store refresh token never triggers a sync by itself).
if store_access and (
store_access != entry_access
or (store_refresh and store_refresh != entry_refresh)
):
logger.debug(
"Pool entry %s: syncing xAI OAuth tokens from auth.json "
"(refreshed by another process)",
entry.id,
)
# Take the store's tokens (keeping our refresh token when the store has
# none) and wipe recorded status/error fields on the entry.
field_updates: Dict[str, Any] = {
"access_token": store_access,
"refresh_token": store_refresh or entry.refresh_token,
"last_status": None,
"last_status_at": None,
"last_error_code": None,
"last_error_reason": None,
"last_error_message": None,
"last_error_reset_at": None,
}
if state.get("last_refresh"):
field_updates["last_refresh"] = state["last_refresh"]
updated = replace(entry, **field_updates)
self._replace_entry(entry, updated)
self._persist()
return updated
except Exception as exc:
# Best-effort: on any failure fall through and return the entry unchanged.
logger.debug("Failed to sync xAI OAuth entry from auth.json: %s", exc)
return entry
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
"""Sync a Nous pool entry from auth.json if tokens differ.
@ -604,9 +666,22 @@ class CredentialPool:
re-seeding a consumed single-use refresh token.
Applies to any OAuth provider whose singleton lives in auth.json
(currently Nous and OpenAI Codex).
(currently Nous, OpenAI Codex, and xAI Grok OAuth).
``set_active=False`` on every write: a pool sync-back is a
token-rotation side effect, not the user choosing a provider.
Using ``_save_provider_state`` (which sets ``active_provider``)
here would mean every Nous/Codex/xAI refresh in a multi-provider
setup silently flips the ``active_provider`` flag — the next
``hermes`` invocation that defaults to the active provider
(e.g. setup wizard, ``hermes auth status``) would land on
whatever provider happened to refresh last, not whatever the
user actually chose.
"""
if entry.source != "device_code":
# Only sync entries that were seeded *from* a singleton. Manually
# added pool entries (source="manual:*") are independent credentials
# and must not write back to the singleton.
if entry.source not in {"device_code", "loopback_pkce"}:
return
try:
with _auth_store_lock():
@ -632,7 +707,7 @@ class CredentialPool:
state[extra_key] = val
if entry.inference_base_url:
state["inference_base_url"] = entry.inference_base_url
_save_provider_state(auth_store, "nous", state)
_store_provider_state(auth_store, "nous", state, set_active=False)
elif self.provider == "openai-codex":
state = _load_provider_state(auth_store, "openai-codex")
@ -646,7 +721,21 @@ class CredentialPool:
tokens["refresh_token"] = entry.refresh_token
if entry.last_refresh:
state["last_refresh"] = entry.last_refresh
_save_provider_state(auth_store, "openai-codex", state)
_store_provider_state(auth_store, "openai-codex", state, set_active=False)
elif self.provider == "xai-oauth":
state = _load_provider_state(auth_store, "xai-oauth")
if not isinstance(state, dict):
return
tokens = state.get("tokens")
if not isinstance(tokens, dict):
return
tokens["access_token"] = entry.access_token
if entry.refresh_token:
tokens["refresh_token"] = entry.refresh_token
if entry.last_refresh:
state["last_refresh"] = entry.last_refresh
_store_provider_state(auth_store, "xai-oauth", state, set_active=False)
else:
return
@ -699,6 +788,25 @@ class CredentialPool:
refresh_token=refreshed["refresh_token"],
last_refresh=refreshed.get("last_refresh"),
)
elif self.provider == "xai-oauth":
# Adopt fresher tokens from auth.json before spending the
# refresh_token — single-use tokens consumed by another
# process (or another profile sharing the singleton) would
# otherwise trigger ``refresh_token_reused`` on the next
# POST. Only meaningful for singleton-seeded entries.
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
if synced is not entry:
entry = synced
refreshed = auth_mod.refresh_xai_oauth_pure(
entry.access_token,
entry.refresh_token,
)
updated = replace(
entry,
access_token=refreshed["access_token"],
refresh_token=refreshed["refresh_token"],
last_refresh=refreshed.get("last_refresh"),
)
elif self.provider == "nous":
synced = self._sync_nous_entry_from_auth_store(entry)
if synced is not entry:
@ -777,6 +885,30 @@ class CredentialPool:
# Credentials file had a valid (non-expired) token — use it directly
logger.debug("Credentials file has valid token, using without refresh")
return synced
# For xai-oauth: same race as nous — another process may have
# consumed the refresh token between our proactive sync and the
# HTTP call. Re-check auth.json and adopt the fresh tokens if
# they have rotated since. Only meaningful for singleton-seeded
# (loopback_pkce) entries; manual entries don't share state with
# the singleton.
if self.provider == "xai-oauth":
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
if synced.refresh_token != entry.refresh_token:
logger.debug(
"xAI OAuth refresh failed but auth.json has newer tokens — adopting"
)
updated = replace(
synced,
last_status=STATUS_OK,
last_status_at=None,
last_error_code=None,
last_error_reason=None,
last_error_message=None,
last_error_reset_at=None,
)
self._replace_entry(synced, updated)
self._persist()
return updated
# For nous: another process may have consumed the refresh token
# between our proactive sync and the HTTP call. Re-sync from
# auth.json and adopt the fresh tokens if available.
@ -829,6 +961,11 @@ class CredentialPool:
entry.access_token,
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
)
if self.provider == "xai-oauth":
return auth_mod._xai_access_token_is_expiring(
entry.access_token,
auth_mod.XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
)
if self.provider == "nous":
# Nous refresh/mint can require network access and should happen when
# runtime credentials are actually resolved, not merely when the pool
@ -883,6 +1020,17 @@ class CredentialPool:
if synced is not entry:
entry = synced
cleared_any = True
# For xai-oauth singleton-seeded entries, identical pattern:
# an entry frozen as exhausted may simply be holding stale
# tokens that another process (or a fresh `hermes model` ->
# xAI Grok OAuth login) has since rotated in auth.json.
if (self.provider == "xai-oauth"
and entry.source == "loopback_pkce"
and entry.last_status == STATUS_EXHAUSTED):
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
if synced is not entry:
entry = synced
cleared_any = True
if entry.last_status == STATUS_EXHAUSTED:
exhausted_until = _exhausted_until(entry)
if exhausted_until is not None and now < exhausted_until:
@ -1394,6 +1542,37 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
},
)
elif provider == "xai-oauth":
# When the user logs in via ``hermes model`` -> xAI Grok OAuth,
# tokens are written to the auth.json singleton
# (``providers["xai-oauth"]``). Surface them in the pool too so
# ``hermes auth list`` reflects the logged-in state and so the pool
# is the single source of truth for refresh during runtime resolution.
if _is_suppressed(provider, "loopback_pkce"):
return changed, active_sources
state = _load_provider_state(auth_store, "xai-oauth")
tokens = state.get("tokens") if isinstance(state, dict) else None
if isinstance(tokens, dict) and tokens.get("access_token"):
active_sources.add("loopback_pkce")
from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
base_url = DEFAULT_XAI_OAUTH_BASE_URL
changed |= _upsert_entry(
entries,
provider,
"loopback_pkce",
{
"source": "loopback_pkce",
"auth_type": AUTH_TYPE_OAUTH,
"access_token": tokens.get("access_token", ""),
"refresh_token": tokens.get("refresh_token"),
"base_url": base_url,
"last_refresh": state.get("last_refresh"),
"label": label_from_token(tokens.get("access_token", ""), "loopback_pkce"),
},
)
return changed, active_sources

View file

@ -265,6 +265,31 @@ def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
return result
def _remove_xai_oauth_loopback_pkce(provider: str, removed) -> RemovalResult:
"""xAI OAuth tokens live in auth.json providers.xai-oauth — clear them.
Without this step, ``hermes auth remove xai-oauth <N>`` silently undoes
itself: the central dispatcher only removes the in-memory pool entry,
leaves ``providers.xai-oauth`` in auth.json intact, and on the next
``load_pool("xai-oauth")`` call ``_seed_from_singletons`` re-seeds the
entry from the still-present singleton — credentials reappear with no
user feedback. Clearing the singleton in step with the suppression set
by the central dispatcher makes the removal stick.
Belt-and-braces against the manual entry path: ``hermes auth add
xai-oauth`` produces a ``manual:xai_pkce`` entry whose removal step
falls through to "unregistered → nothing to clean up" (correct —
manual entries are pool-only).
Returns a ``RemovalResult`` describing what was cleaned and how to
re-authenticate.
"""
# ``removed`` is not used by this step.
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
result.hints.append(
"Run `hermes model` → xAI Grok OAuth (SuperGrok Subscription) to re-authenticate if needed."
)
return result
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
@ -397,6 +422,11 @@ def _register_all_sources() -> None:
remove_fn=_remove_codex_device_code,
description="auth.json providers.openai-codex + ~/.codex/auth.json",
))
register(RemovalStep(
provider="xai-oauth", source_id="loopback_pkce",
remove_fn=_remove_xai_oauth_loopback_pkce,
description="auth.json providers.xai-oauth",
))
register(RemovalStep(
provider="qwen-oauth", source_id="qwen-cli",
remove_fn=_remove_qwen_cli,

View file

@ -107,9 +107,14 @@ class _BackgroundLoop:
Returns the coroutine's result, or raises its exception.
"""
from agent.async_utils import safe_schedule_threadsafe
if self._loop is None:
if asyncio.iscoroutine(coro):
coro.close()
raise RuntimeError("background loop not started")
fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, self._loop)
fut = safe_schedule_threadsafe(coro, self._loop)
if fut is None:
raise RuntimeError("background loop not running")
try:
return fut.result(timeout=timeout)
except Exception:

View file

@ -213,6 +213,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
"grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning
"grok-4.20": 2000000, # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
"grok-4.3": 1000000, # grok-4.3, grok-4.3-latest — 1M context per docs.x.ai
"grok-4": 256000, # grok-4, grok-4-0709
"grok-3": 131072, # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
"grok-2": 131072, # grok-2, grok-2-1212, grok-2-latest
@ -357,6 +358,12 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.deepseek.com": "deepseek",
"api.githubcopilot.com": "copilot",
"models.github.ai": "copilot",
# GitHub Models free tier (Azure-hosted prototyping endpoint) — same
# canonical provider as the Copilot API. Hard per-request token cap
# (often 8K) makes it unusable for Hermes' system prompt, but mapping
# it here lets us recognize the endpoint and emit a targeted hint
# instead of falling through the unknown-custom-endpoint path.
"models.inference.ai.azure.com": "copilot",
"api.fireworks.ai": "fireworks",
"opencode.ai": "opencode-go",
"api.x.ai": "xai",

View file

@ -15,6 +15,18 @@ and MoonshotAI/kimi-cli#1595:
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
the parent. Presence of both causes "type should be defined in anyOf
items instead of the parent schema".
3. ``enum`` arrays on scalar-typed nodes may not contain ``null`` or empty
strings. Strip those entries (drop the enum entirely if it becomes empty).
4. ``$ref`` nodes may not carry sibling keywords. Moonshot expands the
reference before validation and then rejects the node if sibling keys
like ``description`` remain on the same node as ``$ref``. Strip every
sibling from ``$ref`` nodes so only ``{"$ref": "..."}`` survives.
(Ported from anomalyco/opencode#24730.)
5. ``items`` may not be a tuple-style array (``items: [schemaA, schemaB]``
for positional element schemas). Moonshot's schema engine requires a
single object schema applied to every array element. Collapse tuple
``items`` to the first element schema (or ``{}`` if the tuple is empty).
(Ported from anomalyco/opencode#24730.)
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
@ -66,6 +78,16 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
}
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
elif key == "items" and isinstance(value, list):
# Rule 5: tuple-style ``items`` arrays (positional element
# schemas) are not accepted by Moonshot. Collapse to the
# first element schema if present, else to ``{}``. This
# matches opencode's behaviour for moonshotai / kimi models.
first = value[0] if value else {}
if isinstance(first, dict):
repaired[key] = _repair_schema(first, is_schema=True)
else:
repaired[key] = first
elif key in _SCHEMA_NODE_KEYS:
# items / not / additionalProperties: single nested schema.
# additionalProperties can also be a bool — leave those alone.
@ -130,6 +152,15 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
else:
repaired.pop("enum")
# Rule 4: $ref nodes must not have sibling keywords. Moonshot expands
# the reference before validation and then rejects the node if siblings
# like ``description`` / ``type`` / ``default`` appear alongside $ref.
# The referenced definition still carries its own description on the
# target node, which Moonshot accepts.
# (Ported from anomalyco/opencode#24730.)
if "$ref" in repaired:
return {"$ref": repaired["$ref"]}
return repaired

View file

@ -425,7 +425,7 @@ def build_skill_invocation_message(
loaded = _load_skill_payload(skill_info["skill_dir"], task_id=task_id)
if not loaded:
return f"[Failed to load skill: {skill_info['name']}]"
return None
loaded_skill, skill_dir, skill_name = loaded

View file

@ -24,7 +24,10 @@ class ResponsesApiTransport(ProviderTransport):
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI chat messages to Responses API input items."""
from agent.codex_responses_adapter import _chat_messages_to_responses_input
return _chat_messages_to_responses_input(messages)
return _chat_messages_to_responses_input(
messages,
is_xai_responses=bool(kwargs.get("is_xai_responses")),
)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Responses API function definitions."""
@ -89,24 +92,38 @@ class ResponsesApiTransport(ProviderTransport):
_effort_clamp = {"minimal": "low"}
reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
response_tools = _responses_tools(tools)
kwargs = {
"model": model,
"instructions": instructions,
"input": _chat_messages_to_responses_input(payload_messages),
"tools": _responses_tools(tools),
"tool_choice": "auto",
"parallel_tool_calls": True,
"input": _chat_messages_to_responses_input(
payload_messages,
is_xai_responses=is_xai_responses,
),
"tools": response_tools,
"store": False,
}
if response_tools:
kwargs["tool_choice"] = "auto"
kwargs["parallel_tool_calls"] = True
session_id = params.get("session_id")
if not is_github_responses and session_id:
# xAI Responses takes prompt_cache_key in extra_body (set further
# down); GitHub Models opts out of cache-key routing entirely.
if not is_github_responses and not is_xai_responses and session_id:
kwargs["prompt_cache_key"] = session_id
if reasoning_enabled and is_xai_responses:
from agent.model_metadata import grok_supports_reasoning_effort
kwargs["include"] = ["reasoning.encrypted_content"]
# NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
# any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
# replayed encrypted reasoning items on turn 2+ — see
# _chat_messages_to_responses_input docstring. Requesting the field
# back would just have us cache something we then must strip. Grok
# still reasons natively each turn; coherence across turns rides on
# the visible message text alone.
kwargs["include"] = []
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
# those models reason natively. Only send the effort dial when
@ -165,6 +182,17 @@ class ResponsesApiTransport(ProviderTransport):
merged_extra_headers["x-grok-conv-id"] = session_id
kwargs["extra_headers"] = merged_extra_headers
# xAI Responses cache-routing — body-level field per
# https://docs.x.ai/developers/advanced-api-usage/prompt-caching/maximizing-cache-hits.
# Sent via extra_body (not the typed kwarg) so it survives openai
# SDK builds whose Responses.stream() signature has dropped the field.
existing_extra_body = kwargs.get("extra_body")
merged_extra_body: Dict[str, Any] = {}
if isinstance(existing_extra_body, dict):
merged_extra_body.update(existing_extra_body)
merged_extra_body.setdefault("prompt_cache_key", session_id)
kwargs["extra_body"] = merged_extra_body
return kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:

View file

@ -14,20 +14,28 @@ the user gets full Hermes capability inside a Codex turn.
Scope (what we expose):
- web_search, web_extract Firecrawl, no codex equivalent
- browser_navigate / _click / _type / Camofox/Browserbase automation
_snapshot / _screenshot / _scroll / _back / _press / _vision
- delegate_task Hermes subagents
_snapshot / _scroll / _back / _press /
_get_images / _console / _vision
- vision_analyze image inspection by vision model
- image_generate image generation
- memory Hermes' persistent memory store
- skill_view, skills_list Hermes' skill library
- session_search cross-session search
- text_to_speech TTS
- kanban_* (complete/block/comment/ kanban worker + orchestrator
heartbeat/show/list/create/ handoff (stateless: read env var,
unblock/link) write ~/.hermes/kanban.db)
What we DO NOT expose (codex has equivalents):
What we DO NOT expose:
- terminal / shell codex's own shell tool
- read_file / write_file / patch codex's apply_patch + shell
- search_files / process codex's shell
- clarify, todo codex's own UX
- clarify codex's own UX
- delegate_task / memory / `_AGENT_LOOP_TOOLS` in Hermes
session_search / todo (model_tools.py). They require
the running AIAgent context to
dispatch (mid-loop state), so a
stateless MCP callback can't
drive them. See the inline
comment on EXPOSED_TOOLS below.
Run with: python -m agent.transports.hermes_tools_mcp_server
Spawned by: CodexAppServerSession.ensure_started() when the runtime is

162
cli.py
View file

@ -1965,43 +1965,7 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
return resolved
def _format_process_notification(evt: dict) -> "str | None":
"""Format a process notification event into a [IMPORTANT: ...] message.
Handles both completion events (notify_on_complete) and watch pattern
match events from the unified completion_queue.
"""
evt_type = evt.get("type", "completion")
_sid = evt.get("session_id", "unknown")
_cmd = evt.get("command", "unknown")
if evt_type == "watch_disabled":
return f"[IMPORTANT: {evt.get('message', '')}]"
if evt_type == "watch_match":
_pat = evt.get("pattern", "?")
_out = evt.get("output", "")
_sup = evt.get("suppressed", 0)
text = (
f"[IMPORTANT: Background process {_sid} matched "
f"watch pattern \"{_pat}\".\n"
f"Command: {_cmd}\n"
f"Matched output:\n{_out}"
)
if _sup:
text += f"\n({_sup} earlier matches were suppressed by rate limit)"
text += "]"
return text
# Default: completion event
_exit = evt.get("exit_code", "?")
_out = evt.get("output", "")
return (
f"[IMPORTANT: Background process {_sid} completed "
f"(exit code {_exit}).\n"
f"Command: {_cmd}\n"
f"Output:\n{_out}]"
)
def _detect_file_drop(user_input: str) -> "dict | None":
@ -2860,6 +2824,11 @@ class HermesCLI:
# turn (which would make Ctrl+C feel like it did nothing).
self._last_turn_interrupted = False
self._should_exit = False
# /exit --delete: when True, the current session's SQLite history and
# on-disk transcripts are deleted during shutdown. Set by
# process_command() when the user runs /exit --delete or /quit --delete.
# Ported from google-gemini/gemini-cli#19332.
self._delete_session_on_exit = False
self._last_ctrl_c_time = 0
self._clarify_state = None
self._clarify_freetext = False
@ -3144,8 +3113,19 @@ class HermesCLI:
"session_total_tokens": 0,
"session_api_calls": 0,
"compressions": 0,
"active_background_tasks": 0,
}
# Count live /background tasks. The dict entry is removed in the
# task thread's finally block, so len() reflects truly-running tasks.
# len() on a CPython dict is atomic; safe to read without a lock.
try:
bg_tasks = getattr(self, "_background_tasks", None)
if bg_tasks:
snapshot["active_background_tasks"] = len(bg_tasks)
except Exception:
pass
if not agent:
return snapshot
@ -3370,15 +3350,23 @@ class HermesCLI:
percent_label = f"{percent}%" if percent is not None else "--"
duration_label = snapshot["duration"]
yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
if width < 52:
text = f"{snapshot['model_short']} · {duration_label}"
if yolo_active:
text += " · ⚠ YOLO"
return self._trim_status_bar_text(text, width)
if width < 76:
parts = [f"{snapshot['model_short']}", percent_label]
compressions = snapshot.get("compressions", 0)
if compressions:
parts.append(f"🗜️ {compressions}")
bg_count = snapshot.get("active_background_tasks", 0)
if bg_count:
parts.append(f"{bg_count}")
parts.append(duration_label)
if yolo_active:
parts.append("⚠ YOLO")
return self._trim_status_bar_text(" · ".join(parts), width)
if snapshot["context_length"]:
@ -3392,10 +3380,15 @@ class HermesCLI:
parts = [f"{snapshot['model_short']}", context_label, percent_label]
if compressions:
parts.append(f"🗜️ {compressions}")
bg_count = snapshot.get("active_background_tasks", 0)
if bg_count:
parts.append(f"{bg_count}")
parts.append(duration_label)
prompt_elapsed = snapshot.get("prompt_elapsed")
if prompt_elapsed:
parts.append(prompt_elapsed)
if yolo_active:
parts.append("⚠ YOLO")
return self._trim_status_bar_text("".join(parts), width)
except Exception:
return f"{self.model if getattr(self, 'model', None) else 'Hermes'}"
@ -3412,6 +3405,7 @@ class HermesCLI:
# line and produce duplicated status bar rows over long sessions.
width = self._get_tui_terminal_width()
duration_label = snapshot["duration"]
yolo_active = bool(os.getenv("HERMES_YOLO_MODE"))
if width < 52:
frags = [
@ -3419,13 +3413,17 @@ class HermesCLI:
("class:status-bar-strong", snapshot["model_short"]),
("class:status-bar-dim", " · "),
("class:status-bar-dim", duration_label),
("class:status-bar", " "),
]
if yolo_active:
frags.append(("class:status-bar-dim", " · "))
frags.append(("class:status-bar-yolo", "⚠ YOLO"))
frags.append(("class:status-bar", " "))
else:
percent = snapshot["context_percent"]
percent_label = f"{percent}%" if percent is not None else "--"
if width < 76:
compressions = snapshot.get("compressions", 0)
bg_count = snapshot.get("active_background_tasks", 0)
frags = [
("class:status-bar", ""),
("class:status-bar-strong", snapshot["model_short"]),
@ -3435,11 +3433,17 @@ class HermesCLI:
if compressions:
frags.append(("class:status-bar-dim", " · "))
frags.append((self._compression_count_style(compressions), f"🗜️ {compressions}"))
if bg_count:
frags.append(("class:status-bar-dim", " · "))
frags.append(("class:status-bar-strong", f"{bg_count}"))
frags.extend([
("class:status-bar-dim", " · "),
("class:status-bar-dim", duration_label),
("class:status-bar", " "),
])
if yolo_active:
frags.append(("class:status-bar-dim", " · "))
frags.append(("class:status-bar-yolo", "⚠ YOLO"))
frags.append(("class:status-bar", " "))
else:
if snapshot["context_length"]:
ctx_total = _format_context_length(snapshot["context_length"])
@ -3450,6 +3454,7 @@ class HermesCLI:
bar_style = self._status_bar_context_style(percent)
compressions = snapshot.get("compressions", 0)
bg_count = snapshot.get("active_background_tasks", 0)
frags = [
("class:status-bar", ""),
("class:status-bar-strong", snapshot["model_short"]),
@ -3463,6 +3468,9 @@ class HermesCLI:
if compressions:
frags.append(("class:status-bar-dim", ""))
frags.append((self._compression_count_style(compressions), f"🗜️ {compressions}"))
if bg_count:
frags.append(("class:status-bar-dim", ""))
frags.append(("class:status-bar-strong", f"{bg_count}"))
frags.extend([
("class:status-bar-dim", ""),
("class:status-bar-dim", duration_label),
@ -3472,6 +3480,9 @@ class HermesCLI:
if prompt_elapsed:
frags.append(("class:status-bar-dim", ""))
frags.append(("class:status-bar-dim", prompt_elapsed))
if yolo_active:
frags.append(("class:status-bar-dim", ""))
frags.append(("class:status-bar-yolo", "⚠ YOLO"))
frags.append(("class:status-bar", " "))
total_width = sum(self._status_bar_display_width(text) for _, text in frags)
@ -5458,6 +5469,24 @@ class HermesCLI:
f"Tokens: {total_tokens:,}",
f"Agent Running: {'Yes' if is_running else 'No'}",
])
# Session recap — pure local compute summary of recent activity
# (turn counts, tools used, files touched, last ask, last reply).
# No LLM call, no prompt-cache impact. Inspired by Claude Code
# 2.1.114's /recap.
try:
from hermes_cli.session_recap import build_recap
recap = build_recap(
self.conversation_history or [],
session_title=title or None,
session_id=self.session_id,
platform="cli",
)
if recap:
lines.extend(["", recap])
except Exception as exc: # defensive — don't let /status fail
logger.debug("build_recap failed in /status: %s", exc)
self._console_print("\n".join(lines), highlight=False, markup=False)
def _fast_command_available(self) -> bool:
@ -7672,6 +7701,16 @@ class HermesCLI:
canonical = _cmd_def.name if _cmd_def else _base_word
if canonical in {"quit", "exit"}:
# Parse --delete flag: /exit --delete also removes the current
# session's transcripts + SQLite history. Ported from
# google-gemini/gemini-cli#19332.
_rest = cmd_original.split(None, 1)
_args = (_rest[1] if len(_rest) > 1 else "").strip().lower()
if _args in ("--delete", "-d"):
self._delete_session_on_exit = True
elif _args:
_cprint(f" {_DIM}✗ Unknown argument: {_escape(_args)}. Use /exit --delete to also remove session history.{_RST}")
return True
return False
elif canonical == "help":
self.show_help()
@ -11755,11 +11794,13 @@ class HermesCLI:
# Ensure tirith security scanner is available (downloads if needed).
# Warn the user if tirith is enabled in config but not available,
# so they know command security scanning is degraded.
# so they know command security scanning is degraded. Suppressed
# on platforms where tirith ships no binary (Windows etc.) — the
# user can't act on it and pattern-matching guards still run.
try:
from tools.tirith_security import ensure_installed
from tools.tirith_security import ensure_installed, is_platform_supported
tirith_path = ensure_installed(log_failures=False)
if tirith_path is None:
if tirith_path is None and is_platform_supported():
security_cfg = self.config.get("security", {}) or {}
tirith_enabled = security_cfg.get("tirith_enabled", True)
if tirith_enabled:
@ -12563,6 +12604,7 @@ class HermesCLI:
paste_dir.mkdir(parents=True, exist_ok=True)
paste_file = paste_dir / f"paste_{_paste_counter[0]}_{datetime.now().strftime('%H%M%S')}.txt"
paste_file.write_text(pasted_text, encoding="utf-8")
logger.info("Collapsed paste #%d: %d lines, %d chars -> %s", _paste_counter[0], line_count + 1, len(pasted_text), paste_file)
placeholder = f"[Pasted text #{_paste_counter[0]}: {line_count + 1} lines \u2192 {paste_file}]"
prefix = ""
if buf.cursor_position > 0 and buf.text[buf.cursor_position - 1] != '\n':
@ -12730,6 +12772,7 @@ class HermesCLI:
paste_dir.mkdir(parents=True, exist_ok=True)
paste_file = paste_dir / f"paste_{_paste_counter[0]}_{datetime.now().strftime('%H%M%S')}.txt"
paste_file.write_text(text, encoding="utf-8")
logger.info("Collapsed paste #%d: %d lines, %d chars -> %s (fallback)", _paste_counter[0], line_count + 1, len(text), paste_file)
_paste_just_collapsed[0] = True
buf.text = f"[Pasted text #{_paste_counter[0]}: {line_count + 1} lines \u2192 {paste_file}]"
buf.cursor_position = len(buf.text)
@ -13344,6 +13387,7 @@ class HermesCLI:
'status-bar-warn': 'bg:#1a1a2e #FFD700 bold',
'status-bar-bad': 'bg:#1a1a2e #FF8C00 bold',
'status-bar-critical': 'bg:#1a1a2e #FF6B6B bold',
'status-bar-yolo': 'bg:#1a1a2e #FF4444 bold',
# Bronze horizontal rules around the input area
'input-rule': '#CD7F32',
# Clipboard image attachment badges
@ -13500,16 +13544,8 @@ class HermesCLI:
# and watch pattern matches) while agent is idle.
try:
from tools.process_registry import process_registry
if not process_registry.completion_queue.empty():
evt = process_registry.completion_queue.get_nowait()
# Skip if the agent already consumed this via wait/poll/log
_evt_sid = evt.get("session_id", "")
if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid):
pass # already delivered via tool result
else:
_synth = _format_process_notification(evt)
if _synth:
self._pending_input.put(_synth)
for _evt, _synth in process_registry.drain_notifications():
self._pending_input.put(_synth)
except Exception:
pass
continue
@ -13617,15 +13653,8 @@ class HermesCLI:
# that arrived while the agent was running.
try:
from tools.process_registry import process_registry
while not process_registry.completion_queue.empty():
evt = process_registry.completion_queue.get_nowait()
# Skip if the agent already consumed this via wait/poll/log
_evt_sid = evt.get("session_id", "")
if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid):
continue # already delivered via tool result
_synth = _format_process_notification(evt)
if _synth:
self._pending_input.put(_synth)
for _evt, _synth in process_registry.drain_notifications():
self._pending_input.put(_synth)
except Exception:
pass # Non-fatal — don't break the main loop
@ -13853,6 +13882,19 @@ class HermesCLI:
self._session_db.end_session(self.agent.session_id, "cli_close")
except (Exception, KeyboardInterrupt) as e:
logger.debug("Could not close session in DB: %s", e)
# /exit --delete: also remove the current session's transcripts
# and SQLite history. Ported from google-gemini/gemini-cli#19332.
if getattr(self, '_delete_session_on_exit', False):
try:
from hermes_constants import get_hermes_home as _ghh
_sessions_dir = _ghh() / "sessions"
_sid = self.agent.session_id
if self._session_db.delete_session(_sid, sessions_dir=_sessions_dir):
_cprint(f" {_DIM}✓ Session {_escape(_sid)} deleted{_RST}")
else:
_cprint(f" {_DIM}✗ Session {_escape(_sid)} not found for deletion{_RST}")
except (Exception, KeyboardInterrupt) as e:
logger.debug("Could not delete session on exit: %s", e)
# Plugin hook: on_session_end — safety net for interrupted exits.
# run_conversation() already fires this per-turn on normal completion,
# so only fire here if the agent was mid-turn (_agent_running) when

View file

@ -645,6 +645,44 @@ def get_job(job_id: str) -> Optional[Dict[str, Any]]:
return None
class AmbiguousJobReference(LookupError):
    """Signals that a job *name* resolved to several jobs at once.

    Attributes set on the instance:

    * ``ref`` — the name the caller supplied.
    * ``matches`` — the job records that share that name.

    The exception message lists every matching job ID so the caller can
    retry with an unambiguous ID.
    """

    def __init__(self, ref: str, matches: List[Dict[str, Any]]):
        self.ref = ref
        self.matches = matches
        # Comma-separated IDs of every job sharing the requested name.
        ids = ", ".join([match["id"] for match in matches])
        message = (
            f"Job name '{ref}' is ambiguous — matches {len(matches)} jobs: {ids}. "
            f"Use the job ID instead."
        )
        super().__init__(message)
def resolve_job_ref(ref: str) -> Optional[Dict[str, Any]]:
    """Look up a job by ID or, failing that, by name.

    Resolution order:

    1. An exact ID match always wins, even when some other job's *name*
       happens to equal ``ref``.
    2. Otherwise the name is compared case-insensitively.
    3. A name shared by several jobs raises ``AmbiguousJobReference``
       carrying all matching records, instead of silently picking one.

    Returns the normalized job record, or ``None`` when ``ref`` is empty
    or nothing matches.
    """
    if not ref:
        return None

    all_jobs = load_jobs()

    # Pass 1: exact ID.
    by_id = next((candidate for candidate in all_jobs if candidate["id"] == ref), None)
    if by_id is not None:
        return _normalize_job_record(by_id)

    # Pass 2: case-insensitive name; jobs without a name never match.
    wanted = ref.lower()
    by_name = [
        candidate
        for candidate in all_jobs
        if (candidate.get("name") or "").lower() == wanted
    ]
    if len(by_name) > 1:
        raise AmbiguousJobReference(
            ref, [_normalize_job_record(candidate) for candidate in by_name]
        )
    return _normalize_job_record(by_name[0]) if by_name else None
def list_jobs(include_disabled: bool = False) -> List[Dict[str, Any]]:
"""List all jobs, optionally including disabled ones."""
jobs = [_normalize_job_record(j) for j in load_jobs()]
@ -702,9 +740,12 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, Any]]:
"""Pause a job without deleting it."""
"""Pause a job without deleting it. Accepts a job ID or name."""
job = resolve_job_ref(job_id)
if not job:
return None
return update_job(
job_id,
job["id"],
{
"enabled": False,
"state": "paused",
@ -715,14 +756,14 @@ def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, A
def resume_job(job_id: str) -> Optional[Dict[str, Any]]:
"""Resume a paused job and compute the next future run from now."""
job = get_job(job_id)
"""Resume a paused job and compute the next future run from now. Accepts a job ID or name."""
job = resolve_job_ref(job_id)
if not job:
return None
next_run_at = compute_next_run(job["schedule"])
return update_job(
job_id,
job["id"],
{
"enabled": True,
"state": "scheduled",
@ -734,12 +775,12 @@ def resume_job(job_id: str) -> Optional[Dict[str, Any]]:
def trigger_job(job_id: str) -> Optional[Dict[str, Any]]:
"""Schedule a job to run on the next scheduler tick."""
job = get_job(job_id)
"""Schedule a job to run on the next scheduler tick. Accepts a job ID or name."""
job = resolve_job_ref(job_id)
if not job:
return None
return update_job(
job_id,
job["id"],
{
"enabled": True,
"state": "scheduled",
@ -751,14 +792,18 @@ def trigger_job(job_id: str) -> Optional[Dict[str, Any]]:
def remove_job(job_id: str) -> bool:
"""Remove a job by ID."""
"""Remove a job by ID or name."""
job = resolve_job_ref(job_id)
if not job:
return False
canonical_id = job["id"]
jobs = load_jobs()
original_len = len(jobs)
jobs = [j for j in jobs if j["id"] != job_id]
jobs = [j for j in jobs if j["id"] != canonical_id]
if len(jobs) < original_len:
save_jobs(jobs)
# Clean up output directory to prevent orphaned dirs accumulating
job_output_dir = OUTPUT_DIR / job_id
job_output_dir = OUTPUT_DIR / canonical_id
if job_output_dir.exists():
shutil.rmtree(job_output_dir)
return True

View file

@ -464,7 +464,14 @@ def _send_media_via_adapter(
else:
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
future = asyncio.run_coroutine_threadsafe(coro, loop)
from agent.async_utils import safe_schedule_threadsafe
future = safe_schedule_threadsafe(coro, loop)
if future is None:
logger.warning(
"Job '%s': cannot send media %s, gateway loop unavailable",
job.get("id", "?"), media_path,
)
return
try:
result = future.result(timeout=30)
except TimeoutError:
@ -585,22 +592,26 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
text_to_send = cleaned_delivery_content.strip()
adapter_ok = True
if text_to_send:
future = asyncio.run_coroutine_threadsafe(
from agent.async_utils import safe_schedule_threadsafe
future = safe_schedule_threadsafe(
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
try:
send_result = future.result(timeout=60)
except TimeoutError:
future.cancel()
raise
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, err,
)
adapter_ok = False # fall through to standalone path
if future is None:
adapter_ok = False
else:
try:
send_result = future.result(timeout=60)
except TimeoutError:
future.cancel()
raise
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, err,
)
adapter_ok = False # fall through to standalone path
# Send extracted media files as native attachments via the live adapter
if adapter_ok and media_files:
@ -1791,7 +1802,12 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
for job in parallel_jobs:
_ctx = contextvars.copy_context()
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
_results.extend(f.result() for f in _futures)
for f in concurrent.futures.as_completed(_futures, timeout=600):
try:
_results.append(f.result())
except Exception as exc:
logger.error("Parallel cron job future failed: %s", exc)
_results.append(False)
# Best-effort sweep of MCP stdio subprocesses that survived their
# session teardown during this tick. Runs AFTER every job has

230
gateway/memory_monitor.py Normal file
View file

@ -0,0 +1,230 @@
"""Periodic process memory usage logging for the gateway.
Ported from cline/cline#10343 (src/standalone/memory-monitor.ts).
The gateway is a long-lived process that accumulates memory as it caches
agent instances, session transcripts, tool schemas, memory providers, MCP
connections, etc. A slow leak in any of those subsystems is invisible
in a single log line — you only see it by watching RSS climb over hours.
This module emits a single structured ``[MEMORY] ...`` line every N
minutes (default 5) so maintainers investigating a suspected leak can
grep ``agent.log`` / ``gateway.log`` for a time series of RSS + Python
GC stats. The timer runs in a background thread and shuts down cleanly
with the gateway.
Design notes (parity with the Cline port):
* Grep-friendly single-line format beginning ``[MEMORY]``.
* Final snapshot logged on shutdown so "last RSS before exit" is
always in the log.
* Baseline snapshot logged immediately on start.
* Daemon thread never blocks process exit.
* Uses ``resource`` (stdlib, Linux/macOS) first and falls back to
``psutil`` when ``resource`` isn't available (Windows). Both are
optional; when neither works we emit a single WARNING and disable
the monitor rather than crashing the gateway.
Config: ``logging.memory_monitor`` in ``config.yaml`` — see
``hermes_cli/config.py`` for the defaults block.
"""
from __future__ import annotations
import gc
import logging
import os
import sys
import threading
import time
from typing import Optional
logger = logging.getLogger(__name__)
_BYTES_TO_MB = 1024 * 1024
_monitor_thread: Optional[threading.Thread] = None
_stop_event: Optional[threading.Event] = None
_start_time: Optional[float] = None
_interval_seconds: float = 300.0 # 5 minutes
_lock = threading.Lock()
def _get_rss_mb() -> Optional[int]:
"""Return current process resident set size in MB, or None if unavailable.
Tries ``resource.getrusage`` first (Linux/macOS, no extra deps), then
falls back to ``psutil`` which is an optional hermes-agent dep.
"""
# Linux / macOS — resource is stdlib. On Linux ru_maxrss is in KB,
# on macOS it is in bytes (yes, really). We use it as a cheap
# "current" RSS — ru_maxrss reports the high-water mark for the
# process, which is what you actually want for leak detection.
try:
import resource
maxrss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
if sys.platform == "darwin":
return int(maxrss / _BYTES_TO_MB)
# Linux / other unices: KB
return int(maxrss / 1024)
except Exception:
pass
# Fallback: psutil (Windows, or unusual unix without resource).
try:
import psutil # type: ignore
rss = psutil.Process(os.getpid()).memory_info().rss
return int(rss / _BYTES_TO_MB)
except Exception:
return None
def log_memory_usage(prefix: str = "") -> None:
    """Emit one grep-friendly ``[MEMORY] ...`` line at INFO level.

    The line carries the RSS reading from ``_get_rss_mb()`` (or
    ``rss=unavailable``), the garbage collector's current per-generation
    counts, the number of live threads, and seconds elapsed since the
    monitor's recorded start time (0 when no start time is recorded).

    Parameters
    ----------
    prefix
        Optional extra tag inserted after ``[MEMORY]``, e.g.
        ``"baseline"`` or ``"shutdown"``.
    """
    rss = _get_rss_mb()
    uptime = int(time.monotonic() - _start_time) if _start_time else 0

    # gc.get_count() gives the collector's current (gen0, gen1, gen2)
    # counters; logged raw as a cheap allocation-pressure signal.
    try:
        gc_counts = gc.get_count()
    except Exception:
        gc_counts = (0, 0, 0)

    # Thread count helps correlate memory growth with thread leaks.
    try:
        thread_count = threading.active_count()
    except Exception:
        thread_count = 0

    tag = f"{prefix} " if prefix else ""

    if rss is not None:
        logger.info(
            "[MEMORY] %srss=%dMB gc=%s threads=%d uptime=%ds",
            tag,
            rss,
            gc_counts,
            thread_count,
            uptime,
        )
        return

    logger.info(
        "[MEMORY] %srss=unavailable gc=%s threads=%d uptime=%ds",
        tag,
        gc_counts,
        thread_count,
        uptime,
    )
def _monitor_loop(stop_event: threading.Event, interval: float) -> None:
"""Background thread body — log every ``interval`` seconds until stopped."""
while not stop_event.wait(interval):
try:
log_memory_usage()
except Exception as e:
# Never let the monitor crash the gateway; just log and carry on.
logger.debug("Memory monitor iteration failed: %s", e)
def start_memory_monitoring(interval_seconds: float = 300.0) -> bool:
    """Start periodic memory usage logging in a daemon thread.

    Logs immediately to capture a baseline, then every ``interval_seconds``.
    Safe to call multiple times - subsequent calls are no-ops while the
    first monitor is still running.

    Parameters
    ----------
    interval_seconds
        How often to log, in seconds. Must be greater than zero.
        Default 300s (5 minutes), matching the upstream cline/cline
        implementation.

    Returns
    -------
    bool
        True if a fresh monitor thread was started. False if one was
        already running, if memory introspection isn't available, or if
        ``interval_seconds`` is not a positive number.
    """
    global _monitor_thread, _stop_event, _start_time, _interval_seconds

    with _lock:
        if _monitor_thread is not None and _monitor_thread.is_alive():
            return False

        # Sanity-check that we can read RSS at all. If neither resource
        # nor psutil works, no point spinning a thread that can only log
        # "rss=unavailable" forever — warn once and bail.
        if _get_rss_mb() is None:
            logger.warning(
                "[MEMORY] Memory monitoring unavailable: neither resource.getrusage "
                "nor psutil could read process RSS — skipping periodic logging.",
            )
            return False

        interval = float(interval_seconds)
        # Event.wait() returns immediately for a zero or negative timeout,
        # which would turn the monitor loop into a busy loop that floods
        # the log. ``not interval > 0`` also rejects NaN.
        if not interval > 0:
            logger.warning(
                "[MEMORY] Memory monitoring not started: interval_seconds must be "
                "positive (got %r).",
                interval_seconds,
            )
            return False

        _start_time = time.monotonic()
        _interval_seconds = interval
        _stop_event = threading.Event()

        # Baseline snapshot before the loop starts.
        log_memory_usage(prefix="baseline")

        _monitor_thread = threading.Thread(
            target=_monitor_loop,
            args=(_stop_event, _interval_seconds),
            name="gateway-memory-monitor",
            daemon=True,
        )
        _monitor_thread.start()
        logger.info(
            "[MEMORY] Periodic memory monitoring started (interval: %ds)",
            int(_interval_seconds),
        )
        return True
def stop_memory_monitoring(timeout: float = 2.0) -> None:
    """Signal the monitor thread to stop, after logging a final snapshot.

    Does nothing when no monitor is registered, so it can be called even
    if ``start_memory_monitoring()`` never ran.

    Parameters
    ----------
    timeout
        Maximum number of seconds to wait for the thread to exit.
    """
    global _monitor_thread, _stop_event

    with _lock:
        if _monitor_thread is None or _stop_event is None:
            return

        # Emit the "shutdown" line first so the last RSS reading is in the
        # log; a failure here must not prevent teardown.
        try:
            log_memory_usage(prefix="shutdown")
        except Exception:
            pass

        _stop_event.set()
        worker = _monitor_thread
        _monitor_thread = _stop_event = None

    # The join happens after the lock is released so that waiting on the
    # thread never blocks other callers of this module.
    try:
        worker.join(timeout=timeout)
    except Exception:
        pass
    logger.info("[MEMORY] Periodic memory monitoring stopped")
def is_running() -> bool:
    """Report whether a monitor thread is registered and still alive."""
    with _lock:
        worker = _monitor_thread
        if worker is None:
            return False
        return worker.is_alive()

View file

@ -71,6 +71,35 @@ def _coerce_port(value: Any, default: int = DEFAULT_PORT) -> int:
return default
_TRUE_REQUEST_BOOL_STRINGS = frozenset({"1", "true", "yes", "on"})
_FALSE_REQUEST_BOOL_STRINGS = frozenset({"0", "false", "no", "off"})
def _coerce_request_bool(value: Any, default: bool = False) -> bool:
"""Normalize boolean-like API payload values.
External clients should send real JSON booleans, but some OpenAI-compatible
frontends and middleware serialize flags like ``stream`` as strings. Using
Python truthiness on those values misroutes requests because ``"false"`` is
still truthy. Treat only explicit bool-ish scalars as booleans; everything
else falls back to the caller's default.
"""
if isinstance(value, bool):
return value
if value is None:
return default
if isinstance(value, str):
normalized = value.strip().lower()
if normalized in _TRUE_REQUEST_BOOL_STRINGS:
return True
if normalized in _FALSE_REQUEST_BOOL_STRINGS:
return False
return default
if isinstance(value, (int, float)):
return bool(value)
return default
def _normalize_chat_content(
content: Any, *, _max_depth: int = 10, _depth: int = 0,
) -> str:
@ -356,15 +385,34 @@ class ResponseStore:
# Evict oldest entries beyond max_size
count = self._conn.execute("SELECT COUNT(*) FROM responses").fetchone()[0]
if count > self._max_size:
self._conn.execute(
"DELETE FROM responses WHERE response_id IN "
"(SELECT response_id FROM responses ORDER BY accessed_at ASC LIMIT ?)",
(count - self._max_size,),
)
# Collect IDs that will be evicted
evict_ids = [
row[0]
for row in self._conn.execute(
"SELECT response_id FROM responses ORDER BY accessed_at ASC LIMIT ?",
(count - self._max_size,),
).fetchall()
]
if evict_ids:
placeholders = ",".join("?" for _ in evict_ids)
# Clear conversation mappings pointing to evicted responses
self._conn.execute(
f"DELETE FROM conversations WHERE response_id IN ({placeholders})",
evict_ids,
)
# Delete evicted responses
self._conn.execute(
f"DELETE FROM responses WHERE response_id IN ({placeholders})",
evict_ids,
)
self._conn.commit()
def delete(self, response_id: str) -> bool:
"""Remove a response from the store. Returns True if found and deleted."""
# Clear conversation mappings pointing to this response
self._conn.execute(
"DELETE FROM conversations WHERE response_id = ?", (response_id,)
)
cursor = self._conn.execute(
"DELETE FROM responses WHERE response_id = ?", (response_id,)
)
@ -462,7 +510,12 @@ else:
body_limit_middleware = None # type: ignore[assignment]
_SECURITY_HEADERS = {
"Content-Security-Policy": "default-src 'none'; frame-ancestors 'none'",
"Permissions-Policy": "camera=(), microphone=(), geolocation=()",
"Strict-Transport-Security": "max-age=31536000; includeSubDomains",
"X-Content-Type-Options": "nosniff",
"X-Frame-Options": "DENY",
"X-XSS-Protection": "0",
"Referrer-Policy": "no-referrer",
}
@ -986,7 +1039,7 @@ class APIServerAdapter(BasePlatformAdapter):
status=400,
)
stream = body.get("stream", False)
stream = _coerce_request_bool(body.get("stream"), default=False)
# Extract system message (becomes ephemeral system prompt layered ON TOP of core)
system_prompt = None
@ -2063,7 +2116,7 @@ class APIServerAdapter(BasePlatformAdapter):
instructions = body.get("instructions")
previous_response_id = body.get("previous_response_id")
conversation = body.get("conversation")
store = body.get("store", True)
store = _coerce_request_bool(body.get("store"), default=True)
# conversation and previous_response_id are mutually exclusive
if conversation and previous_response_id:
@ -2146,7 +2199,7 @@ class APIServerAdapter(BasePlatformAdapter):
# groups the entire conversation under one session entry.
session_id = stored_session_id or str(uuid.uuid4())
stream = bool(body.get("stream", False))
stream = _coerce_request_bool(body.get("stream"), default=False)
if stream:
# Streaming branch — emit OpenAI Responses SSE events as the
# agent runs so frontends can render text deltas and tool
@ -3209,7 +3262,10 @@ class APIServerAdapter(BasePlatformAdapter):
status=409,
)
resolve_all = bool(body.get("all") or body.get("resolve_all"))
resolve_all = (
_coerce_request_bool(body.get("all"), default=False)
or _coerce_request_bool(body.get("resolve_all"), default=False)
)
try:
from tools.approval import resolve_gateway_approval

View file

@ -829,6 +829,9 @@ SUPPORTED_DOCUMENT_TYPES = {
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
".ts": "text/plain",
".py": "text/plain",
".sh": "text/plain",
}
@ -2011,6 +2014,13 @@ class BasePlatformAdapter(ABC):
text = f"{caption}\n{text}"
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to, metadata=metadata)
def prepare_tts_text(self, text: str) -> str:
    """Return ``text`` cleaned up for text-to-speech.

    The default removes the markdown punctuation characters
    ``* _ ` # [ ] ( )``, keeps the first 4000 characters of what is left,
    and then trims surrounding whitespace. Subclasses may override this to
    filter tool output, code, etc.
    """
    without_markup = re.sub(r'[*_`#\[\]()]', '', text)
    clipped = without_markup[:4000]
    return clipped.strip()
async def play_tts(
self,
chat_id: str,
@ -2961,9 +2971,25 @@ class BasePlatformAdapter(ABC):
merge_pending_message_event(self._pending_messages, session_key, event)
return # Don't interrupt now - will run after current task completes
# Default behavior for non-photo follow-ups: interrupt the running agent
# Default behavior for non-photo follow-ups: interrupt the running agent.
#
# Use merge_text=True so rapid TEXT follow-ups (#4469) accumulate
# into the single pending slot instead of clobbering each other.
# Without merging, three rapid messages "A", "B", "C" land like:
# _pending_messages[k] = A (interrupts)
# _pending_messages[k] = B (replaces A before consumer reads)
# _pending_messages[k] = C (replaces B)
# ...and only "C" reaches the next turn. merge_pending_message_event
# already does the right thing for photo/media bursts; the
# ``merge_text=True`` flag extends that to plain TEXT events.
# Same shape as the Telegram bursty-grace path in gateway/run.py.
logger.debug("[%s] New message while session %s is active — triggering interrupt", self.name, session_key)
self._pending_messages[session_key] = event
merge_pending_message_event(
self._pending_messages,
session_key,
event,
merge_text=True,
)
# Signal the interrupt (the processing task checks this)
self._active_sessions[session_key].set()
return # Don't process now - will be handled after current task finishes
@ -3125,7 +3151,7 @@ class BasePlatformAdapter(ABC):
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
if check_tts_requirements():
import json as _json
speech_text = re.sub(r'[*_`#\[\]()]', '', text_content)[:4000].strip()
speech_text = self.prepare_tts_text(text_content)
if not speech_text:
raise ValueError("Empty text after markdown cleanup")
tts_result_str = await asyncio.to_thread(

View file

@ -3564,6 +3564,43 @@ class DiscordAdapter(BasePlatformAdapter):
return bool(configured)
return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in {"false", "0", "no", "off"}
def _discord_allow_any_attachment(self) -> bool:
"""Return whether Discord attachments bypass the SUPPORTED_DOCUMENT_TYPES allowlist.
When True, any uploaded file is cached to disk and surfaced to the
agent as a local path so it can be inspected via terminal / read_file
/ ffprobe / etc. Default False preserves the historical behaviour of
dropping unsupported types with a warning log.
"""
configured = self.config.extra.get("allow_any_attachment")
if configured is not None:
if isinstance(configured, str):
return configured.lower() not in {"false", "0", "no", "off", ""}
return bool(configured)
return os.getenv("DISCORD_ALLOW_ANY_ATTACHMENT", "false").lower() in {"true", "1", "yes", "on"}
def _discord_max_attachment_bytes(self) -> int:
"""Return the per-attachment byte cap. 0 means unlimited.
The whole attachment is held in memory while being written to the
cache, so unlimited carries a real memory cost. Default 32 MiB
matches the historical hardcoded value.
"""
configured = self.config.extra.get("max_attachment_bytes")
if configured is None:
configured = os.getenv("DISCORD_MAX_ATTACHMENT_BYTES")
if configured is None or configured == "":
return 32 * 1024 * 1024
try:
value = int(configured)
except (TypeError, ValueError):
logger.warning(
"[Discord] Invalid max_attachment_bytes value %r, falling back to 32 MiB",
configured,
)
return 32 * 1024 * 1024
return max(0, value)
def _discord_free_response_channels(self) -> set:
"""Return Discord channel IDs where no bot mention is required.
@ -4495,6 +4532,7 @@ class DiscordAdapter(BasePlatformAdapter):
if normalized_content.startswith("/"):
msg_type = MessageType.COMMAND
elif all_attachments:
_allow_any = self._discord_allow_any_attachment()
# Check attachment types
for att in all_attachments:
if att.content_type:
@ -4509,9 +4547,15 @@ class DiscordAdapter(BasePlatformAdapter):
if att.filename:
_, doc_ext = os.path.splitext(att.filename)
doc_ext = doc_ext.lower()
if doc_ext in SUPPORTED_DOCUMENT_TYPES:
if doc_ext in SUPPORTED_DOCUMENT_TYPES or _allow_any:
msg_type = MessageType.DOCUMENT
break
elif _allow_any:
# No content_type at all (rare — discord usually fills it
# in). Treat as a document so downstream pipelines surface
# the path to the agent.
msg_type = MessageType.DOCUMENT
break
# When auto-threading kicked in, route responses to the new thread
effective_channel = auto_threaded_channel or message.channel
@ -4594,31 +4638,48 @@ class DiscordAdapter(BasePlatformAdapter):
if not ext and content_type:
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
ext = mime_to_ext.get(content_type, "")
if ext not in SUPPORTED_DOCUMENT_TYPES:
allow_any_attachment = self._discord_allow_any_attachment()
in_allowlist = ext in SUPPORTED_DOCUMENT_TYPES
if not in_allowlist and not allow_any_attachment:
logger.warning(
"[Discord] Unsupported document type '%s' (%s), skipping",
ext or "unknown", content_type,
)
else:
MAX_DOC_BYTES = 32 * 1024 * 1024
if att.size and att.size > MAX_DOC_BYTES:
max_doc_bytes = self._discord_max_attachment_bytes()
if max_doc_bytes and att.size and att.size > max_doc_bytes:
logger.warning(
"[Discord] Document too large (%s bytes), skipping: %s",
att.size, att.filename,
"[Discord] Document too large (%s bytes > cap %s), skipping: %s",
att.size, max_doc_bytes, att.filename,
)
else:
try:
raw_bytes = await self._cache_discord_document(att, ext)
cached_path = cache_document_from_bytes(
raw_bytes, att.filename or f"document{ext}"
raw_bytes, att.filename or f"document{ext or '.bin'}"
)
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
if in_allowlist:
doc_mime = SUPPORTED_DOCUMENT_TYPES[ext]
else:
# allow_any_attachment path: untyped file. Use the
# source content_type if discord gave us one,
# otherwise fall back to octet-stream so the agent
# knows it's binary and reaches for terminal tools.
doc_mime = (
content_type
if content_type and content_type != "unknown"
else "application/octet-stream"
)
media_urls.append(cached_path)
media_types.append(doc_mime)
logger.info("[Discord] Cached user document: %s", cached_path)
logger.info(
"[Discord] Cached user %s: %s",
"document" if in_allowlist else "attachment",
cached_path,
)
# Inject text content for plain-text documents (capped at 100 KB)
MAX_TEXT_INJECT_BYTES = 100 * 1024
if ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
if in_allowlist and ext in {".md", ".txt", ".log"} and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = att.filename or f"document{ext}"
@ -4630,6 +4691,13 @@ class DiscordAdapter(BasePlatformAdapter):
pending_text_injection = injection
except UnicodeDecodeError:
pass
# NOTE: for the allow_any_attachment path we deliberately
# do NOT inject a path string here. ``gateway/run.py``
# already detects DOCUMENT-typed events with
# ``application/octet-stream`` MIME and emits a context
# note with the sandbox-translated cache path via
# ``to_agent_visible_cache_path()`` (important for
# Docker/Modal terminal backends).
except Exception as e:
logger.warning(
"[Discord] Failed to cache document %s: %s",

View file

@ -2273,11 +2273,7 @@ class FeishuAdapter(BasePlatformAdapter):
daemon=True,
).start()
return
future = asyncio.run_coroutine_threadsafe(
self._handle_message_event_data(data),
loop,
)
future.add_done_callback(self._log_background_failure)
self._submit_on_loop(loop, self._handle_message_event_data(data))
def _enqueue_pending_inbound_event(self, data: Any) -> bool:
"""Append an event to the pending-inbound queue.
@ -2353,16 +2349,12 @@ class FeishuAdapter(BasePlatformAdapter):
dispatched = 0
requeue: List[Any] = []
for event in batch:
try:
fut = asyncio.run_coroutine_threadsafe(
self._handle_message_event_data(event),
loop,
)
fut.add_done_callback(self._log_background_failure)
if self._submit_on_loop(
loop, self._handle_message_event_data(event)
):
dispatched += 1
except RuntimeError:
# Loop closed between check and submit — requeue
# and poll again.
else:
# Loop closed/unavailable — requeue and poll again.
requeue.append(event)
if requeue:
with self._pending_inbound_lock:
@ -2466,11 +2458,10 @@ class FeishuAdapter(BasePlatformAdapter):
if not self._loop_accepts_callbacks(loop):
logger.warning("[Feishu] Dropping drive comment event before adapter loop is ready")
return
future = asyncio.run_coroutine_threadsafe(
handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id),
self._submit_on_loop(
loop,
handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id),
)
future.add_done_callback(self._log_background_failure)
def _on_reaction_event(self, event_type: str, data: Any) -> None:
"""Route user reactions on bot messages as synthetic text events."""
@ -2498,11 +2489,7 @@ class FeishuAdapter(BasePlatformAdapter):
or bool(getattr(loop, "is_closed", lambda: False)())
):
return
future = asyncio.run_coroutine_threadsafe(
self._handle_reaction_event(event_type, data),
loop,
)
future.add_done_callback(self._log_background_failure)
self._submit_on_loop(loop, self._handle_reaction_event(event_type, data))
def _on_card_action_trigger(self, data: Any) -> Any:
"""Handle card-action callback from the Feishu SDK (synchronous).
@ -2548,11 +2535,14 @@ class FeishuAdapter(BasePlatformAdapter):
def _submit_on_loop(self, loop: Any, coro: Any) -> bool:
"""Schedule background work on the adapter loop with shared failure logging."""
try:
future = asyncio.run_coroutine_threadsafe(coro, loop)
except Exception:
coro.close()
logger.warning("[Feishu] Failed to schedule background callback work", exc_info=True)
from agent.async_utils import safe_schedule_threadsafe
future = safe_schedule_threadsafe(
coro, loop,
logger=logger,
log_message="[Feishu] Failed to schedule background callback work",
log_level=logging.WARNING,
)
if future is None:
return False
future.add_done_callback(self._log_background_failure)
return True

View file

@ -168,8 +168,8 @@ class TextBatchAggregator:
# Pre-compiled regexes for performance
_RE_BOLD = re.compile(r"\*\*(.+?)\*\*", re.DOTALL)
_RE_ITALIC_STAR = re.compile(r"\*(.+?)\*", re.DOTALL)
_RE_BOLD_UNDER = re.compile(r"__(.+?)__", re.DOTALL)
_RE_ITALIC_UNDER = re.compile(r"_(.+?)_", re.DOTALL)
_RE_BOLD_UNDER = re.compile(r"\b__(?![\s_])(.+?)(?<![\s_])__\b", re.DOTALL)
_RE_ITALIC_UNDER = re.compile(r"\b_(?![\s_])(.+?)(?<![\s_])_\b", re.DOTALL)
_RE_CODE_BLOCK = re.compile(r"```[a-zA-Z0-9_+-]*\n?")
_RE_INLINE_CODE = re.compile(r"`(.+?)`")
_RE_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)

View file

@ -490,9 +490,19 @@ class SignalAdapter(BasePlatformAdapter):
if not data_message:
return
# Check for group message
# Check for group message.
# Modern Signal groups surface on dataMessage.groupV2.id; legacy V1
# groups still arrive under dataMessage.groupInfo.groupId. signal-cli
# versions differ in which field they expose for V2 groups — some
# forward the underlying libsignal envelope verbatim (groupV2), others
# normalize everything into groupInfo. Read groupV2 first and fall
# back to groupInfo so V2-only groups aren't misrouted as DMs.
group_info = data_message.get("groupInfo")
group_id = group_info.get("groupId") if group_info else None
group_v2 = data_message.get("groupV2")
group_id = (
(group_v2.get("id") if isinstance(group_v2, dict) else None)
or (group_info.get("groupId") if isinstance(group_info, dict) else None)
)
is_group = bool(group_id)
# Group message filtering — derived from SIGNAL_GROUP_ALLOWED_USERS:
@ -562,7 +572,7 @@ class SignalAdapter(BasePlatformAdapter):
# Build session source
source = self.build_source(
chat_id=chat_id,
chat_name=group_info.get("groupName") if group_info else sender_name,
chat_name=(group_info.get("groupName") if isinstance(group_info, dict) else None) or sender_name,
chat_type=chat_type,
user_id=sender,
user_name=sender_name or sender,

View file

@ -482,7 +482,7 @@ class SlackAdapter(BasePlatformAdapter):
"text": text,
}
try:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(trust_env=True) as session:
async with session.post(
ctx["response_url"],
json=payload,
@ -2785,7 +2785,10 @@ class SlackAdapter(BasePlatformAdapter):
from hermes_cli.commands import slack_subcommand_map
subcommand_map = slack_subcommand_map()
subcommand_map["compact"] = "/compress"
first_word = text.split()[0] if text else ""
# Guard against whitespace-only text where ``text`` is truthy but
# ``text.split()`` returns ``[]`` (e.g. user sends ``/hermes ``).
parts = text.split() if text else []
first_word = parts[0] if parts else ""
if first_word in subcommand_map:
rest = text[len(first_word):].strip()
text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]

View file

@ -128,6 +128,7 @@ class SmsAdapter(BasePlatformAdapter):
await site.start()
self._http_session = aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=30),
trust_env=True,
)
self._running = True
@ -169,6 +170,7 @@ class SmsAdapter(BasePlatformAdapter):
session = self._http_session or aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=30),
trust_env=True,
)
try:
for chunk in chunks:

View file

@ -3504,14 +3504,6 @@ class TelegramAdapter(BasePlatformAdapter):
if self._bot:
try:
_typing_thread = self._metadata_thread_id(metadata)
# Skip the Bot API call entirely for Hermes-created DM topic
# lanes: send_chat_action only accepts message_thread_id, which
# Telegram's Bot API 10.0 rejects for these lanes. The send
# path uses the reply-anchor fallback instead, but typing has
# no equivalent — skipping avoids noisy "thread not found"
# debug logs on every typing tick.
if metadata and metadata.get("telegram_dm_topic_reply_fallback"):
return
message_thread_id = self._message_thread_id_for_typing(_typing_thread)
# No retry-without-thread fallback here: _message_thread_id_for_typing
# already maps the forum General topic to None, so any non-None value

View file

@ -54,6 +54,13 @@ from gateway.platforms.base import (
logger = logging.getLogger(__name__)
_BUILTIN_DELIVER_PLATFORMS = {
"telegram", "discord", "slack", "signal", "sms", "whatsapp",
"matrix", "mattermost", "homeassistant", "email", "dingtalk",
"feishu", "wecom", "wecom_callback", "weixin", "bluebubbles",
"qqbot", "yuanbao",
}
DEFAULT_HOST = "0.0.0.0"
DEFAULT_PORT = 8644
_INSECURE_NO_AUTH = "INSECURE_NO_AUTH"
@ -238,12 +245,6 @@ class WebhookAdapter(BasePlatformAdapter):
# Cross-platform delivery — any platform with a gateway adapter.
# Check both built-in names and plugin-registered platforms.
_BUILTIN_DELIVER_PLATFORMS = {
"telegram", "discord", "slack", "signal", "sms", "whatsapp",
"matrix", "mattermost", "homeassistant", "email", "dingtalk",
"feishu", "wecom", "wecom_callback", "weixin", "bluebubbles",
"qqbot", "yuanbao",
}
_is_known_platform = deliver_type in _BUILTIN_DELIVER_PLATFORMS
if not _is_known_platform:
try:

View file

@ -493,13 +493,45 @@ class WhatsAppAdapter(BasePlatformAdapter):
"""
if not check_whatsapp_requirements():
logger.warning("[%s] Node.js not found. WhatsApp requires Node.js.", self.name)
self._set_fatal_error(
"whatsapp_node_missing",
"Node.js is not installed — install Node.js and re-run `hermes gateway`.",
retryable=False,
)
return False
bridge_path = Path(self._bridge_script)
if not bridge_path.exists():
logger.warning("[%s] Bridge script not found: %s", self.name, bridge_path)
self._set_fatal_error(
"whatsapp_bridge_missing",
f"WhatsApp bridge script missing at {bridge_path}.",
retryable=False,
)
return False
# Pre-flight: skip the 30s bridge bootstrap entirely if the user
# never finished pairing. Without creds.json the bridge prints
# QR codes to its log file and never reaches status:connected,
# so every gateway restart paid the 30s timeout + queued WhatsApp
# for indefinite retries. Mark non-retryable so the user gets a
# clear "run hermes whatsapp" message instead of the watcher
# silently hammering an unconfigured platform.
creds_path = self._session_path / "creds.json"
if not creds_path.exists():
logger.warning(
"[%s] WhatsApp is enabled but not paired (no creds.json at %s). "
"Run `hermes whatsapp` to pair, or remove WHATSAPP_ENABLED from "
"your .env to disable.",
self.name, creds_path,
)
self._set_fatal_error(
"whatsapp_not_paired",
"WhatsApp enabled but not paired — run `hermes whatsapp` to pair.",
retryable=False,
)
return False
logger.info("[%s] Bridge found at %s", self.name, bridge_path)
# Acquire scoped lock to prevent duplicate sessions

View file

@ -147,6 +147,9 @@ _YB_RES_REF_RE = re.compile(
r"\[(image|voice|video|file(?::[^|\]]*)?)\|ybres:([A-Za-z0-9_\-]+)\]"
)
# Media kinds that can be resolved and injected into the model context
_RESOLVABLE_MEDIA_KINDS = frozenset({"image", "file"})
# Strip page indicators like (1/3) appended by BasePlatformAdapter
_INDICATOR_RE = re.compile(r'\s*\(\d+/\d+\)$')
@ -925,6 +928,7 @@ class InboundContext:
# Populated by QuoteContextMiddleware
reply_to_message_id: Optional[str] = None
reply_to_text: Optional[str] = None
quote_media_refs: list = dc_field(default_factory=list) # List of (rid, kind, filename)
# Populated by MediaResolveMiddleware
media_urls: list = dc_field(default_factory=list)
@ -1645,6 +1649,25 @@ class ExtractContentMiddleware(InboundMiddleware):
return None
return f"[link: {link} | visit link for full content]"
@staticmethod
def _parse_resource_id(url: str) -> str:
"""Extract resourceId from Yuanbao resource URL query parameters.
Args:
url: Resource URL (e.g., https://...?resourceId=abc123)
Returns:
Resource ID string, or empty string if not found
"""
if not url:
return ""
try:
query = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
ids = query.get("resourceId") or query.get("resourceid") or []
return str(ids[0]).strip() if ids else ""
except Exception:
return ""
@classmethod
def _extract_text(cls, msg_body: list) -> str:
"""Extract plain text content from MsgBody.
@ -1668,14 +1691,35 @@ class ExtractContentMiddleware(InboundMiddleware):
if text:
parts.append(text)
elif elem_type == "TIMImageElem":
parts.append("[image]")
# Extract resourceId from image_info_array URL
image_info_array = content.get("image_info_array")
if not isinstance(image_info_array, list):
image_info_array = []
image_info = None
# Prefer medium image (index 1), fallback to index 0
if len(image_info_array) > 1 and isinstance(image_info_array[1], dict):
image_info = image_info_array[1]
elif len(image_info_array) > 0 and isinstance(image_info_array[0], dict):
image_info = image_info_array[0]
image_url = str((image_info or {}).get("url") or "").strip()
rid = cls._parse_resource_id(image_url)
parts.append(f"[image|ybres:{rid}]" if rid else "[image]")
elif elem_type == "TIMFileElem":
filename = content.get("file_name", content.get("fileName", content.get("filename", "")))
parts.append(f"[file: {filename}]" if filename else "[file]")
file_url = str(content.get("url") or "").strip()
rid = cls._parse_resource_id(file_url)
if rid:
parts.append(f"[file:{filename}|ybres:{rid}]" if filename else f"[file|ybres:{rid}]")
else:
parts.append(f"[file: {filename}]" if filename else "[file]")
elif elem_type == "TIMSoundElem":
parts.append("[voice]")
sound_url = str(content.get("url") or "").strip()
rid = cls._parse_resource_id(sound_url)
parts.append(f"[voice|ybres:{rid}]" if rid else "[voice]")
elif elem_type == "TIMVideoFileElem":
parts.append("[video]")
video_url = str(content.get("url") or "").strip()
rid = cls._parse_resource_id(video_url)
parts.append(f"[video|ybres:{rid}]" if rid else "[video]")
elif elem_type == "TIMCustomElem":
data_val = content.get("data", "")
if data_val:
@ -2132,22 +2176,23 @@ class QuoteContextMiddleware(InboundMiddleware):
name = "quote-context"
@staticmethod
def _extract_quote_context(cloud_custom_data: str) -> Tuple[Optional[str], Optional[str]]:
def _extract_quote_context(cloud_custom_data: str) -> Tuple[Optional[str], Optional[str], list]:
"""Extract quote context, mapping to MessageEvent.reply_to_*.
Returns:
(reply_to_message_id, reply_to_text)
(reply_to_message_id, reply_to_text, quote_media_refs)
where quote_media_refs is a list of (rid, kind, filename) tuples
"""
if not cloud_custom_data:
return None, None
return None, None, []
try:
parsed = json.loads(cloud_custom_data)
except (json.JSONDecodeError, TypeError):
return None, None
return None, None, []
quote = parsed.get("quote") if isinstance(parsed, dict) else None
if not isinstance(quote, dict):
return None, None
return None, None, []
# type=2 corresponds to image reference; desc may be empty, provide a placeholder.
quote_type = int(quote.get("type") or 0)
@ -2155,15 +2200,26 @@ class QuoteContextMiddleware(InboundMiddleware):
if quote_type == 2 and not desc:
desc = "[image]"
if not desc:
return None, None
return None, None, []
quote_id = str(quote.get("id") or "").strip() or None
sender = str(quote.get("sender_nickname") or quote.get("sender_id") or "").strip()
quote_text = f"{sender}: {desc}" if sender else desc
return quote_id, quote_text
# Extract media references from desc using _YB_RES_REF_RE regex
media_refs: list = []
for m in _YB_RES_REF_RE.finditer(desc):
head = m.group(1) # "image" | "file:<name>" | "voice" | "video"
rid = m.group(2)
kind, _, filename = head.partition(":")
kind = kind.strip()
media_refs.append((rid, kind, filename.strip()))
return quote_id, quote_text, media_refs
async def handle(self, ctx: InboundContext, next_fn) -> None:
ctx.reply_to_message_id, ctx.reply_to_text = self._extract_quote_context(ctx.cloud_custom_data)
ctx.reply_to_message_id, ctx.reply_to_text, ctx.quote_media_refs = self._extract_quote_context(ctx.cloud_custom_data)
await next_fn()
@ -2332,7 +2388,7 @@ class MediaResolveMiddleware(InboundMiddleware):
for ref in media_refs:
kind = str(ref.get("kind") or "").strip().lower()
url = str(ref.get("url") or "").strip()
if kind not in {"image", "file"} or not url:
if kind not in _RESOLVABLE_MEDIA_KINDS or not url:
continue
try:
@ -2391,7 +2447,7 @@ class MediaResolveMiddleware(InboundMiddleware):
rid = m.group(2)
kind, _, filename = head.partition(":")
kind = kind.strip()
if kind not in {"image", "file"}:
if kind not in _RESOLVABLE_MEDIA_KINDS:
continue
if rid in seen:
continue
@ -2458,26 +2514,82 @@ class DispatchMiddleware(InboundMiddleware):
media_urls = list(ctx.media_urls)
media_types = list(ctx.media_types)
# Backfill observed media from recent transcript history
extra_img_urls: List[str] = []
extra_img_mimes: List[str] = []
try:
extra_img_urls, extra_img_mimes = await MediaResolveMiddleware._collect_observed_media(
adapter, ctx.source,
)
except Exception as exc:
logger.warning(
"[%s] observed-image hydration raised, continuing anyway: %s",
adapter.name, exc,
)
if extra_img_urls:
current = set(media_urls)
for u, m in zip(extra_img_urls, extra_img_mimes):
if u in current:
# If user quoted a message (reply_to_message_id is set), resolve only
# quote_media_refs to avoid injecting unrelated history media.
# Otherwise, backfill observed media from recent transcript history.
if ctx.reply_to_message_id is not None:
# Fallback: if desc didn't contain ybres refs, look up transcript
if not ctx.quote_media_refs:
try:
store = getattr(adapter, "_session_store", None)
if store:
session_entry = store.get_or_create_session(ctx.source)
history = store.load_transcript(session_entry.session_id)
for msg in reversed(history or []):
mid = msg.get("message_id", "")
if mid and mid == ctx.reply_to_message_id:
_content = msg.get("content", "")
if isinstance(_content, str) and "|ybres:" in _content:
for m in _YB_RES_REF_RE.finditer(_content):
head = m.group(1)
rid = m.group(2)
kind, _, filename = head.partition(":")
kind = kind.strip()
if kind in _RESOLVABLE_MEDIA_KINDS:
ctx.quote_media_refs.append((rid, kind, filename.strip()))
break
except Exception as exc:
logger.warning(
"[%s] quote transcript lookup failed: %s",
adapter.name, exc,
)
# User quoted a message — resolve only media from the quote
for rid, kind, filename in ctx.quote_media_refs:
if kind not in _RESOLVABLE_MEDIA_KINDS:
continue
media_urls.append(u)
media_types.append(m)
current.add(u)
try:
fresh_url = await MediaResolveMiddleware._resolve_by_resource_id(adapter, rid)
except Exception as exc:
logger.warning(
"[%s] quote media resolve failed: rid=%s kind=%s err=%s",
adapter.name, rid, kind, exc,
)
continue
cached = await MediaResolveMiddleware._download_and_cache(
adapter,
fetch_url=fresh_url,
kind=kind,
file_name=filename or None,
log_tag=f"quote rid={rid}",
)
if cached is None:
continue
path, mime = cached
# Avoid duplicates
if path not in media_urls:
media_urls.append(path)
media_types.append(mime)
else:
# No quote — backfill observed media from recent transcript history
extra_img_urls: List[str] = []
extra_img_mimes: List[str] = []
try:
extra_img_urls, extra_img_mimes = await MediaResolveMiddleware._collect_observed_media(
adapter, ctx.source,
)
except Exception as exc:
logger.warning(
"[%s] observed-image hydration raised, continuing anyway: %s",
adapter.name, exc,
)
if extra_img_urls:
current = set(media_urls)
for u, m in zip(extra_img_urls, extra_img_mimes):
if u in current:
continue
media_urls.append(u)
media_types.append(m)
current.add(u)
# Replace [kind|ybres:xxx] anchors with local cache paths so
# the transcript records usable paths for the model.
@ -2506,7 +2618,11 @@ class DispatchMiddleware(InboundMiddleware):
event = MessageEvent(
text=_patched_event_text,
message_type=ctx.msg_type,
message_type=(
MessageType.DOCUMENT
if any(mt.startswith(("application/", "text/")) for mt in media_types)
else ctx.msg_type
),
source=ctx.source,
message_id=ctx.msg_id or None,
raw_message=ctx.push,

View file

@ -50,6 +50,7 @@ from typing import Dict, Optional, Any, List, Union
# gateway is a long-running daemon, so its boot cost matters less than
# preserving the established test-patch surface.
from agent.account_usage import fetch_account_usage, render_account_usage_lines
from agent.async_utils import safe_schedule_threadsafe
from agent.i18n import t
from hermes_cli.config import cfg_get
@ -1989,21 +1990,21 @@ class GatewayRunner:
await self.stop()
elif not self.adapters and self._failed_platforms:
# All platforms are down and queued for background reconnection.
# If the error is retryable, exit with failure so systemd Restart=on-failure
# can restart the process. Otherwise stay alive and keep retrying in background.
if adapter.fatal_error_retryable:
self._exit_reason = adapter.fatal_error_message or "All messaging platforms failed with retryable errors"
self._exit_with_failure = True
logger.error(
"All messaging platforms failed with retryable errors. "
"Shutting down gateway for service restart (systemd will retry)."
)
await self.stop()
else:
logger.warning(
"No connected messaging platforms remain, but %d platform(s) queued for reconnection",
len(self._failed_platforms),
)
# Keep the gateway alive so:
# • cron jobs still run
# • the reconnect watcher can recover platforms when the
# underlying problem clears (proxy comes back, user runs
# `hermes whatsapp`, etc.)
# We used to exit-with-failure here to trigger systemd restart,
# but that converted a transient outage into a restart loop and
# killed in-process state every time. The reconnect watcher
# already handles long-running recovery — let it do its job.
logger.warning(
"No connected messaging platforms remain, but %d platform(s) "
"queued for reconnection — gateway staying alive, watcher will "
"retry in background.",
len(self._failed_platforms),
)
def _request_clean_exit(self, reason: str) -> None:
self._exit_cleanly = True
@ -2179,6 +2180,73 @@ class GatewayRunner:
except Exception:
pass
# ------------------------------------------------------------------
# Per-platform circuit breaker (pause/resume) — used by the reconnect
# watcher when a retryable failure recurs past a threshold, and by the
# /platform pause|resume slash command for manual control.
# ------------------------------------------------------------------
def _pause_failed_platform(self, platform, *, reason: str = "") -> None:
    """Put a queued platform on hold without dropping it from the retry queue.

    The platform's entry stays in ``_failed_platforms`` but is flagged
    ``paused`` so the reconnect watcher leaves it alone.  Invoked by the
    circuit breaker once ``_PAUSE_AFTER_FAILURES`` reconnect attempts have
    failed, and by ``/platform pause <name>`` for manual control.  Paused
    entries are shown by ``/platform list`` and re-queued by
    ``/platform resume <name>``.

    Args:
        platform: Key into ``_failed_platforms``; must expose ``.value``.
        reason: Text stored as ``pause_reason``.  A generic auto-pause
            message is substituted when empty.

    Does nothing when the platform is not queued or is already paused.
    """
    entry = getattr(self, "_failed_platforms", {}).get(platform)
    if entry is None or entry.get("paused"):
        return

    why = reason or "auto-paused after repeated failures"
    entry["paused"] = True
    entry["pause_reason"] = why
    # Defence in depth: an infinite next_retry keeps the watcher away even
    # if some stale code path forgets to look at the "paused" flag.
    entry["next_retry"] = float("inf")

    try:
        self._update_platform_runtime_status(
            platform.value,
            platform_state="paused",
            error_code=None,
            error_message=entry["pause_reason"],
        )
    except Exception:
        # Status reporting is best-effort; pausing must still take effect.
        pass

    logger.warning(
        "%s paused after %d consecutive failures (%s) — "
        "fix the underlying issue then run `/platform resume %s` "
        "to retry, or `hermes gateway restart` to restart the gateway.",
        platform.value,
        entry.get("attempts", 0),
        entry["pause_reason"],
        platform.value,
    )
def _resume_paused_platform(self, platform) -> bool:
    """Lift the pause on a platform and queue it for an immediate retry.

    Clears the ``paused`` flag and the stored ``pause_reason``, zeroes the
    attempt counter and sets ``next_retry`` to "now" so the reconnect
    watcher picks the platform up on its next tick.

    Args:
        platform: Key into ``_failed_platforms``; must expose ``.value``.

    Returns:
        ``True`` when the platform was paused and has been re-queued,
        ``False`` when it is not in the queue or was not paused.
    """
    entry = getattr(self, "_failed_platforms", {}).get(platform)
    if entry is None or not entry.get("paused"):
        return False

    entry["paused"] = False
    entry.pop("pause_reason", None)
    # Fresh start: the breaker counts from zero again, and a next_retry of
    # "now" is already due by the time the watcher looks at it.
    entry["attempts"] = 0
    entry["next_retry"] = time.monotonic()

    try:
        self._update_platform_runtime_status(
            platform.value,
            platform_state="retrying",
            error_code=None,
            error_message=None,
        )
    except Exception:
        # Status reporting is best-effort; the resume itself already happened.
        pass

    logger.info("%s resumed — retrying on next watcher tick", platform.value)
    return True
@staticmethod
def _load_prefill_messages() -> List[Dict[str, Any]]:
"""Load ephemeral prefill messages from config or env var.
@ -3612,16 +3680,32 @@ class GatewayRunner:
return True
if enabled_platform_count > 0:
if startup_retryable_errors:
# At least one platform attempted a connection and failed —
# this is a real startup error that should block the gateway.
# All enabled platforms hit retryable failures (network
# blip, bridge not paired, npm install timeout, etc.).
# Keep the gateway alive so:
# • cron jobs still run
# • the reconnect watcher gets a chance to recover the
# failing platforms once the underlying problem is
# fixed (e.g. user runs `hermes whatsapp`, fixes
# proxy, etc.)
# Exiting here used to convert a single misconfigured
# platform into an infinite systemd restart loop.
reason = "; ".join(startup_retryable_errors)
logger.error("Gateway failed to connect any configured messaging platform: %s", reason)
logger.warning(
"Gateway started with no connected platforms — "
"%d platform(s) queued for retry: %s",
len(self._failed_platforms), reason,
)
try:
from gateway.status import write_runtime_status
write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
write_runtime_status(
gateway_state="degraded",
exit_reason=None,
)
except Exception:
pass
return False
# Fall through to the normal "running" state — reconnect
# watcher takes it from here.
# All enabled platforms had no adapter (missing library or credentials).
# In fleet deployments the same config.yaml is shared across nodes that
# may only have credentials for a subset of platforms. Rather than
@ -4736,11 +4820,15 @@ class GatewayRunner:
"""Background task that periodically retries connecting failed platforms.
Uses exponential backoff: 30s 60s 120s 240s 300s (cap).
Stops retrying a platform after 20 failed attempts or if the error
is non-retryable (e.g. bad auth token).
Retryable failures keep retrying at the backoff cap indefinitely
but if a platform fails ``_PAUSE_AFTER_FAILURES`` times in a row
without ever succeeding, it is *paused*: kept in the retry queue
but no longer hammered. The user surfaces it with ``/platform list``
and resumes it with ``/platform resume <name>``. Non-retryable
failures (bad auth, etc.) still drop out of the queue immediately.
"""
_MAX_ATTEMPTS = 20
_BACKOFF_CAP = 300 # 5 minutes max between retries
_PAUSE_AFTER_FAILURES = 10 # circuit-breaker threshold
await asyncio.sleep(10) # initial delay — let startup finish
while self._running:
@ -4757,22 +4845,18 @@ class GatewayRunner:
if not self._running:
return
info = self._failed_platforms[platform]
# Skip paused platforms entirely — they need explicit
# /platform resume to come back.
if info.get("paused"):
continue
if now < info["next_retry"]:
continue # not time yet
if info["attempts"] >= _MAX_ATTEMPTS:
logger.warning(
"Giving up reconnecting %s after %d attempts",
platform.value, info["attempts"],
)
del self._failed_platforms[platform]
continue
platform_config = info["config"]
attempt = info["attempts"] + 1
logger.info(
"Reconnecting %s (attempt %d/%d)...",
platform.value, attempt, _MAX_ATTEMPTS,
"Reconnecting %s (attempt %d)...",
platform.value, attempt,
)
try:
@ -4837,6 +4921,14 @@ class GatewayRunner:
"Reconnect %s failed, next retry in %ds",
platform.value, backoff,
)
if attempt >= _PAUSE_AFTER_FAILURES:
self._pause_failed_platform(
platform,
reason=(
adapter.fatal_error_message
or "failed to reconnect"
),
)
except Exception as e:
self._update_platform_runtime_status(
platform.value,
@ -4851,6 +4943,8 @@ class GatewayRunner:
"Reconnect %s error: %s, next retry in %ds",
platform.value, e, backoff,
)
if attempt >= _PAUSE_AFTER_FAILURES:
self._pause_failed_platform(platform, reason=str(e))
# Check every 10 seconds for platforms that need reconnection
for _ in range(10):
@ -6450,6 +6544,9 @@ class GatewayRunner:
if canonical == "agents":
return await self._handle_agents_command(event)
if canonical == "platform":
return await self._handle_platform_command(event)
if canonical == "restart":
return await self._handle_restart_command(event)
@ -7991,6 +8088,8 @@ class GatewayRunner:
try:
if _err_body is not None:
_err_json = _err_body.json().get("error", {})
if not isinstance(_err_json, dict):
_err_json = {}
except Exception:
pass
if _err_json.get("type") == "usage_limit_reached":
@ -8564,6 +8663,24 @@ class GatewayRunner:
t("gateway.status.platforms", platforms=', '.join(connected_platforms)),
])
# Session recap — what was this session ABOUT? Pure local compute,
# no LLM call, no prompt-cache impact. Useful when juggling multiple
# gateway sessions and you want a one-glance reminder of where this
# one left off. Inspired by Claude Code 2.1.114's /recap.
try:
from hermes_cli.session_recap import build_recap
history = self.session_store.load_transcript(session_entry.session_id)
recap = build_recap(
history,
session_title=title,
session_id=session_entry.session_id,
platform=source.platform.value if source else None,
)
if recap:
lines.extend(["", recap])
except Exception as exc: # pragma: no cover — defensive
logger.debug("build_recap failed in /status: %s", exc)
return "\n".join(lines)
async def _handle_agents_command(self, event: MessageEvent) -> str:
@ -8695,6 +8812,99 @@ class GatewayRunner:
else:
return t("gateway.stop.no_active")
async def _handle_platform_command(self, event: MessageEvent) -> str:
"""Handle ``/platform list|pause|resume [name]`` — surface and
manually control failed/paused gateway adapters.
Examples:
``/platform list`` show connected + failed/paused platforms
``/platform pause whatsapp`` stop the reconnect watcher hammering whatsapp
``/platform resume whatsapp`` re-queue a paused platform for retry
"""
text = (getattr(event, "content", "") or "").strip()
# Strip the leading "/platform" (or "/PLATFORM") token if present
parts = text.split(maxsplit=2)
if parts and parts[0].lower().lstrip("/").startswith("platform"):
parts = parts[1:]
action = (parts[0] if parts else "list").lower()
target = parts[1].lower() if len(parts) > 1 else ""
# Resolve platform name (case-insensitive, value match)
def _resolve_platform(name: str):
if not name:
return None
for p in Platform.__members__.values():
if p.value.lower() == name:
return p
return None
if action == "list":
lines = ["**Gateway platforms**"]
connected = sorted(p.value for p in self.adapters.keys())
if connected:
lines.append("Connected: " + ", ".join(connected))
else:
lines.append("Connected: (none)")
failed = getattr(self, "_failed_platforms", {}) or {}
if failed:
for p, info in failed.items():
if info.get("paused"):
reason = info.get("pause_reason") or "paused"
lines.append(
f" · {p.value} — PAUSED ({reason}). "
f"Resume with `/platform resume {p.value}`."
)
else:
attempts = info.get("attempts", 0)
lines.append(
f" · {p.value} — retrying (attempt {attempts})"
)
else:
lines.append("Failed/paused: (none)")
return "\n".join(lines)
if action in ("pause", "resume"):
if not target:
return f"Usage: /platform {action} <name>"
platform = _resolve_platform(target)
if platform is None:
return f"Unknown platform: {target}"
failed = getattr(self, "_failed_platforms", {}) or {}
if action == "pause":
if platform not in failed:
return (
f"{platform.value} is not in the retry queue "
f"(it's either connected or not enabled)."
)
if failed[platform].get("paused"):
return f"{platform.value} is already paused."
self._pause_failed_platform(platform, reason="paused via /platform pause")
return (
f"{platform.value} paused. "
f"Resume with `/platform resume {platform.value}` or "
f"`hermes gateway restart` to reset."
)
# action == "resume"
if platform not in failed:
return (
f"{platform.value} is not in the retry queue — "
f"nothing to resume."
)
if not failed[platform].get("paused"):
return (
f"{platform.value} is already retrying — "
f"no resume needed."
)
self._resume_paused_platform(platform)
return f"{platform.value} resumed — retrying on next watcher tick."
return (
"Usage: /platform <list|pause|resume> [name]\n"
" /platform list — show platform status\n"
" /platform pause <name> — stop retrying a failing platform\n"
" /platform resume <name> — re-queue a paused platform"
)
async def _handle_restart_command(self, event: MessageEvent) -> Union[str, EphemeralReply]:
"""Handle /restart command - drain active work, then restart the gateway."""
# Defensive idempotency check: if the previous gateway process
@ -11215,10 +11425,14 @@ class GatewayRunner:
copied_source = dataclasses.replace(source)
except Exception:
copied_source = source
future = asyncio.run_coroutine_threadsafe(
future = safe_schedule_threadsafe(
self._rename_telegram_topic_for_session_title(copied_source, session_id, title),
loop,
logger=logger,
log_message="Telegram topic title rename failed to schedule",
)
if future is None:
return
def _log_rename_failure(fut) -> None:
try:
fut.result()
@ -12332,6 +12546,12 @@ class GatewayRunner:
and getattr(source, "chat_type", None) == "dm"
):
metadata["telegram_dm_topic_reply_fallback"] = True
# Telegram DM topic lanes need direct_messages_topic_id in metadata
# so synthetic/queued messages (goal continuations, status notices)
# route to the correct topic even when reply anchor is unavailable.
tid = str(thread_id)
if tid and tid not in {"", "1"}:
metadata["direct_messages_topic_id"] = tid
anchor = reply_to_message_id or getattr(source, "message_id", None)
if anchor is not None:
metadata["telegram_reply_to_message_id"] = str(anchor)
@ -12617,7 +12837,11 @@ class GatewayRunner:
update_cmd = (
f"PYTHONUNBUFFERED=1 {hermes_cmd_str} update --gateway"
f" > {shlex.quote(str(output_path))} 2>&1; "
f"status=$?; printf '%s' \"$status\" > {shlex.quote(str(exit_code_path))}"
# Avoid `status=$?`: `status` is a read-only special parameter
# in zsh, and this command string is copied/reused in macOS/zsh
# operator wrappers. Keep the template zsh-safe even though this
# specific subprocess currently runs under bash.
f"rc=$?; printf '%s' \"$rc\" > {shlex.quote(str(exit_code_path))}"
)
setsid_bin = shutil.which("setsid")
if setsid_bin:
@ -14808,29 +15032,28 @@ class GatewayRunner:
def _step_callback_sync(iteration: int, prev_tools: list) -> None:
if not _run_still_current():
return
try:
# prev_tools may be list[str] or list[dict] with "name"/"result"
# keys. Normalise to keep "tool_names" backward-compatible for
# user-authored hooks that do ', '.join(tool_names)'.
_names: list[str] = []
for _t in (prev_tools or []):
if isinstance(_t, dict):
_names.append(_t.get("name") or "")
else:
_names.append(str(_t))
asyncio.run_coroutine_threadsafe(
_hooks_ref.emit("agent:step", {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"session_id": session_id,
"iteration": iteration,
"tool_names": _names,
"tools": prev_tools,
}),
_loop_for_step,
)
except Exception as _e:
logger.debug("agent:step hook error: %s", _e)
# prev_tools may be list[str] or list[dict] with "name"/"result"
# keys. Normalise to keep "tool_names" backward-compatible for
# user-authored hooks that do ', '.join(tool_names)'.
_names: list[str] = []
for _t in (prev_tools or []):
if isinstance(_t, dict):
_names.append(_t.get("name") or "")
else:
_names.append(str(_t))
safe_schedule_threadsafe(
_hooks_ref.emit("agent:step", {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"session_id": session_id,
"iteration": iteration,
"tool_names": _names,
"tools": prev_tools,
}),
_loop_for_step,
logger=logger,
log_message="agent:step hook scheduling error",
)
# Bridge sync status_callback → async adapter.send for context pressure
_status_adapter = self.adapters.get(source.platform)
@ -14850,27 +15073,28 @@ class GatewayRunner:
def _status_callback_sync(event_type: str, message: str) -> None:
if not _status_adapter or not _run_still_current():
return
try:
_fut = asyncio.run_coroutine_threadsafe(
_status_adapter.send(
_status_chat_id,
message,
metadata=_status_thread_metadata,
),
_loop_for_step,
)
if _cleanup_progress:
def _track_status_id(fut) -> None:
try:
res = fut.result()
except Exception:
return
mid = getattr(res, "message_id", None)
if getattr(res, "success", False) and mid:
_cleanup_msg_ids.append(str(mid))
_fut.add_done_callback(_track_status_id)
except Exception as _e:
logger.debug("status_callback error (%s): %s", event_type, _e)
_fut = safe_schedule_threadsafe(
_status_adapter.send(
_status_chat_id,
message,
metadata=_status_thread_metadata,
),
_loop_for_step,
logger=logger,
log_message=f"status_callback ({event_type}) scheduling error",
)
if _fut is None:
return
if _cleanup_progress:
def _track_status_id(fut) -> None:
try:
res = fut.result()
except Exception:
return
mid = getattr(res, "message_id", None)
if getattr(res, "success", False) and mid:
_cleanup_msg_ids.append(str(mid))
_fut.add_done_callback(_track_status_id)
def run_sync():
# The conditional re-assignment of `message` further below
@ -15024,17 +15248,16 @@ class GatewayRunner:
return
if already_streamed or not _status_adapter or not str(text or "").strip():
return
try:
asyncio.run_coroutine_threadsafe(
_status_adapter.send(
_status_chat_id,
text,
metadata=_status_thread_metadata,
),
_loop_for_step,
)
except Exception as _e:
logger.debug("interim_assistant_callback error: %s", _e)
safe_schedule_threadsafe(
_status_adapter.send(
_status_chat_id,
text,
metadata=_status_thread_metadata,
),
_loop_for_step,
logger=logger,
log_message="interim_assistant_callback scheduling error",
)
turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
@ -15123,17 +15346,16 @@ class GatewayRunner:
def _deliver_bg_review_message(message: str) -> None:
if not _status_adapter or not _run_still_current():
return
try:
asyncio.run_coroutine_threadsafe(
_status_adapter.send(
_status_chat_id,
message,
metadata=_status_thread_metadata,
),
_loop_for_step,
)
except Exception as _e:
logger.debug("background_review_callback error: %s", _e)
safe_schedule_threadsafe(
_status_adapter.send(
_status_chat_id,
message,
metadata=_status_thread_metadata,
),
_loop_for_step,
logger=logger,
log_message="background_review_callback scheduling error",
)
def _release_bg_review_messages() -> None:
_bg_review_release.set()
@ -15205,23 +15427,28 @@ class GatewayRunner:
pass
send_ok = False
try:
fut = asyncio.run_coroutine_threadsafe(
_status_adapter.send_clarify(
chat_id=_status_chat_id,
question=question,
choices=list(choices) if choices else None,
clarify_id=clarify_id,
session_key=session_key or "",
metadata=_status_thread_metadata,
),
_loop_for_step,
)
result = fut.result(timeout=15)
send_ok = bool(getattr(result, "success", False))
except Exception as exc:
logger.warning("Clarify send failed: %s", exc)
fut = safe_schedule_threadsafe(
_status_adapter.send_clarify(
chat_id=_status_chat_id,
question=question,
choices=list(choices) if choices else None,
clarify_id=clarify_id,
session_key=session_key or "",
metadata=_status_thread_metadata,
),
_loop_for_step,
logger=logger,
log_message="Clarify send failed to schedule",
)
if fut is None:
send_ok = False
else:
try:
result = fut.result(timeout=15)
send_ok = bool(getattr(result, "success", False))
except Exception as exc:
logger.warning("Clarify send failed: %s", exc)
send_ok = False
if not send_ok:
# Couldn't deliver the prompt — clean up and return
@ -15341,7 +15568,7 @@ class GatewayRunner:
# false positives from MagicMock auto-attribute creation in tests.
if getattr(type(_status_adapter), "send_exec_approval", None) is not None:
try:
_approval_result = asyncio.run_coroutine_threadsafe(
_approval_fut = safe_schedule_threadsafe(
_status_adapter.send_exec_approval(
chat_id=_status_chat_id,
command=cmd,
@ -15350,7 +15577,12 @@ class GatewayRunner:
metadata=_status_thread_metadata,
),
_loop_for_step,
).result(timeout=15)
logger=logger,
log_message="send_exec_approval scheduling error",
)
if _approval_fut is None:
raise RuntimeError("send_exec_approval: loop unavailable")
_approval_result = _approval_fut.result(timeout=15)
if _approval_result.success:
return
logger.warning(
@ -15372,14 +15604,18 @@ class GatewayRunner:
f"for the session, `/approve always` to approve permanently, or `/deny` to cancel."
)
try:
asyncio.run_coroutine_threadsafe(
_approval_send_fut = safe_schedule_threadsafe(
_status_adapter.send(
_status_chat_id,
msg,
metadata=_status_thread_metadata,
),
_loop_for_step,
).result(timeout=15)
logger=logger,
log_message="Approval text-send scheduling error",
)
if _approval_send_fut is not None:
_approval_send_fut.result(timeout=15)
except Exception as _e:
logger.error("Failed to send approval request: %s", _e)
@ -16341,7 +16577,11 @@ class GatewayRunner:
except Exception:
pass
try:
asyncio.run_coroutine_threadsafe(_delete_all(), _loop_snapshot)
safe_schedule_threadsafe(
_delete_all(), _loop_snapshot,
logger=logger,
log_message="Temp bubble cleanup scheduling error",
)
except Exception:
pass
@ -16398,10 +16638,13 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
# this ticker runs in a background thread. Schedule onto
# the gateway event loop and wait briefly for completion
# so refresh failures are still logged via the except.
fut = asyncio.run_coroutine_threadsafe(
build_channel_directory(adapters), loop
fut = safe_schedule_threadsafe(
build_channel_directory(adapters), loop,
logger=logger,
log_message="Channel directory refresh scheduling error",
)
fut.result(timeout=30)
if fut is not None:
fut.result(timeout=30)
except Exception as e:
logger.debug("Channel directory refresh error: %s", e)
@ -16585,6 +16828,33 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
from hermes_logging import setup_logging
setup_logging(hermes_home=_hermes_home, mode="gateway")
# Periodic process memory usage logging (gateway only) — emits a
# grep-friendly "[MEMORY] rss=...MB ..." line every N minutes so
# slow leaks in the long-lived gateway process show up as a time
# series in agent.log / gateway.log. Ported from cline/cline#10343.
# Controlled by the logging.memory_monitor section in config.yaml.
try:
from gateway import memory_monitor as _memory_monitor
_mm_cfg = {}
try:
# config is loaded a few lines up; re-read the logging section
# here so we pick up user overrides without coupling to local
# variable names inside the start_gateway body.
from hermes_cli.config import load_config as _load_cli_config
_mm_cfg = (_load_cli_config() or {}).get("logging", {}).get("memory_monitor", {}) or {}
except Exception:
_mm_cfg = {}
if _mm_cfg.get("enabled", True):
try:
_mm_interval = float(_mm_cfg.get("interval_seconds", 300))
except (TypeError, ValueError):
_mm_interval = 300.0
_memory_monitor.start_memory_monitoring(interval_seconds=_mm_interval)
except Exception as _mm_exc:
logger.debug("Failed to start memory monitor: %s", _mm_exc)
# Optional stderr handler — level driven by -v/-q flags on the CLI.
# verbosity=None (-q/--quiet): no stderr output
# verbosity=0 (default): WARNING and above
@ -16801,6 +17071,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
except Exception:
pass
# Stop the periodic memory monitor (if it was started above).
# This also emits one final "[MEMORY] shutdown rss=..." line so the
# last RSS reading before gateway exit is always in the log.
try:
from gateway import memory_monitor as _memory_monitor
_memory_monitor.stop_memory_monitoring(timeout=2.0)
except Exception:
pass
if runner.exit_code is not None:
raise SystemExit(runner.exit_code)

View file

@ -518,6 +518,9 @@ class SessionEntry:
else None
),
"is_fresh_reset": self.is_fresh_reset,
"was_auto_reset": self.was_auto_reset,
"auto_reset_reason": self.auto_reset_reason,
"reset_had_activity": self.reset_had_activity,
}
if self.origin:
result["origin"] = self.origin.to_dict()
@ -567,6 +570,9 @@ class SessionEntry:
resume_reason=data.get("resume_reason"),
last_resume_marked_at=last_resume_marked_at,
is_fresh_reset=data.get("is_fresh_reset", False),
was_auto_reset=data.get("was_auto_reset", False),
auto_reset_reason=data.get("auto_reset_reason"),
reset_had_activity=data.get("reset_had_activity", False),
)

View file

@ -14,8 +14,8 @@ Provides subcommands for:
import os
import sys
__version__ = "0.13.0"
__release_date__ = "2026.5.7"
__version__ = "0.14.0"
__release_date__ = "2026.5.16"
def _ensure_utf8():

View file

@ -72,6 +72,7 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes
ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry
DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s
DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
DEFAULT_XAI_OAUTH_BASE_URL = "https://api.x.ai/v1"
MINIMAX_OAUTH_CLIENT_ID = "78257093-7e40-4613-99e0-527b14b39113"
MINIMAX_OAUTH_SCOPE = "group_id profile model.completion"
MINIMAX_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:user_code"
@ -89,6 +90,14 @@ STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
XAI_OAUTH_ISSUER = "https://auth.x.ai"
XAI_OAUTH_DISCOVERY_URL = f"{XAI_OAUTH_ISSUER}/.well-known/openid-configuration"
XAI_OAUTH_CLIENT_ID = "b1a00492-073a-47ea-816f-4c329264a828"
XAI_OAUTH_SCOPE = "openid profile email offline_access grok-cli:access api:access"
XAI_OAUTH_REDIRECT_HOST = "127.0.0.1"
XAI_OAUTH_REDIRECT_PORT = 56121
XAI_OAUTH_REDIRECT_PATH = "/callback"
XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56"
QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token"
QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@ -98,6 +107,9 @@ DEFAULT_SPOTIFY_REDIRECT_URI = "http://127.0.0.1:43827/spotify/callback"
SPOTIFY_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/spotify"
SPOTIFY_DASHBOARD_URL = "https://developer.spotify.com/dashboard"
SPOTIFY_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
XAI_OAUTH_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/guides/xai-grok-oauth"
OAUTH_OVER_SSH_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/guides/oauth-over-ssh"
DEFAULT_SPOTIFY_SCOPE = " ".join((
"user-modify-playback-state",
"user-read-playback-state",
@ -162,6 +174,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
auth_type="oauth_external",
inference_base_url=DEFAULT_CODEX_BASE_URL,
),
"xai-oauth": ProviderConfig(
id="xai-oauth",
name="xAI Grok OAuth (SuperGrok Subscription)",
auth_type="oauth_external",
inference_base_url=DEFAULT_XAI_OAUTH_BASE_URL,
),
"qwen-oauth": ProviderConfig(
id="qwen-oauth",
name="Qwen OAuth",
@ -1364,6 +1382,8 @@ def resolve_provider(
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
"xai-oauth": "xai-oauth", "x-ai-oauth": "xai-oauth",
"grok-oauth": "xai-oauth", "xai-grok-oauth": "xai-oauth",
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"step": "stepfun", "stepfun-coding-plan": "stepfun",
@ -1907,6 +1927,16 @@ def _spotify_code_challenge(code_verifier: str) -> str:
return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=")
def _oauth_pkce_code_verifier(length: int = 64) -> str:
raw = base64.urlsafe_b64encode(os.urandom(length)).decode("ascii")
return raw.rstrip("=")[:128]
def _oauth_pkce_code_challenge(code_verifier: str) -> str:
digest = hashlib.sha256(code_verifier.encode("utf-8")).digest()
return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=")
def _spotify_build_authorize_url(
*,
client_id: str,
@ -2029,6 +2059,158 @@ def _spotify_wait_for_callback(
)
def _xai_validate_loopback_redirect_uri(redirect_uri: str) -> tuple[str, int, str]:
    """Check that a redirect URI targets the local xAI OAuth callback listener.

    The URI must use plain ``http``, its host must equal
    ``XAI_OAUTH_REDIRECT_HOST`` and it must carry an explicit, non-zero port.

    Args:
        redirect_uri: Redirect URI to validate.

    Returns:
        ``(host, port, path)``; ``path`` is ``"/"`` when the URI has none.

    Raises:
        AuthError: With code ``xai_redirect_invalid`` when any check fails,
            including URIs that cannot be parsed or whose port is not a
            valid port number.
    """
    try:
        parsed = urlparse(redirect_uri)
    except ValueError as exc:
        # urlparse rejects some malformed netlocs (e.g. unbalanced IPv6
        # brackets); report that as the same validation failure.
        raise AuthError(
            f"xAI OAuth redirect_uri is not a valid URL: {exc}",
            provider="xai-oauth",
            code="xai_redirect_invalid",
        ) from exc

    if parsed.scheme != "http":
        raise AuthError(
            "xAI OAuth redirect_uri must use http://127.0.0.1.",
            provider="xai-oauth",
            code="xai_redirect_invalid",
        )

    host = parsed.hostname or ""
    if host != XAI_OAUTH_REDIRECT_HOST:
        raise AuthError(
            "xAI OAuth redirect_uri must point to 127.0.0.1.",
            provider="xai-oauth",
            code="xai_redirect_invalid",
        )

    try:
        # ``.port`` is parsed lazily and raises ValueError for non-numeric or
        # out-of-range values such as ":99999".
        port = parsed.port
    except ValueError as exc:
        raise AuthError(
            f"xAI OAuth redirect_uri has an invalid port: {exc}",
            provider="xai-oauth",
            code="xai_redirect_invalid",
        ) from exc

    # A missing port (None) and port 0 are both rejected here.
    if not port:
        raise AuthError(
            "xAI OAuth redirect_uri must include an explicit localhost port.",
            provider="xai-oauth",
            code="xai_redirect_invalid",
        )
    return host, port, parsed.path or "/"
def _xai_callback_cors_origin(origin: Optional[str]) -> str:
# CORS allowlist for the loopback callback. Only xAI's own auth origins
# are accepted; the redirect_uri itself is bound to 127.0.0.1 and gated by
# PKCE+state, so additional dev/3p origins are not needed here.
allowed = {
"https://accounts.x.ai",
"https://auth.x.ai",
}
return origin if origin in allowed else ""
def _make_xai_callback_handler(expected_path: str) -> tuple[type[BaseHTTPRequestHandler], dict[str, Any]]:
    """Build the request handler for the local xAI OAuth callback.

    Args:
        expected_path: URL path the authorization redirect must hit.  GET
            requests for any other path get a 404.

    Returns:
        ``(handler_class, result)``.  ``result`` is a dict with the keys
        ``code``, ``state``, ``error`` and ``error_description`` (all
        initially ``None``) that the handler fills in from the callback's
        query string; the caller polls it for an answer.
    """
    result: dict[str, Any] = {
        "code": None,
        "state": None,
        "error": None,
        "error_description": None,
    }

    class _XAICallbackHandler(BaseHTTPRequestHandler):
        def _maybe_write_cors_headers(self) -> None:
            """Emit CORS headers, but only for an allowlisted ``Origin``."""
            origin = self.headers.get("Origin")
            allow_origin = _xai_callback_cors_origin(origin)
            if allow_origin:
                self.send_header("Access-Control-Allow-Origin", allow_origin)
                self.send_header("Access-Control-Allow-Methods", "GET, OPTIONS")
                self.send_header("Access-Control-Allow-Headers", "Content-Type")
                self.send_header("Access-Control-Allow-Private-Network", "true")
                self.send_header("Vary", "Origin")

        def do_OPTIONS(self) -> None:  # noqa: N802
            """Answer a CORS preflight with an empty 204."""
            self.send_response(204)
            self._maybe_write_cors_headers()
            self.end_headers()

        def do_GET(self) -> None:  # noqa: N802
            """Record the callback's query parameters and show a closing page."""
            parsed = urlparse(self.path)
            if parsed.path != expected_path:
                self.send_response(404)
                self.end_headers()
                self.wfile.write(b"Not found.")
                return

            params = parse_qs(parsed.query)
            # The first callback that carried a code or an error wins.  A
            # later hit on the same path (page reload, duplicate navigation)
            # must not wipe that answer before the caller has read it.
            if not (result["code"] or result["error"]):
                result["code"] = params.get("code", [None])[0]
                result["state"] = params.get("state", [None])[0]
                result["error"] = params.get("error", [None])[0]
                result["error_description"] = params.get("error_description", [None])[0]

            self.send_response(200)
            self._maybe_write_cors_headers()
            self.send_header("Content-Type", "text/html; charset=utf-8")
            self.end_headers()
            if result["error"]:
                body = "<html><body><h1>xAI authorization failed.</h1>You can close this tab.</body></html>"
            else:
                body = "<html><body><h1>xAI authorization received.</h1>You can close this tab.</body></html>"
            self.wfile.write(body.encode("utf-8"))

        def log_message(self, format: str, *args: Any) -> None:  # noqa: A003
            """Suppress the base class's per-request logging."""
            return

    return _XAICallbackHandler, result
def _xai_start_callback_server(
    preferred_port: int = XAI_OAUTH_REDIRECT_PORT,
) -> tuple[HTTPServer, threading.Thread, dict[str, Any], str]:
    """Start the loopback HTTP server that receives the xAI OAuth redirect.

    Binding is attempted on ``preferred_port`` first; if that fails and the
    preferred port was not already 0, a second attempt asks the OS for any
    free port.

    Returns:
        ``(server, thread, result, redirect_uri)``: the running server, the
        daemon thread serving it, the dict the handler fills with the
        callback parameters, and the redirect URI built from the port that
        was actually bound.

    Raises:
        AuthError: With code ``xai_callback_bind_failed`` when no attempted
            port could be bound.
    """
    bind_host = XAI_OAUTH_REDIRECT_HOST
    callback_path = XAI_OAUTH_REDIRECT_PATH
    handler_cls, result = _make_xai_callback_handler(callback_path)

    class _ReuseHTTPServer(HTTPServer):
        allow_reuse_address = True

    # Port 0 is the "pick any free port" fallback; skip it when it was the
    # preferred choice already.
    candidates = (preferred_port,) if preferred_port == 0 else (preferred_port, 0)

    bind_error: Optional[OSError] = None
    for candidate in candidates:
        try:
            server = _ReuseHTTPServer((bind_host, candidate), handler_cls)
        except OSError as exc:
            bind_error = exc
        else:
            break
    else:
        # Every candidate failed; surface the most recent OS error.
        raise AuthError(
            f"Could not bind xAI callback server on {bind_host}:{preferred_port}: {bind_error}",
            provider="xai-oauth",
            code="xai_callback_bind_failed",
        ) from bind_error

    bound_port = int(server.server_address[1])
    redirect_uri = f"http://{bind_host}:{bound_port}{callback_path}"

    worker = threading.Thread(
        target=server.serve_forever,
        kwargs={"poll_interval": 0.1},
        daemon=True,
    )
    worker.start()
    return server, worker, result, redirect_uri
def _xai_wait_for_callback(
server: HTTPServer,
thread: threading.Thread,
result: dict[str, Any],
*,
timeout_seconds: float = 180.0,
) -> dict[str, Any]:
deadline = time.monotonic() + max(5.0, timeout_seconds)
try:
while time.monotonic() < deadline:
if result["code"] or result["error"]:
return result
time.sleep(0.1)
finally:
server.shutdown()
server.server_close()
thread.join(timeout=1.0)
raise AuthError(
"xAI authorization timed out waiting for the local callback.",
provider="xai-oauth",
code="xai_callback_timeout",
)
def _spotify_token_payload_to_state(
token_payload: Dict[str, Any],
*,
@ -2349,6 +2531,8 @@ def login_spotify_command(args) -> None:
print(f"Full setup guide: {SPOTIFY_DOCS_URL}")
print()
_print_loopback_ssh_hint(redirect_uri, docs_url=SPOTIFY_DOCS_URL)
if open_browser and not _is_remote_session():
try:
opened = webbrowser.open(authorize_url)
@ -2405,6 +2589,45 @@ def _is_remote_session() -> bool:
return bool(os.getenv("SSH_CLIENT") or os.getenv("SSH_TTY"))
def _print_loopback_ssh_hint(redirect_uri: str, *, docs_url: str | None = None) -> None:
    """Print an SSH port-forward hint for a loopback-redirect OAuth flow.

    The auth server (xAI, Spotify, ...) redirects the user's browser to
    ``127.0.0.1:<port>/callback``. When the browser runs on a different
    machine than the loopback listener (the usual SSH case), the redirect
    cannot reach the listener without a local port forward.

    The hint is best-effort and stays silent when ``_is_remote_session()``
    is false, when the redirect URI cannot be parsed, when its port is
    missing or invalid, or when its host is not ``127.0.0.1``, ``::1`` or
    ``localhost``.

    Args:
        redirect_uri: The loopback redirect URI the listener is waiting on.
        docs_url: Optional provider-specific guide, printed before the
            generic OAuth-over-SSH guide (which is always printed).
    """
    if not _is_remote_session():
        return
    try:
        parsed = urlparse(redirect_uri)
        host = parsed.hostname or ""
        # Reading ``.port`` is what raises ValueError for a non-numeric or
        # out-of-range port, so it must sit inside the try for the hint to
        # stay silent on unparseable input.
        port = parsed.port
    except Exception:
        return
    if host not in ("127.0.0.1", "::1", "localhost") or not port:
        return
    print()
    print("Remote session detected. Your browser will redirect to")
    print(f" {redirect_uri}")
    print("which the loopback listener on THIS machine is waiting on. If your")
    print("browser is on a different machine, forward the port first from your")
    print("local machine in a separate terminal:")
    print()
    print(f" ssh -N -L {port}:127.0.0.1:{port} <user>@<this-host>")
    print()
    print("Then open the authorize URL above in your local browser.")
    if docs_url:
        print(f"Provider docs: {docs_url}")
    print(f"SSH/jump-box guide: {OAUTH_OVER_SSH_DOCS_URL}")
    print()
# =============================================================================
# OpenAI Codex auth — tokens stored in ~/.hermes/auth.json (not ~/.codex/)
#
@ -2680,6 +2903,348 @@ def resolve_codex_runtime_credentials(
}
# =============================================================================
# xAI Grok OAuth — tokens stored in ~/.hermes/auth.json
# =============================================================================
def _read_xai_oauth_tokens(*, _lock: bool = True) -> Dict[str, Any]:
    """Load and sanity-check the stored ``xai-oauth`` provider state.

    Args:
        _lock: When true the auth store is loaded while holding
            ``_auth_store_lock()``; pass ``False`` when the caller already
            holds that lock.

    Returns:
        Dict with ``tokens``, ``last_refresh``, ``discovery`` (``{}`` when
        absent) and ``redirect_uri`` taken from the stored state.

    Raises:
        AuthError: with ``relogin_required=True`` when the state is missing,
            ``tokens`` is not a dict, or the access/refresh token is blank.
    """
    if not _lock:
        auth_store = _load_auth_store()
    else:
        with _auth_store_lock():
            auth_store = _load_auth_store()

    state = _load_provider_state(auth_store, "xai-oauth")
    if not state:
        raise AuthError(
            "No xAI OAuth credentials stored. Select xAI Grok OAuth (SuperGrok Subscription) in `hermes model`.",
            provider="xai-oauth",
            code="xai_auth_missing",
            relogin_required=True,
        )

    tokens = state.get("tokens")
    if not isinstance(tokens, dict):
        raise AuthError(
            "xAI OAuth state is missing tokens. Re-authenticate with `hermes model`.",
            provider="xai-oauth",
            code="xai_auth_invalid_shape",
            relogin_required=True,
        )

    has_access = bool(str(tokens.get("access_token", "") or "").strip())
    has_refresh = bool(str(tokens.get("refresh_token", "") or "").strip())
    if not has_access:
        raise AuthError(
            "xAI OAuth state is missing access_token. Re-authenticate with `hermes model`.",
            provider="xai-oauth",
            code="xai_auth_missing_access_token",
            relogin_required=True,
        )
    if not has_refresh:
        raise AuthError(
            "xAI OAuth state is missing refresh_token. Re-authenticate with `hermes model`.",
            provider="xai-oauth",
            code="xai_auth_missing_refresh_token",
            relogin_required=True,
        )

    snapshot: Dict[str, Any] = {
        "tokens": tokens,
        "last_refresh": state.get("last_refresh"),
        "discovery": state.get("discovery") or {},
        "redirect_uri": state.get("redirect_uri"),
    }
    return snapshot
def _save_xai_oauth_tokens(
    tokens: Dict[str, Any],
    *,
    discovery: Optional[Dict[str, Any]] = None,
    redirect_uri: str = "",
    last_refresh: Optional[str] = None,
) -> None:
    """Persist xAI OAuth tokens into the ``xai-oauth`` provider state.

    Args:
        tokens: Token dict stored under ``tokens``.
        discovery: Stored under ``discovery`` only when truthy; otherwise any
            previously stored value is left alone.
        redirect_uri: Stored under ``redirect_uri`` only when non-empty.
        last_refresh: Timestamp string to record; defaults to the current
            UTC time in ISO format with a ``Z`` suffix.
    """
    stamp = last_refresh
    if stamp is None:
        stamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    # The whole read-modify-write cycle runs under the auth store lock.
    with _auth_store_lock():
        auth_store = _load_auth_store()
        state = _load_provider_state(auth_store, "xai-oauth") or {}
        state["tokens"] = tokens
        state["last_refresh"] = stamp
        state["auth_mode"] = "oauth_pkce"
        for key, value in (("discovery", discovery), ("redirect_uri", redirect_uri)):
            if value:
                state[key] = value
        _save_provider_state(auth_store, "xai-oauth", state)
        _save_auth_store(auth_store)
def _xai_access_token_is_expiring(access_token: str, skew_seconds: int = 0) -> bool:
if not isinstance(access_token, str) or "." not in access_token:
return False
try:
parts = access_token.split(".")
if len(parts) < 2:
return False
payload_b64 = parts[1]
payload_b64 += "=" * (-len(payload_b64) % 4)
payload = json.loads(base64.urlsafe_b64decode(payload_b64.encode("ascii")).decode("utf-8"))
exp = payload.get("exp")
if not isinstance(exp, (int, float)):
return False
return float(exp) <= (time.time() + max(0, int(skew_seconds)))
except Exception:
return False
def _xai_validate_oauth_endpoint(url: str, *, field: str) -> str:
"""Refuse any OIDC discovery endpoint that isn't HTTPS on the xAI origin.
The OIDC discovery response is a long-lived, low-frequency request whose
output is cached in ``~/.hermes/auth.json``. A single MITM during initial
login could substitute a malicious ``token_endpoint``; that URL would
then receive the refresh_token on every subsequent refresh a permanent
credential leak from a one-time MITM. Validating scheme + host pins the
cached endpoint to the xAI auth origin (or a future ``*.x.ai`` subdomain
if xAI migrates) so the cache poisoning loses its persistence guarantee.
RFC 8414 §2 requires the issuer to be ``https://`` and SHOULD-keeps the
token_endpoint on the same origin; we enforce both. ``x.ai`` is the
bare apex, so we accept either exact host match or any ``.x.ai`` suffix.
"""
parsed = urlparse(url)
if parsed.scheme != "https":
raise AuthError(
f"xAI OIDC discovery returned a non-HTTPS {field}: {url!r}.",
provider="xai-oauth",
code="xai_discovery_invalid",
)
host = (parsed.hostname or "").lower()
if not host:
raise AuthError(
f"xAI OIDC discovery {field} is missing a hostname: {url!r}.",
provider="xai-oauth",
code="xai_discovery_invalid",
)
if host != "x.ai" and not host.endswith(".x.ai"):
raise AuthError(
f"xAI OIDC discovery {field} host {host!r} is not on the xAI origin "
f"(expected x.ai or a *.x.ai subdomain). Refusing to use a cached "
f"endpoint that may have been substituted by a MITM during initial "
f"discovery; re-authenticate with `hermes model` to re-fetch.",
provider="xai-oauth",
code="xai_discovery_invalid",
)
return url
def _xai_oauth_discovery(timeout_seconds: float = 15.0) -> Dict[str, str]:
    """Fetch the OIDC discovery document and return its validated endpoints.

    Args:
        timeout_seconds: Timeout passed to the HTTP GET.

    Returns:
        Dict with ``authorization_endpoint`` and ``token_endpoint``, both
        accepted by ``_xai_validate_oauth_endpoint``.

    Raises:
        AuthError: ``xai_discovery_failed`` (request error or non-200),
            ``xai_discovery_invalid_json`` (unparseable body),
            ``xai_discovery_incomplete`` (not an object, or an endpoint is
            missing), plus whatever the endpoint validator raises.
    """
    try:
        response = httpx.get(
            XAI_OAUTH_DISCOVERY_URL,
            headers={"Accept": "application/json"},
            timeout=timeout_seconds,
        )
    except Exception as exc:
        raise AuthError(
            f"xAI OIDC discovery failed: {exc}",
            provider="xai-oauth",
            code="xai_discovery_failed",
        ) from exc

    if response.status_code != 200:
        raise AuthError(
            f"xAI OIDC discovery returned status {response.status_code}.",
            provider="xai-oauth",
            code="xai_discovery_failed",
        )

    try:
        document = response.json()
    except Exception as exc:
        raise AuthError(
            f"xAI OIDC discovery returned invalid JSON: {exc}",
            provider="xai-oauth",
            code="xai_discovery_invalid_json",
        ) from exc

    if not isinstance(document, dict):
        raise AuthError(
            "xAI OIDC discovery response was not a JSON object.",
            provider="xai-oauth",
            code="xai_discovery_incomplete",
        )

    endpoints = {
        name: str(document.get(name, "") or "").strip()
        for name in ("authorization_endpoint", "token_endpoint")
    }
    if not all(endpoints.values()):
        raise AuthError(
            "xAI OIDC discovery response was missing required endpoints.",
            provider="xai-oauth",
            code="xai_discovery_incomplete",
        )

    # Validate in the same order as before: authorization first, then token.
    for name, value in endpoints.items():
        _xai_validate_oauth_endpoint(value, field=name)
    return endpoints
def refresh_xai_oauth_pure(
    access_token: str,
    refresh_token: str,
    *,
    token_endpoint: str = "",
    timeout_seconds: float = 20.0,
) -> Dict[str, Any]:
    """Exchange a refresh token for new xAI tokens without touching the auth store.

    Args:
        access_token: Accepted but unused.
        refresh_token: Refresh token to redeem; must be a non-blank string.
        token_endpoint: Token endpoint to POST to. When blank, the endpoint
            is obtained from OIDC discovery.
        timeout_seconds: Request timeout; values below 5 seconds are raised
            to 5 for the POST.

    Returns:
        Dict with ``access_token``, ``refresh_token`` (the old one when the
        response omits it), ``id_token``, ``expires_in``, ``token_type``
        (default ``Bearer``) and a fresh ``last_refresh`` UTC timestamp.

    Raises:
        AuthError: for a missing refresh token, a rejected endpoint, a
            non-200 response (``relogin_required`` on 400/401/403), invalid
            JSON, a non-object body, or a response without ``access_token``.
    """
    del access_token  # unused: the grant below sends only the refresh token
    if not (isinstance(refresh_token, str) and refresh_token.strip()):
        raise AuthError(
            "xAI OAuth is missing refresh_token. Re-authenticate with `hermes model`.",
            provider="xai-oauth",
            code="xai_auth_missing_refresh_token",
            relogin_required=True,
        )

    endpoint = token_endpoint.strip()
    if not endpoint:
        endpoint = _xai_oauth_discovery(timeout_seconds)["token_endpoint"]
    # Cached endpoints are re-validated on every refresh: an auth.json from
    # an older Hermes (or a hand-edited one) could carry a non-xAI
    # token_endpoint that would otherwise receive every future refresh_token.
    # The check is cheap and fails fast, so the user can re-run
    # `hermes model` to refetch.
    _xai_validate_oauth_endpoint(endpoint, field="token_endpoint")

    request_timeout = httpx.Timeout(max(5.0, float(timeout_seconds)))
    grant = {
        "grant_type": "refresh_token",
        "client_id": XAI_OAUTH_CLIENT_ID,
        "refresh_token": refresh_token,
    }
    with httpx.Client(timeout=request_timeout, headers={"Accept": "application/json"}) as client:
        response = client.post(
            endpoint,
            headers={"Content-Type": "application/x-www-form-urlencoded"},
            data=grant,
        )

    if response.status_code != 200:
        detail = response.text.strip()
        message = "xAI token refresh failed."
        if detail:
            message += f" Response: {detail}"
        raise AuthError(
            message,
            provider="xai-oauth",
            code="xai_refresh_failed",
            relogin_required=(response.status_code in {400, 401, 403}),
        )

    try:
        body = response.json()
    except Exception as exc:
        raise AuthError(
            f"xAI token refresh returned invalid JSON: {exc}",
            provider="xai-oauth",
            code="xai_refresh_invalid_json",
        ) from exc
    if not isinstance(body, dict):
        raise AuthError(
            "xAI token refresh response was not a JSON object.",
            provider="xai-oauth",
            code="xai_refresh_invalid_response",
            relogin_required=True,
        )

    new_access = str(body.get("access_token", "") or "").strip()
    if not new_access:
        raise AuthError(
            "xAI token refresh response was missing access_token.",
            provider="xai-oauth",
            code="xai_refresh_missing_access_token",
            relogin_required=True,
        )

    return {
        "access_token": new_access,
        # Keep the previous refresh token when the response does not carry one.
        "refresh_token": str(body.get("refresh_token") or refresh_token).strip(),
        "id_token": str(body.get("id_token") or "").strip(),
        "expires_in": body.get("expires_in"),
        "token_type": str(body.get("token_type") or "Bearer").strip() or "Bearer",
        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    }
def _refresh_xai_oauth_tokens(
    tokens: Dict[str, Any],
    *,
    token_endpoint: str,
    redirect_uri: str = "",
    timeout_seconds: float,
) -> Dict[str, Any]:
    """Refresh ``tokens`` against ``token_endpoint`` and persist the result.

    The refreshed access and refresh tokens always replace the old ones.
    ``id_token`` and ``token_type`` are overwritten only when the refresh
    returned a truthy value, and ``expires_in`` only when it is not ``None``.
    The merged dict is saved via ``_save_xai_oauth_tokens`` together with the
    token endpoint, the redirect URI and the refresh timestamp.

    Returns:
        A new dict (the input is not mutated) holding the merged tokens.
    """
    refreshed = refresh_xai_oauth_pure(
        str(tokens.get("access_token", "") or ""),
        str(tokens.get("refresh_token", "") or ""),
        token_endpoint=token_endpoint,
        timeout_seconds=timeout_seconds,
    )

    merged = {
        **tokens,
        "access_token": refreshed["access_token"],
        "refresh_token": refreshed["refresh_token"],
    }
    id_token = refreshed.get("id_token")
    if id_token:
        merged["id_token"] = id_token
    expires_in = refreshed.get("expires_in")
    if expires_in is not None:
        merged["expires_in"] = expires_in
    token_type = refreshed.get("token_type")
    if token_type:
        merged["token_type"] = token_type

    _save_xai_oauth_tokens(
        merged,
        discovery={"token_endpoint": token_endpoint},
        redirect_uri=redirect_uri,
        last_refresh=refreshed["last_refresh"],
    )
    return merged
def resolve_xai_oauth_runtime_credentials(
    *,
    force_refresh: bool = False,
    refresh_if_expiring: bool = True,
    refresh_skew_seconds: int = XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
) -> Dict[str, Any]:
    """Return ready-to-use xAI OAuth credentials, refreshing them when needed.

    Args:
        force_refresh: Refresh even when the access token is not expiring.
        refresh_if_expiring: Refresh when the stored access token expires
            within ``refresh_skew_seconds``.
        refresh_skew_seconds: Expiry look-ahead used for that check.

    Returns:
        Dict with ``provider``, ``base_url`` (``HERMES_XAI_BASE_URL``, then
        ``XAI_BASE_URL``, then the default), ``api_key`` (the access token),
        ``source``, ``last_refresh`` and ``auth_mode``.

    Raises:
        AuthError: when no usable state is stored or the refresh fails.
    """

    def _needs_refresh(token: str) -> bool:
        if force_refresh:
            return True
        return bool(refresh_if_expiring) and _xai_access_token_is_expiring(token, refresh_skew_seconds)

    data = _read_xai_oauth_tokens()
    tokens = dict(data["tokens"])
    access_token = str(tokens.get("access_token", "") or "").strip()
    last_refresh = data.get("last_refresh")

    # An empty or malformed env value falls back to the 20s default instead
    # of crashing credential resolution with a ValueError.
    try:
        refresh_timeout_seconds = float(os.getenv("HERMES_XAI_REFRESH_TIMEOUT_SECONDS", "") or "20")
    except ValueError:
        refresh_timeout_seconds = 20.0

    if _needs_refresh(access_token):
        lock_timeout = max(float(AUTH_LOCK_TIMEOUT_SECONDS), refresh_timeout_seconds + 5.0)
        with _auth_store_lock(timeout_seconds=lock_timeout):
            # Re-read under the lock: the store may have changed between the
            # unlocked read above and acquiring the lock.
            data = _read_xai_oauth_tokens(_lock=False)
            tokens = dict(data["tokens"])
            access_token = str(tokens.get("access_token", "") or "").strip()
            last_refresh = data.get("last_refresh")
            if _needs_refresh(access_token):
                discovery = dict(data.get("discovery") or {})
                token_endpoint = str(discovery.get("token_endpoint", "") or "").strip()
                redirect_uri = str(data.get("redirect_uri", "") or "").strip()
                if not token_endpoint:
                    token_endpoint = _xai_oauth_discovery(refresh_timeout_seconds)["token_endpoint"]
                tokens = _refresh_xai_oauth_tokens(
                    tokens,
                    token_endpoint=token_endpoint,
                    redirect_uri=redirect_uri,
                    timeout_seconds=refresh_timeout_seconds,
                )
                access_token = str(tokens.get("access_token", "") or "").strip()
                # The refresh persisted a new timestamp; report that one
                # rather than the pre-refresh value read above.
                last_refresh = _read_xai_oauth_tokens(_lock=False).get("last_refresh")

    base_url = (
        os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/")
        or os.getenv("XAI_BASE_URL", "").strip().rstrip("/")
        or DEFAULT_XAI_OAUTH_BASE_URL
    )
    return {
        "provider": "xai-oauth",
        "base_url": base_url,
        "api_key": access_token,
        "source": "hermes-auth-store",
        "last_refresh": last_refresh,
        "auth_mode": "oauth_pkce",
    }
# =============================================================================
# TLS verification helper
# =============================================================================
@ -4030,6 +4595,48 @@ def get_codex_auth_status() -> Dict[str, Any]:
}
def get_xai_oauth_auth_status() -> Dict[str, Any]:
    """Build a status snapshot for the xAI OAuth provider.

    The credential pool is consulted first. If it yields an entry whose key
    is not already expired, that entry is reported. Any error while
    consulting the pool is ignored, and the auth-store credentials are
    resolved instead.

    Returns:
        A ``logged_in: True`` dict with ``auth_store``, ``last_refresh``,
        ``auth_mode``, ``source`` and ``api_key``; or, when resolution raises
        ``AuthError``, a ``logged_in: False`` dict with ``auth_store`` and
        ``error``.
    """
    try:
        from agent.credential_pool import load_pool

        pool = load_pool("xai-oauth")
        entry = pool.select() if (pool and pool.has_credentials()) else None
        if entry is not None:
            pooled_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
            if pooled_key and not _xai_access_token_is_expiring(pooled_key, 0):
                return {
                    "logged_in": True,
                    "auth_store": str(_auth_file_path()),
                    "last_refresh": getattr(entry, "last_refresh", None),
                    "auth_mode": "oauth_pkce",
                    "source": f"pool:{getattr(entry, 'label', 'unknown')}",
                    "api_key": pooled_key,
                }
    except Exception:
        # Best-effort: a broken or absent pool falls through to the auth store.
        pass

    try:
        resolved = resolve_xai_oauth_runtime_credentials()
        return {
            "logged_in": True,
            "auth_store": str(_auth_file_path()),
            "last_refresh": resolved.get("last_refresh"),
            "auth_mode": resolved.get("auth_mode"),
            "source": resolved.get("source"),
            "api_key": resolved.get("api_key"),
        }
    except AuthError as exc:
        return {
            "logged_in": False,
            "auth_store": str(_auth_file_path()),
            "error": str(exc),
        }
def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
"""Status snapshot for API-key providers (z.ai, Kimi, MiniMax)."""
pconfig = PROVIDER_REGISTRY.get(provider_id)
@ -4100,6 +4707,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
return get_nous_auth_status()
if target == "openai-codex":
return get_codex_auth_status()
if target == "xai-oauth":
return get_xai_oauth_auth_status()
if target == "qwen-oauth":
return get_qwen_auth_status()
if target == "google-gemini-cli":
@ -4320,7 +4929,7 @@ def _logout_default_provider_from_config() -> Optional[str]:
"No provider is currently logged in" and never reset model.provider.
"""
provider = _get_config_provider()
if provider in {"nous", "openai-codex"}:
if provider in {"nous", "openai-codex", "xai-oauth"}:
return provider
return None
@ -4619,6 +5228,247 @@ def _login_openai_codex(
print(f" Config updated: {config_path} (model.provider=openai-codex)")
def _login_xai_oauth(
    args,
    pconfig: ProviderConfig,
    *,
    force_new_login: bool = False,
) -> None:
    """Interactive login for the ``xai-oauth`` provider.

    Unless ``force_new_login`` is set, stored credentials whose access token
    is not expiring within 60 seconds are offered for reuse. Accepting them
    (also the outcome on EOF or Ctrl-C at the prompt) only updates the
    config. Otherwise the loopback OAuth flow runs, the tokens are saved,
    and the config is pointed at ``xai-oauth``.

    Args:
        args: Parsed CLI arguments; ``timeout`` and ``no_browser`` are read
            when present.
        pconfig: Unused.
        force_new_login: Skip the reuse prompt and always run the flow.
    """
    del pconfig

    if not force_new_login:
        try:
            stored = resolve_xai_oauth_runtime_credentials()
            stored_key = stored.get("api_key", "")
            reusable = (
                isinstance(stored_key, str)
                and bool(stored_key)
                and not _xai_access_token_is_expiring(stored_key, 60)
            )
            if reusable:
                print("Existing xAI OAuth credentials found in Hermes auth store.")
                try:
                    answer = input("Use existing credentials? [Y/n]: ").strip().lower()
                except (EOFError, KeyboardInterrupt):
                    answer = "y"
                if answer in ("", "y", "yes"):
                    config_path = _update_config_for_provider(
                        "xai-oauth",
                        stored.get("base_url", DEFAULT_XAI_OAUTH_BASE_URL),
                    )
                    print()
                    print("Login successful!")
                    print(f" Config updated: {config_path} (model.provider=xai-oauth)")
                    return
        except AuthError:
            # No usable stored credentials: fall through to a fresh login.
            pass

    print()
    print("Signing in to xAI Grok OAuth (SuperGrok Subscription)...")
    print("(Hermes creates its own local OAuth session)")
    print()

    timeout_seconds = float(getattr(args, "timeout", None) or 20.0)
    open_browser = not getattr(args, "no_browser", False)
    if _is_remote_session():
        open_browser = False

    creds = _xai_oauth_loopback_login(timeout_seconds=timeout_seconds, open_browser=open_browser)
    _save_xai_oauth_tokens(
        creds["tokens"],
        discovery=creds.get("discovery"),
        redirect_uri=creds.get("redirect_uri", ""),
        last_refresh=creds.get("last_refresh"),
    )
    config_path = _update_config_for_provider("xai-oauth", creds.get("base_url", DEFAULT_XAI_OAUTH_BASE_URL))

    print()
    print("Login successful!")
    from hermes_constants import display_hermes_home as _dhh

    print(f" Auth state: {_dhh()}/auth.json")
    print(f" Config updated: {config_path} (model.provider=xai-oauth)")
def _xai_oauth_build_authorize_url(
    *,
    authorization_endpoint: str,
    redirect_uri: str,
    code_challenge: str,
    state: str,
    nonce: str,
) -> str:
    """Build the browser URL that starts the xAI authorization-code + PKCE flow.

    Args:
        authorization_endpoint: Authorization endpoint from discovery. It may
            already carry a query component, which is retained.
        redirect_uri: Callback URI the authorization server redirects to.
        code_challenge: PKCE challenge, sent with method ``S256``.
        state: Opaque value echoed back on the callback.
        nonce: Value sent as the ``nonce`` parameter.

    Returns:
        The endpoint with the URL-encoded authorization parameters appended.
    """
    # `plan=generic` opts the consent screen into xAI's generic OAuth plan
    # tier instead of falling back to the per-account default. Without it,
    # accounts.x.ai rejects loopback OAuth from non-allowlisted clients.
    # `referrer=hermes-agent` lets xAI attribute Hermes-originated logins
    # in their OAuth server logs (we still impersonate the upstream Grok-CLI
    # client_id; this is best-effort attribution until xAI mints us our own).
    authorize_params = {
        "response_type": "code",
        "client_id": XAI_OAUTH_CLIENT_ID,
        "redirect_uri": redirect_uri,
        "scope": XAI_OAUTH_SCOPE,
        "code_challenge": code_challenge,
        "code_challenge_method": "S256",
        "state": state,
        "nonce": nonce,
        "plan": "generic",
        "referrer": "hermes-agent",
    }
    # RFC 6749 §3.1: an authorization endpoint may include a query component,
    # which must be retained when adding parameters. Joining with a second
    # "?" would corrupt such an endpoint.
    if "?" not in authorization_endpoint:
        separator = "?"
    elif authorization_endpoint.endswith(("?", "&")):
        separator = ""
    else:
        separator = "&"
    return f"{authorization_endpoint}{separator}{urlencode(authorize_params)}"
def _xai_oauth_loopback_login(
    *,
    timeout_seconds: float = 20.0,
    open_browser: bool = True,
) -> Dict[str, Any]:
    """Run the xAI authorization-code + PKCE flow through a local callback server.

    Sequence: discover the endpoints, start the callback server, print (and
    optionally open) the authorize URL, wait for the callback, check the
    returned ``state``, then exchange the code for tokens.

    Args:
        timeout_seconds: Used for discovery; the callback wait is
            ``max(30, 9 * timeout_seconds)`` and the token exchange uses
            ``max(20, timeout_seconds)``.
        open_browser: Try to open the authorize URL in a browser. This is
            skipped when ``_is_remote_session()`` is true.

    Returns:
        Dict with ``tokens``, ``discovery``, ``redirect_uri``, ``base_url``,
        ``last_refresh`` and ``source`` (``"oauth-loopback"``).

    Raises:
        AuthError: on an authorization error, a state mismatch, a missing
            code, or a failed or invalid token exchange. Errors raised by
            the helpers called here propagate unchanged.
    """
    discovery = _xai_oauth_discovery(timeout_seconds)
    authorize_endpoint = discovery["authorization_endpoint"]
    exchange_endpoint = discovery["token_endpoint"]

    server, thread, callback_result, redirect_uri = _xai_start_callback_server()
    try:
        _xai_validate_loopback_redirect_uri(redirect_uri)
        code_verifier = _oauth_pkce_code_verifier()
        challenge = _oauth_pkce_code_challenge(code_verifier)
        state = uuid.uuid4().hex
        nonce = uuid.uuid4().hex
        authorize_url = _xai_oauth_build_authorize_url(
            authorization_endpoint=authorize_endpoint,
            redirect_uri=redirect_uri,
            code_challenge=challenge,
            state=state,
            nonce=nonce,
        )

        print("Open this URL to authorize Hermes with xAI:")
        print(authorize_url)
        print()
        print(f"Waiting for callback on {redirect_uri}")
        _print_loopback_ssh_hint(redirect_uri, docs_url=XAI_OAUTH_DOCS_URL)

        if open_browser and not _is_remote_session():
            try:
                opened = webbrowser.open(authorize_url)
            except Exception:
                opened = False
            print(
                "Browser opened for xAI authorization."
                if opened
                else "Could not open the browser automatically; use the URL above."
            )

        callback = _xai_wait_for_callback(
            server,
            thread,
            callback_result,
            timeout_seconds=max(30.0, timeout_seconds * 9),
        )
    except Exception:
        # Best-effort teardown of the listener before re-raising.
        try:
            server.shutdown()
            server.server_close()
        except Exception:
            pass
        try:
            thread.join(timeout=1.0)
        except Exception:
            pass
        raise

    if callback.get("error"):
        reason = callback.get("error_description") or callback["error"]
        raise AuthError(
            f"xAI authorization failed: {reason}",
            provider="xai-oauth",
            code="xai_authorization_failed",
        )
    if callback.get("state") != state:
        raise AuthError(
            "xAI authorization failed: state mismatch.",
            provider="xai-oauth",
            code="xai_state_mismatch",
        )
    code = str(callback.get("code") or "").strip()
    if not code:
        raise AuthError(
            "xAI authorization failed: missing authorization code.",
            provider="xai-oauth",
            code="xai_code_missing",
        )

    exchange_form = {
        "grant_type": "authorization_code",
        "code": code,
        "redirect_uri": redirect_uri,
        "client_id": XAI_OAUTH_CLIENT_ID,
        "code_verifier": code_verifier,
    }
    try:
        response = httpx.post(
            exchange_endpoint,
            headers={"Content-Type": "application/x-www-form-urlencoded", "Accept": "application/json"},
            data=exchange_form,
            timeout=max(20.0, timeout_seconds),
        )
    except Exception as exc:
        raise AuthError(
            f"xAI token exchange failed: {exc}",
            provider="xai-oauth",
            code="xai_token_exchange_failed",
        ) from exc

    if response.status_code != 200:
        detail = response.text.strip()
        message = "xAI token exchange failed."
        if detail:
            message += f" Response: {detail}"
        raise AuthError(
            message,
            provider="xai-oauth",
            code="xai_token_exchange_failed",
        )

    try:
        payload = response.json()
    except Exception as exc:
        raise AuthError(
            f"xAI token exchange returned invalid JSON: {exc}",
            provider="xai-oauth",
            code="xai_token_exchange_invalid",
        ) from exc
    if not isinstance(payload, dict):
        raise AuthError(
            "xAI token exchange response was not a JSON object.",
            provider="xai-oauth",
            code="xai_token_exchange_invalid",
        )

    access_token = str(payload.get("access_token", "") or "").strip()
    refresh_token = str(payload.get("refresh_token", "") or "").strip()
    if not access_token:
        raise AuthError(
            "xAI token exchange did not return an access_token.",
            provider="xai-oauth",
            code="xai_token_exchange_invalid",
        )
    if not refresh_token:
        raise AuthError(
            "xAI token exchange did not return a refresh_token.",
            provider="xai-oauth",
            code="xai_token_exchange_invalid",
        )

    base_url = (
        os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/")
        or os.getenv("XAI_BASE_URL", "").strip().rstrip("/")
        or DEFAULT_XAI_OAUTH_BASE_URL
    )
    token_bundle = {
        "access_token": access_token,
        "refresh_token": refresh_token,
        "id_token": str(payload.get("id_token", "") or "").strip(),
        "expires_in": payload.get("expires_in"),
        "token_type": str(payload.get("token_type") or "Bearer").strip() or "Bearer",
    }
    return {
        "tokens": token_bundle,
        "discovery": discovery,
        "redirect_uri": redirect_uri,
        "base_url": base_url,
        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        "source": "oauth-loopback",
    }
def _codex_device_code_login() -> Dict[str, Any]:
"""Run the OpenAI device code login flow and return credentials dict."""
import time as _time

View file

@ -33,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL
# Providers that support OAuth login in addition to API keys.
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "xai-oauth", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}
def _get_custom_provider_names() -> list:
@ -77,6 +77,8 @@ def _normalize_provider(provider: str) -> str:
normalized = (provider or "").strip().lower()
if normalized in {"or", "open-router"}:
return "openrouter"
if normalized in {"grok-oauth", "xai-oauth", "x-ai-oauth", "xai-grok-oauth"}:
return "xai-oauth"
# Check if it matches a custom provider name
custom_key = _resolve_custom_provider_input(normalized)
if custom_key:
@ -170,7 +172,7 @@ def auth_add_command(args) -> None:
if provider.startswith(CUSTOM_POOL_PREFIX):
requested_type = AUTH_TYPE_API_KEY
else:
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} else AUTH_TYPE_API_KEY
requested_type = AUTH_TYPE_OAUTH if provider in _OAUTH_CAPABLE_PROVIDERS else AUTH_TYPE_API_KEY
pool = load_pool(provider)
@ -333,6 +335,31 @@ def auth_add_command(args) -> None:
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
return
if provider == "xai-oauth":
creds = auth_mod._xai_oauth_loopback_login(
timeout_seconds=getattr(args, "timeout", None) or 20.0,
open_browser=not getattr(args, "no_browser", False),
)
label = (getattr(args, "label", None) or "").strip() or label_from_token(
creds["tokens"]["access_token"],
_oauth_default_label(provider, len(pool.entries()) + 1),
)
entry = PooledCredential(
provider=provider,
id=uuid.uuid4().hex[:6],
label=label,
auth_type=AUTH_TYPE_OAUTH,
priority=0,
source=f"{SOURCE_MANUAL}:xai_pkce",
access_token=creds["tokens"]["access_token"],
refresh_token=creds["tokens"].get("refresh_token"),
base_url=creds.get("base_url"),
last_refresh=creds.get("last_refresh"),
)
pool.add_entry(entry)
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
return
if provider == "google-gemini-cli":
from agent.google_oauth import run_gemini_oauth_login_pure

View file

@ -175,6 +175,48 @@ def _check_via_local_git(repo_dir: Path) -> Optional[int]:
return None
def _version_tuple(v: str) -> tuple[int, ...]:
"""Parse '0.13.0' into (0, 13, 0) for comparison. Non-numeric segments become 0."""
parts = []
for segment in v.split("."):
try:
parts.append(int(segment))
except ValueError:
parts.append(0)
return tuple(parts)
def _fetch_pypi_latest(package: str = "hermes-agent") -> Optional[str]:
    """Look up the latest published version of ``package`` on PyPI.

    Queries ``https://pypi.org/pypi/<package>/json`` with a 5 second timeout
    and returns the ``info.version`` field of the response.

    Returns:
        The version value, or ``None`` on any failure (network error, bad
        JSON, unexpected shape) or when the field is absent.
    """
    try:
        import urllib.request

        request = urllib.request.Request(
            f"https://pypi.org/pypi/{package}/json",
            headers={"Accept": "application/json"},
        )
        with urllib.request.urlopen(request, timeout=5) as response:
            document = json.loads(response.read())
        info = document.get("info", {})
        return info.get("version")
    except Exception:
        return None
def check_via_pypi() -> Optional[int]:
    """Compare the installed ``VERSION`` with the latest release on PyPI.

    Returns:
        ``0`` when up to date (or ahead), ``1`` when PyPI has a newer
        version, and ``None`` when the latest version could not be fetched.
    """
    latest = _fetch_pypi_latest()
    if latest is None:
        return None
    if latest == VERSION:
        return 0
    try:
        pypi_is_newer = _version_tuple(latest) > _version_tuple(VERSION)
    except Exception:
        # Parsing failed: fall back to plain string inequality.
        return 1 if latest != VERSION else 0
    return 1 if pypi_is_newer else 0
def check_for_updates() -> Optional[int]:
"""Check whether a Hermes update is available.
@ -213,8 +255,9 @@ def check_for_updates() -> Optional[int]:
if not (repo_dir / ".git").exists():
repo_dir = hermes_home / "hermes-agent"
if not (repo_dir / ".git").exists():
return None
behind = _check_via_local_git(repo_dir)
behind = check_via_pypi()
else:
behind = _check_via_local_git(repo_dir)
try:
cache_file.write_text(json.dumps({"ts": now, "behind": behind, "rev": embedded_rev}))
@ -470,6 +513,9 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
model_short = model_short[:25] + "..."
ctx_str = f" [dim {dim}]·[/] [dim {dim}]{_format_context_length(context_length)} context[/]" if context_length else ""
left_lines.append(f"[{accent}]{model_short}[/]{ctx_str} [dim {dim}]·[/] [dim {dim}]Nous Research[/]")
if os.getenv("HERMES_YOLO_MODE"):
left_lines.append(f"[bold red]⚠ YOLO mode[/] [dim {dim}]— all approval prompts bypassed[/]")
left_lines.append(f"[dim {dim}]{cwd}[/]")
if session_id:
left_lines.append(f"[dim {session_color}]Session: {session_id}[/]")

View file

@ -304,6 +304,103 @@ def render_codex_toml_section(
return "\n".join(out) + "\n"
def _insert_managed_block_at_top_level(user_text: str, managed_block: str) -> str:
    """Insert Hermes' managed Codex TOML block while keeping root keys root-scoped.

    TOML has no syntax to return to the document root after a table header.
    Appending a root key like `default_permissions = ...` after a user table
    such as `[features]` would therefore create
    `features.default_permissions`, which Codex rejects. The managed block is
    inserted before the first table header so its root keys stay top-level,
    and the user's content is otherwise preserved.

    Table headers are detected with ``_looks_like_table_header``. A bare
    "starts with ``[``" check would also match continuation lines of a
    multi-line array (e.g. ``["nested"],``) and splice the managed block
    into the middle of that array.

    Args:
        user_text: Existing config text without the managed block.
        managed_block: Rendered managed block to insert.

    Returns:
        The combined text. When ``user_text`` is blank this is
        ``managed_block``; when it has no table header the block is appended
        after a blank line.
    """
    if not user_text.strip():
        return managed_block

    lines = user_text.splitlines(keepends=True)
    first_table_idx: Optional[int] = next(
        (idx for idx, line in enumerate(lines) if _looks_like_table_header(line.lstrip())),
        None,
    )

    if first_table_idx is None:
        # No tables at all: everything is root-scoped, so appending is safe.
        prefix = user_text.rstrip("\n")
        return f"{prefix}\n\n{managed_block}" if prefix else managed_block

    prefix = "".join(lines[:first_table_idx]).rstrip("\n")
    suffix = "".join(lines[first_table_idx:]).lstrip("\n")
    if prefix:
        return f"{prefix}\n\n{managed_block}\n{suffix}"
    return f"{managed_block}\n{suffix}"
def _strip_unmanaged_plugin_tables(toml_text: str) -> str:
    """Drop every ``[plugins."<name>@<marketplace>"]`` table from ``toml_text``.

    Codex writes these tables itself when the user runs
    ``codex plugins enable`` directly. Migrate later re-emits the same
    plugins (as reported by the live ``plugin/list`` RPC) inside the managed
    block. Without this strip the file would contain duplicate
    ``[plugins."X@Y"]`` headers, which codex's strict TOML parser refuses to
    load.

    Hermes owns the ``[plugins.*]`` namespace once migrate has run, with
    ``plugin/list`` as the source of truth. Callers are expected to invoke
    this only when ``plugin/list`` succeeded, because otherwise plugins the
    user installed via ``codex`` would be lost with no way to re-emit them.

    Behavior:
      * A header line starting with ``[plugins.`` opens a swallow region
        that runs until the next table header that is not ``[plugins.`` or
        until end-of-file.
      * The managed block is not treated specially here; callers should run
        ``_strip_existing_managed_block`` first so it is already gone.
    """
    kept: list[str] = []
    swallowing = False
    for raw_line in toml_text.splitlines(keepends=True):
        candidate = raw_line.lstrip()
        # Only genuine ``[...]`` header lines may change the swallow state.
        # Multi-line array continuations such as ``["nested"],`` also start
        # with ``[`` once stripped, and treating them as headers would end
        # the region mid-table and leak array fragments into the output.
        if _looks_like_table_header(candidate):
            swallowing = candidate.startswith("[plugins.")
        if not swallowing:
            kept.append(raw_line)
    return "".join(kept)
def _looks_like_table_header(stripped_line: str) -> bool:
"""Return True if ``stripped_line`` is a TOML table header.
A header has the shape ``[name]`` or ``[[name]]`` (array-of-tables),
optionally followed by a comment. The closing ``]`` (or ``]]``) must
appear on the same line, and no key-assignment ``=`` can precede it.
This distinguishes real headers from multi-line array continuation
lines that also start with ``[`` after ``lstrip()``.
"""
if not stripped_line.startswith("["):
return False
# Drop trailing comment so e.g. ``[features] # note`` still matches.
head = stripped_line.split("#", 1)[0].rstrip()
if not head.endswith("]"):
return False
# ``key = [x]`` would have an ``=`` before the bracket; a header doesn't.
bracket_idx = head.index("]")
return "=" not in head[: bracket_idx + 1]
def _strip_existing_managed_block(toml_text: str) -> str:
"""Remove any prior managed section so re-runs idempotently replace it.
@ -431,6 +528,32 @@ def _query_codex_plugins(
return out, None
def _looks_like_test_tempdir(path: str) -> bool:
"""Heuristic: does ``path`` look like a pytest/transient tempdir?
pytest tempdirs live under ``pytest-of-<user>/pytest-<n>/`` (created via
``tmp_path`` / ``tmp_path_factory``) and are reaped between sessions.
macOS routes ``/tmp`` through ``/private/var/folders/<>/T`` which is
what pytest's tempdir factory uses by default. If a HERMES_HOME pointing
at one of those paths is burned into ``~/.codex/config.toml``, every
codex-routed hermes-tools call fails silently once the directory is GC'd.
We err on the side of refusing losing a (very unlikely) real
``~/.hermes`` symlink that happens to live under ``/private/var/folders``
is much less harmful than silently bricking codex's tool surface.
"""
if not path:
return False
needles = (
"pytest-of-",
"/pytest-",
"/tmp/pytest",
"/private/var/folders/", # macOS tempdir root
)
normalized = path.lower()
return any(needle in normalized for needle in needles)
def _build_hermes_tools_mcp_entry() -> dict:
"""Build the codex stdio-transport entry that launches Hermes' own
tool surface as an MCP server. Codex's subprocess will call back into
@ -443,9 +566,22 @@ def _build_hermes_tools_mcp_entry() -> dict:
import sys
env: dict[str, str] = {}
# HERMES_HOME passes through if set so the MCP subprocess sees the
# same config / auth / sessions DB as the parent CLI.
hermes_home = os.environ.get("HERMES_HOME")
# HERMES_HOME passes through IF SET so the MCP subprocess sees the same
# config / auth / sessions DB as the parent CLI. Read from os.environ
# (not get_hermes_home()) on purpose: when the env var is unset we want
# codex's subprocess to inherit whatever HERMES_HOME its launcher sets
# at runtime (systemd unit, gateway, kanban dispatcher, custom shell),
# rather than burning the migrate-time resolved default into config.toml
# — that would override the launcher's HERMES_HOME and pin the subprocess
# to the wrong profile.
#
# The pytest-tempdir guard below catches the issue #26250 Bug C scenario:
# a sibling test's monkeypatch.setenv("HERMES_HOME", tmp_path) would
# otherwise leak a transient pytest tempdir into the user's real
# ~/.codex/config.toml and silently brick codex once the tempdir is GC'd.
hermes_home = os.environ.get("HERMES_HOME") or ""
if hermes_home and _looks_like_test_tempdir(hermes_home):
hermes_home = ""
if hermes_home:
env["HERMES_HOME"] = hermes_home
# PYTHONPATH passes through so a worktree-launched hermes finds the
@ -533,10 +669,16 @@ def migrate(
# Discover installed Codex curated plugins. Best-effort — never blocks
# the migration if codex is unreachable or the RPC fails.
plugins: list[dict] = []
plugin_query_succeeded = False
if discover_plugins and not dry_run:
plugins, plugin_err = _query_codex_plugins(codex_home=codex_home)
if plugin_err:
report.plugin_query_error = plugin_err
else:
# plugin/list returned authoritatively (even if the list is empty).
# That means we own [plugins.*] for this re-render and can safely
# strip any pre-existing tables outside the managed block.
plugin_query_succeeded = True
for p in plugins:
report.migrated_plugins.append(f"{p['name']}@{p['marketplace']}")
@ -571,14 +713,15 @@ def migrate(
report.errors.append(f"could not read {target}: {exc}")
return report
without_managed = _strip_existing_managed_block(existing)
# Ensure exactly one blank line between user content and managed block
if without_managed and not without_managed.endswith("\n"):
without_managed += "\n"
new_text = (
without_managed.rstrip("\n") + "\n\n" + managed_block
if without_managed.strip()
else managed_block
)
# Bug B: when plugin/list ran authoritatively, codex's own
# [plugins."<name>@<marketplace>"] tables outside our managed block
# would survive _strip_existing_managed_block and then collide with
# the entries we re-emit inside the managed block — producing
# duplicate-table-header parse errors on codex's next startup. Drop
# those pre-existing tables since plugin/list is the source of truth.
if plugin_query_succeeded:
without_managed = _strip_unmanaged_plugin_tables(without_managed)
new_text = _insert_managed_block_at_top_level(without_managed, managed_block)
else:
new_text = managed_block

View file

@ -198,6 +198,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
args_hint="[days]"),
CommandDef("platforms", "Show gateway/messaging platform status", "Info",
cli_only=True, aliases=("gateway",)),
CommandDef("platform", "Pause, resume, or list a failing gateway platform", "Info",
gateway_only=True, args_hint="<pause|resume|list> [name]"),
CommandDef("copy", "Copy the last assistant response to clipboard", "Info",
cli_only=True, args_hint="[number]"),
CommandDef("paste", "Attach clipboard image from your clipboard", "Info",
@ -209,8 +211,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("debug", "Upload debug report (system info + logs) and get shareable links", "Info"),
# Exit
CommandDef("quit", "Exit the CLI", "Exit",
cli_only=True, aliases=("exit",)),
CommandDef("quit", "Exit the CLI (use --delete to also remove session history)", "Exit",
cli_only=True, aliases=("exit",), args_hint="[--delete]"),
]

View file

@ -134,8 +134,7 @@ _EXTRA_ENV_KEYS = frozenset({
"MATRIX_RECOVERY_KEY",
# Langfuse observability plugin — optional tuning keys + standard SDK vars.
# Activation is via plugins.enabled (opt-in through `hermes plugins enable
# observability/langfuse` or `hermes tools → Langfuse`); credentials gate
# the plugin at runtime.
# observability/langfuse`); credentials gate the plugin at runtime.
"HERMES_LANGFUSE_ENV",
"HERMES_LANGFUSE_RELEASE",
"HERMES_LANGFUSE_SAMPLE_RATE",
@ -199,9 +198,40 @@ def get_managed_update_command() -> Optional[str]:
return None
def detect_install_method(project_root: Optional[Path] = None) -> str:
    """Detect how Hermes was installed.

    Args:
        project_root: Directory to inspect for a git checkout. Defaults to
            the parent of the directory containing this module.

    Returns:
        The managed-system name reported by ``get_managed_system()``,
        lower-cased with spaces replaced by hyphens, when one is reported.
        ``recommended_update_command_for_method`` recognises ``"nixos"`` and
        ``"homebrew"``. Otherwise ``"git"`` when ``project_root`` contains a
        ``.git`` entry, else ``"pip"``.
    """
    managed = get_managed_system()
    if managed:
        return managed.lower().replace(" ", "-")
    if project_root is None:
        project_root = Path(__file__).parent.parent.resolve()
    # ``.git`` is a directory in a normal clone but a regular *file*
    # ("gitdir: ...") in linked worktrees and submodules, so test for
    # existence rather than is_dir() to avoid misreporting those as "pip".
    if (project_root / ".git").exists():
        return "git"
    return "pip"
def recommended_update_command_for_method(method: str) -> str:
    """Map an install method name to the shell command that updates Hermes.

    Args:
        method: Install method, as produced by ``detect_install_method``.

    Returns:
        A fixed command for ``"nixos"`` and ``"homebrew"``. For ``"pip"``,
        the ``uv pip`` form when a ``uv`` executable is found on PATH and
        the plain ``pip`` form otherwise. ``"hermes update"`` for any other
        value.
    """
    for known_method, command in (
        ("nixos", "sudo nixos-rebuild switch"),
        ("homebrew", "brew upgrade hermes-agent"),
    ):
        if method == known_method:
            return command
    if method != "pip":
        return "hermes update"
    # Imported lazily: only the pip branch needs to probe PATH.
    import shutil

    if shutil.which("uv"):
        return "uv pip install --upgrade hermes-agent"
    return "pip install --upgrade hermes-agent"
def recommended_update_command() -> str:
    """Return the best update command for the current installation.

    A command supplied by ``get_managed_update_command()`` takes precedence.
    Otherwise the install method is detected and mapped to a command via
    ``recommended_update_command_for_method``.
    """
    # The earlier unconditional
    # ``return get_managed_update_command() or "hermes update"`` made the
    # install-method detection below unreachable; it has been removed.
    managed_cmd = get_managed_update_command()
    if managed_cmd:
        return managed_cmd
    method = detect_install_method()
    return recommended_update_command_for_method(method)
def format_managed_message(action: str = "modify this Hermes installation") -> str:
@ -401,7 +431,10 @@ def ensure_hermes_home():
else:
home.mkdir(parents=True, exist_ok=True)
_secure_dir(home)
for subdir in ("cron", "sessions", "logs", "logs/curator", "memories"):
for subdir in (
"cron", "sessions", "logs", "logs/curator", "memories",
"pairing", "hooks", "image_cache", "audio_cache", "skills",
):
d = home / subdir
d.mkdir(parents=True, exist_ok=True)
_secure_dir(d)
@ -1112,6 +1145,10 @@ DEFAULT_CONFIG = {
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
"base_url": "", # direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
"api_mode": "", # wire protocol for delegation.base_url: "chat_completions",
# "codex_responses", or "anthropic_messages". Empty = auto-detect
# from URL (e.g. /anthropic suffix → anthropic_messages). Set this
# explicitly for non-standard endpoints the heuristic can't detect.
# When delegate_task narrows child toolsets explicitly, preserve any
# MCP toolsets the parent already has enabled. On by default so
# narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
@ -1269,6 +1306,18 @@ DEFAULT_CONFIG = {
# list_roles, member_info, search_members, fetch_messages, list_pins,
# pin_message, unpin_message, create_thread, add_role, remove_role.
"server_actions": "",
# Accept arbitrary attachment file types (not just SUPPORTED_DOCUMENT_TYPES).
# When True, any uploaded file is cached to disk with mime
# application/octet-stream and the path is surfaced to the agent so it
# can use terminal/read_file/etc. against it. Default False preserves
# the historical allowlist behaviour.
# Env override: DISCORD_ALLOW_ANY_ATTACHMENT.
"allow_any_attachment": False,
# Maximum bytes per attachment the gateway will cache. The whole file
# is held in memory while being written, so unlimited uploads carry a
# real memory cost. Default 32 MiB matches the historical hardcoded
# cap. Set to 0 for no cap. Env override: DISCORD_MAX_ATTACHMENT_BYTES.
"max_attachment_bytes": 33554432,
},
# WhatsApp platform settings (gateway mode)
@ -1439,6 +1488,15 @@ DEFAULT_CONFIG = {
"level": "INFO", # Minimum level for agent.log: DEBUG, INFO, WARNING
"max_size_mb": 5, # Max size per log file before rotation
"backup_count": 3, # Number of rotated backup files to keep
# Periodic process memory usage logging (gateway only). Emits a
# grep-friendly "[MEMORY] rss=...MB ..." line at the configured
# interval so slow leaks in the long-lived gateway are visible
# in agent.log / gateway.log as a time series. Ported from
# cline/cline#10343.
"memory_monitor": {
"enabled": True, # Flip to false to silence the periodic line
"interval_seconds": 300, # Default: every 5 minutes
},
},
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
@ -1569,6 +1627,23 @@ DEFAULT_CONFIG = {
"servers": {},
},
# X (Twitter) Search via xAI's built-in x_search Responses tool.
# The tool registers when xAI credentials are available (SuperGrok
# OAuth or XAI_API_KEY) AND the x_search toolset is enabled in
# `hermes tools`. These settings tune the backing Responses API call.
"x_search": {
# xAI model used for the Responses call. grok-4.20-reasoning is
# the recommended default; any Grok model with x_search tool
# access works.
"model": "grok-4.20-reasoning",
# Request timeout in seconds (minimum 30). x_search can take
# 60-120s for complex queries — the default is generous.
"timeout_seconds": 180,
# Number of automatic retries on 5xx / ReadTimeout / ConnectionError.
# Each retry backs off (1.5x attempt seconds, capped at 5s).
"retries": 2,
},
# Config schema version - bump this when adding new required fields
"_config_version": 23,
}
@ -2839,6 +2914,7 @@ def _normalize_custom_provider_entry(
"api_mode", "transport", "model", "default_model", "models",
"context_length", "rate_limit_delay",
"request_timeout_seconds", "stale_timeout_seconds",
"discover_models",
}
for camel, snake in _CAMEL_ALIASES.items():
if camel in entry and snake not in entry:
@ -2929,6 +3005,10 @@ def _normalize_custom_provider_entry(
if isinstance(rate_limit_delay, (int, float)) and rate_limit_delay >= 0:
normalized["rate_limit_delay"] = rate_limit_delay
discover_models = entry.get("discover_models")
if isinstance(discover_models, bool):
normalized["discover_models"] = discover_models
return normalized

View file

@ -196,9 +196,15 @@ def cron_create(args):
def cron_edit(args):
from cron.jobs import get_job
from cron.jobs import AmbiguousJobReference, resolve_job_ref
job = get_job(args.job_id)
try:
job = resolve_job_ref(args.job_id)
except AmbiguousJobReference as exc:
print(color(str(exc), Colors.RED))
for m in exc.matches:
print(f" {m['id']} (name: {m.get('name')!r})")
return 1
if not job:
print(color(f"Job not found: {args.job_id}", Colors.RED))
return 1

106
hermes_cli/dep_ensure.py Normal file
View file

@ -0,0 +1,106 @@
"""Lazy dependency bootstrapper for non-Python runtime deps.
Detection and prompting live here in Python, not in install.sh, because:
1. shutil.which() works on every platform; install.sh needs bash.
2. Detection is instant; spawning bash for a "is node installed?" check is waste.
3. Python controls the UX (rich prompts, non-interactive fallback, TTY detection).
install.sh is still the *installation* backend because it has 1900 lines of
battle-tested OS detection and package-manager logic (apt/brew/pacman/dnf/
zypper/Termux/etc.). Reimplementing that in Python would be huge duplication.
Deps that degrade gracefully (ripgrep: grep fallback; ffmpeg: conversion skipped)
don't need ensure_dependency wired in — only hard-fail sites do (TUI needs node,
browser tool needs agent-browser).
"""
from __future__ import annotations
import os
import shutil
import subprocess
import sys
from pathlib import Path
# Maps a dependency name to a zero-argument callable that returns True when
# that dependency is currently available. The callables are lambdas so the
# lookup happens at call time, and the helper functions they reference
# are only resolved when a check actually runs.
_DEP_CHECKS = {
"node": lambda: shutil.which("node") is not None,
# "browser" is satisfied by any one of: ``agent-browser`` on PATH, a
# Chrome/Chromium executable on PATH, or an ``agent-browser`` binary under
# the Hermes home's node_modules/.bin.
"browser": lambda: (
shutil.which("agent-browser") is not None
or _has_system_browser()
or _has_hermes_agent_browser()
),
"ripgrep": lambda: shutil.which("rg") is not None,
"ffmpeg": lambda: shutil.which("ffmpeg") is not None,
}
# Human-readable labels, keyed by the same names as _DEP_CHECKS, used in the
# messages and install prompt printed by ensure_dependency().
_DEP_DESCRIPTIONS = {
"node": "Node.js (required for browser tools and TUI)",
"browser": "Browser engine (Chromium, for web browsing tools)",
"ripgrep": "ripgrep (fast file search)",
"ffmpeg": "ffmpeg (TTS voice messages)",
}
def _has_system_browser() -> bool:
for name in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser", "chrome"):
if shutil.which(name):
return True
return False
def _has_hermes_agent_browser() -> bool:
    """Report whether ``agent-browser`` exists under the Hermes home.

    Returns:
        True when ``<hermes home>/node_modules/.bin/agent-browser`` is a
        regular file, else False.
    """
    # Imported lazily so the lookup of the Hermes home only happens when
    # this check is actually evaluated.
    from hermes_constants import get_hermes_home

    local_binary = get_hermes_home() / "node_modules" / ".bin" / "agent-browser"
    return local_binary.is_file()
def _find_install_script(
package_dir: Path | None = None,
repo_root: Path | None = None,
) -> Path | None:
"""Locate install.sh — bundled in wheel or in git checkout."""
if package_dir is None:
package_dir = Path(__file__).parent
if repo_root is None:
repo_root = package_dir.parent
bundled = package_dir / "scripts" / "install.sh"
if bundled.is_file():
return bundled
repo = repo_root / "scripts" / "install.sh"
if repo.is_file():
return repo
return None
def ensure_dependency(dep: str, interactive: bool = True) -> bool:
    """Ensure a non-Python dependency is available, installing it if needed.

    The dependency's check from ``_DEP_CHECKS`` runs first; when it passes
    nothing else happens. Otherwise ``install.sh --ensure <dep>`` is run
    through ``bash`` and the check is repeated.

    Args:
        dep: Dependency name, e.g. ``"node"``. A name with no entry in
            ``_DEP_CHECKS`` skips detection and relies on the installer's
            exit status alone.
        interactive: When True, explanatory messages are printed and, if
            stdin is a TTY, the user is asked before anything is installed.
            Without a TTY the install proceeds without a prompt.

    Returns:
        True if the dependency is available afterwards. False if it is
        missing and the installer was not found, could not be launched,
        was declined by the user, or failed.
    """
    check = _DEP_CHECKS.get(dep)
    if check and check():
        return True

    script = _find_install_script()
    if script is None:
        if interactive:
            desc = _DEP_DESCRIPTIONS.get(dep, dep)
            print(f" {desc} is not installed and install.sh was not found.")
            print(f" Install {dep} manually and try again.")
        return False

    if interactive and sys.stdin.isatty():
        desc = _DEP_DESCRIPTIONS.get(dep, dep)
        try:
            reply = input(f"{desc} is not installed. Install now? [Y/n] ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            return False
        if reply not in ("", "y", "yes"):
            return False

    try:
        result = subprocess.run(
            ["bash", str(script), "--ensure", dep],
            # The prompt (if any) already happened above; tell the script
            # not to ask again.
            env={**os.environ, "IS_INTERACTIVE": "false"},
        )
    except OSError as exc:
        # bash missing or not executable (e.g. plain Windows). Honour the
        # boolean contract instead of leaking FileNotFoundError to callers.
        if interactive:
            print(f" Could not run install.sh ({exc}).")
            print(f" Install {dep} manually and try again.")
        return False
    if result.returncode != 0:
        return False
    # Re-run detection: a zero exit status alone does not prove the
    # dependency is now discoverable from this process.
    if check:
        return check()
    return True

View file

@ -152,6 +152,30 @@ def _apply_doctor_tool_availability_overrides(available: list[str], unavailable:
return updated_available, updated_unavailable
def _has_healthy_oauth_fallback_for_apikey_provider(provider_label: str) -> bool:
"""Return True when a direct API-key probe failure is non-blocking.
Some provider families support both a direct API-key path and a separate
OAuth runtime path. When the OAuth path is already healthy, doctor should
still show a failed API-key connectivity row, but it should not promote
that direct-key problem into the final blocking summary.
"""
try:
from hermes_cli.auth import (
get_gemini_oauth_auth_status,
get_minimax_oauth_auth_status,
)
except Exception:
return False
normalized = (provider_label or "").strip().lower()
if normalized in {"google / gemini", "gemini"}:
return bool((get_gemini_oauth_auth_status() or {}).get("logged_in"))
if normalized == "minimax":
return bool((get_minimax_oauth_auth_status() or {}).get("logged_in"))
return False
def check_ok(text: str, detail: str = ""):
print(f" {color('', Colors.GREEN)} {text}" + (f" {color(detail, Colors.DIM)}" if detail else ""))
@ -656,15 +680,17 @@ def run_doctor(args):
if fallback_config.exists():
check_ok("cli-config.yaml exists (in project directory)")
else:
example_config = PROJECT_ROOT / 'cli-config.yaml.example'
if should_fix and example_config.exists():
if should_fix:
config_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(example_config), str(config_path))
check_ok(f"Created {_DHH}/config.yaml from cli-config.yaml.example")
example_config = PROJECT_ROOT / 'cli-config.yaml.example'
if example_config.exists():
shutil.copy2(str(example_config), str(config_path))
check_ok(f"Created {_DHH}/config.yaml from cli-config.yaml.example")
else:
from hermes_cli.config import DEFAULT_CONFIG, save_config
save_config(DEFAULT_CONFIG)
check_ok(f"Created {_DHH}/config.yaml from defaults")
fixed_count += 1
elif should_fix:
check_warn("config.yaml not found and no example to copy from")
manual_issues.append(f"Create {_DHH}/config.yaml manually")
else:
check_warn("config.yaml not found", "(using defaults)")
@ -1448,6 +1474,15 @@ def run_doctor(args):
}
if base_url_host_matches(base, "api.kimi.com"):
headers["User-Agent"] = "claude-code/0.1.0"
# Google's Generative Language API (generativelanguage.googleapis.com)
# rejects ``Authorization: Bearer <api-key>`` with 401
# ``ACCESS_TOKEN_TYPE_UNSUPPORTED`` — that header is reserved for
# OAuth 2 access tokens, not plain API keys. Plain keys use
# ``x-goog-api-key`` (or ``?key=``). Without this, a perfectly valid
# GOOGLE_API_KEY/GEMINI_API_KEY always shows red in ``hermes doctor``.
if url and base_url_host_matches(url, "generativelanguage.googleapis.com"):
headers.pop("Authorization", None)
headers["x-goog-api-key"] = key
r = httpx.get(url, headers=headers, timeout=10)
if (
pname == "Alibaba/DashScope"
@ -1592,7 +1627,10 @@ def run_doctor(args):
print(f" {_glyph} {_label} {_detail}")
else:
print(f" {_glyph} {_label}")
for _issue in _r.issues:
_issues_to_add = list(_r.issues)
if _issues_to_add and _has_healthy_oauth_fallback_for_apikey_provider(_r.label):
_issues_to_add = []
for _issue in _issues_to_add:
issues.append(_issue)
# =========================================================================

View file

@ -5,6 +5,7 @@ Handles: hermes gateway [run|start|stop|restart|status|install|uninstall|setup]
"""
import asyncio
import logging
import os
import shutil
import signal
@ -38,6 +39,7 @@ from hermes_cli.setup import (
)
from hermes_cli.colors import Colors, color
logger = logging.getLogger(__name__)
# =============================================================================
# Process Management (for manual gateway runs)
@ -2103,15 +2105,41 @@ def _hermes_home_for_target_user(target_home_dir: str) -> str:
return str(current_hermes)
def _build_service_path_dirs(project_root: Path | None = None) -> list[str]:
    """Collect the PATH directories to put into a generated service unit.

    Args:
        project_root: Checkout to inspect; defaults to ``PROJECT_ROOT``.

    Returns:
        Directory paths as strings, in priority order:
        1. ``<project_root>/venv/bin`` if it exists; otherwise, when running
           inside a virtualenv, ``<sys.prefix>/bin`` (added without an
           existence check);
        2. ``<project_root>/node_modules/.bin``;
        3. ``<hermes home>/node/bin``;
        4. ``<hermes home>/node_modules/.bin``.
        Entries 2-4 are included only when the directory exists.
    """
    root = PROJECT_ROOT if project_root is None else project_root
    path_dirs: list[str] = []

    project_venv_bin = root / "venv" / "bin"
    if project_venv_bin.is_dir():
        path_dirs.append(str(project_venv_bin))
    elif sys.prefix != sys.base_prefix:
        path_dirs.append(str(Path(sys.prefix) / "bin"))

    project_node_bin = root / "node_modules" / ".bin"
    if project_node_bin.is_dir():
        path_dirs.append(str(project_node_bin))

    home = get_hermes_home()
    for optional_dir in (home / "node" / "bin", home / "node_modules" / ".bin"):
        if optional_dir.is_dir():
            path_dirs.append(str(optional_dir))

    return path_dirs
def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
python_path = get_python_path()
working_dir = str(PROJECT_ROOT)
detected_venv = _detect_venv_dir()
venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv")
venv_bin = str(detected_venv / "bin") if detected_venv else str(PROJECT_ROOT / "venv" / "bin")
node_bin = str(PROJECT_ROOT / "node_modules" / ".bin")
path_entries = [venv_bin, node_bin]
path_entries = _build_service_path_dirs()
resolved_node = shutil.which("node")
if resolved_node:
resolved_node_dir = str(Path(resolved_node).resolve().parent)
@ -2138,8 +2166,6 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
python_path = _remap_path_for_user(python_path, home_dir)
working_dir = _remap_path_for_user(working_dir, home_dir)
venv_dir = _remap_path_for_user(venv_dir, home_dir)
venv_bin = _remap_path_for_user(venv_bin, home_dir)
node_bin = _remap_path_for_user(node_bin, home_dir)
path_entries = [_remap_path_for_user(p, home_dir) for p in path_entries]
path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
path_entries.extend(_build_wsl_interop_paths(path_entries))
@ -2754,12 +2780,10 @@ def generate_launchd_plist() -> str:
# the systemd unit), then capture the user's full shell PATH so every
# user-installed tool (node, ffmpeg, …) is reachable.
detected_venv = _detect_venv_dir()
venv_bin = str(detected_venv / "bin") if detected_venv else str(PROJECT_ROOT / "venv" / "bin")
venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv")
node_bin = str(PROJECT_ROOT / "node_modules" / ".bin")
# Resolve the directory containing the node binary (e.g. Homebrew, nvm)
# so it's explicitly in PATH even if the user's shell PATH changes later.
priority_dirs = [venv_bin, node_bin]
priority_dirs = _build_service_path_dirs()
resolved_node = shutil.which("node")
if resolved_node:
resolved_node_dir = str(Path(resolved_node).resolve().parent)

View file

@ -34,6 +34,7 @@ import logging
import re
import time
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
@ -45,6 +46,16 @@ logger = logging.getLogger(__name__)
DEFAULT_MAX_TURNS = 20
DEFAULT_JUDGE_TIMEOUT = 30.0
# Judge output budget. The freeform judge returns a one-line JSON verdict, but
# reasoning models (deepseek-v4, qwq, etc.) burn tokens on hidden reasoning
# before emitting the visible JSON — and the first /goal turn's prompt is
# larger than later turns, which pushes total reply length past tight caps.
# 200 tokens (the original default) reliably truncated the JSON on reasoning
# models, leaving '{"done": true, "reason": "The agent successfully' and
# triggering the auto-pause. 4096 covers reasoning + verdict on every model
# we've live-tested; override via auxiliary.goal_judge.max_tokens for
# specifically constrained setups.
DEFAULT_JUDGE_MAX_TOKENS = 4096
# Cap how much of the last response + recent messages we send to the judge.
_JUDGE_RESPONSE_SNIPPET_CHARS = 4000
# After this many consecutive judge *parse* failures (empty output / non-JSON),
@ -100,6 +111,7 @@ JUDGE_SYSTEM_PROMPT = (
JUDGE_USER_PROMPT_TEMPLATE = (
"Goal:\n{goal}\n\n"
"Agent's most recent response:\n{response}\n\n"
"Current time: {current_time}\n\n"
"Is the goal satisfied?"
)
@ -110,6 +122,7 @@ JUDGE_USER_PROMPT_WITH_SUBGOALS_TEMPLATE = (
"Additional criteria the user added mid-loop (all must also be "
"satisfied for the goal to be DONE):\n{subgoals_block}\n\n"
"Agent's most recent response:\n{response}\n\n"
"Current time: {current_time}\n\n"
"Decision: For each numbered criterion above, find concrete "
"evidence in the agent's response that the criterion is "
"satisfied. Do not accept generic phrases like 'all requirements "
@ -282,6 +295,30 @@ def _truncate(text: str, limit: int) -> str:
_JSON_OBJECT_RE = re.compile(r"\{.*?\}", re.DOTALL)
def _goal_judge_max_tokens() -> int:
    """Resolve ``auxiliary.goal_judge.max_tokens`` from the loaded config.

    Any failure along the way (config cannot be imported or loaded, the
    section has an unexpected shape, the value cannot be converted with
    ``int()``) and any non-positive result fall back to
    ``DEFAULT_JUDGE_MAX_TOKENS`` rather than crashing the goal loop.

    Returns:
        The configured positive token budget, or the default.
    """
    try:
        from hermes_cli.config import load_config

        auxiliary = load_config().get("auxiliary") or {}
        judge_cfg = auxiliary.get("goal_judge", {})
        configured = int(judge_cfg.get("max_tokens", DEFAULT_JUDGE_MAX_TOKENS))
    except Exception:
        # Deliberate best-effort: a broken config must not stop the judge.
        return DEFAULT_JUDGE_MAX_TOKENS
    return configured if configured > 0 else DEFAULT_JUDGE_MAX_TOKENS
def _parse_judge_response(raw: str) -> Tuple[bool, str, bool]:
"""Parse the judge's reply. Fail-open to ``(False, "<reason>", parse_failed)``.
@ -381,6 +418,7 @@ def judge_goal(
# Build the prompt — pick the with-subgoals variant when applicable.
clean_subgoals = [s.strip() for s in (subgoals or []) if s and s.strip()]
current_time = datetime.now(tz=timezone.utc).astimezone().strftime("%Y-%m-%d %H:%M:%S %Z")
if clean_subgoals:
subgoals_block = "\n".join(
f"- {i}. {text}" for i, text in enumerate(clean_subgoals, start=1)
@ -389,11 +427,13 @@ def judge_goal(
goal=_truncate(goal, 2000),
subgoals_block=_truncate(subgoals_block, 2000),
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
current_time=current_time,
)
else:
prompt = JUDGE_USER_PROMPT_TEMPLATE.format(
goal=_truncate(goal, 2000),
response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
current_time=current_time,
)
try:
@ -404,7 +444,7 @@ def judge_goal(
{"role": "user", "content": prompt},
],
temperature=0,
max_tokens=200,
max_tokens=_goal_judge_max_tokens(),
timeout=timeout,
extra_body=get_auxiliary_extra_body() or None,
)

View file

@ -1403,7 +1403,7 @@ def _cmd_diagnostics(args: argparse.Namespace) -> int:
sev = getattr(args, "severity", None)
if sev:
for tid in list(diags_by_task.keys()):
kept = [d for d in diags_by_task[tid] if d.severity == sev]
kept = [d for d in diags_by_task[tid] if kd.SEVERITY_ORDER.index(d.severity) >= kd.SEVERITY_ORDER.index(sev)]
if kept:
diags_by_task[tid] = kept
else:

View file

@ -93,6 +93,7 @@ from toolsets import get_toolset_names
VALID_STATUSES = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
VALID_WORKSPACE_KINDS = {"scratch", "worktree", "dir"}
KNOWN_TOOLSET_NAMES = frozenset(name.casefold() for name in get_toolset_names())
_IS_WINDOWS = sys.platform == "win32"
# A running task's claim is valid for 15 minutes; after that the next
# dispatcher tick reclaims it. Workers that outlive this window should call
@ -4024,6 +4025,7 @@ def _default_spawn(
stderr=subprocess.STDOUT,
env=env,
start_new_session=True,
creationflags=subprocess.CREATE_NO_WINDOW if _IS_WINDOWS else 0,
)
except FileNotFoundError:
log_f.close()

View file

@ -1024,6 +1024,14 @@ def _ensure_tui_node() -> None:
os.environ["PATH"] = os.pathsep.join(parts)
def _find_bundled_tui(hermes_cli_dir: Path | None = None) -> Path | None:
"""Find a pre-built TUI entry.js bundled in the wheel."""
if hermes_cli_dir is None:
hermes_cli_dir = Path(__file__).parent
bundled = hermes_cli_dir / "tui_dist" / "entry.js"
return bundled if bundled.is_file() else None
def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
"""TUI: --dev → tsx src; else node dist (HERMES_TUI_DIR prebuilt or esbuild)."""
_ensure_tui_node()
@ -1034,6 +1042,13 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
if env_node and os.path.isfile(env_node) and os.access(env_node, os.X_OK):
return env_node
path = shutil.which(bin)
if not path and bin == "node":
try:
from hermes_cli.dep_ensure import ensure_dependency
if ensure_dependency("node"):
path = shutil.which("node")
except Exception:
pass
if not path:
print(f"{bin} not found — install Node.js to use the TUI.")
sys.exit(1)
@ -1058,8 +1073,14 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
node = _node_bin("node")
return [node, str(p / "dist" / "entry.js")], p
# 1b. Bundled in wheel (pip install)
bundled = _find_bundled_tui()
if bundled is not None:
node = _node_bin("node")
return [node, str(bundled)], bundled.parent
# 2. Normal flow: npm install if needed, always esbuild, then node dist/entry.js.
# --dev flow: npm install if needed, then tsx src/entry.tsx (no build).
# --dev flow: npm install if needed, then tsx src/entry.tsx.
if _tui_need_npm_install(tui_dir):
npm = _node_bin("npm")
if not os.environ.get("HERMES_QUIET"):
@ -1081,10 +1102,30 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
sys.exit(1)
if tui_dev:
# Keep the local @hermes/ink package exports in sync with source.
# --dev runs src/entry.tsx directly, but @hermes/ink resolves through
# packages/hermes-ink/dist/entry-exports.js. If that dist bundle is
# stale after a pull, newer hooks/components can exist in src while
# being missing at runtime (e.g. useCursorAdvance). Prebuild it here.
npm = _node_bin("npm")
ink_dir = tui_dir / "packages" / "hermes-ink"
result = subprocess.run(
[npm, "run", "build"],
cwd=str(ink_dir),
capture_output=True,
text=True,
)
if result.returncode != 0:
combined = f"{result.stdout or ''}{result.stderr or ''}".strip()
preview = "\n".join(combined.splitlines()[-30:])
print("TUI dev prebuild failed.")
if preview:
print(preview)
sys.exit(1)
tsx = tui_dir / "node_modules" / ".bin" / "tsx"
if tsx.exists():
return [str(tsx), "src/entry.tsx"], tui_dir
npm = _node_bin("npm")
return [npm, "start"], tui_dir
# Always rebuild — esbuild is fast and this avoids staleness-edge-case bugs.
@ -1522,14 +1563,18 @@ def cmd_whatsapp(args):
)
print(f"\n✓ Mode: {mode_label}")
# ── Step 2: Enable WhatsApp ──────────────────────────────────────────
# ── Step 2: Mode is selected, will enable WhatsApp only after pairing ──
# We intentionally don't write WHATSAPP_ENABLED=true here. If the user
# aborts the wizard later (Ctrl+C, failed npm install, missed QR scan),
# we'd otherwise leave .env claiming WhatsApp is ready when the bridge
# has no creds.json. Every subsequent `hermes gateway` then paid a 30s
# bridge-bootstrap timeout and queued WhatsApp for indefinite retries.
# Now: aborted setup leaves WHATSAPP_ENABLED unset → gateway skips it.
# Re-runs that already have WHATSAPP_ENABLED=true (from a prior
# successful pairing) stay enabled — we just don't write it pre-emptively.
print()
current = get_env_value("WHATSAPP_ENABLED")
if current and current.lower() == "true":
if (get_env_value("WHATSAPP_ENABLED") or "").lower() == "true":
print("✓ WhatsApp is already enabled")
else:
save_env_value("WHATSAPP_ENABLED", "true")
print("✓ WhatsApp enabled")
# ── Step 3: Allowed users ────────────────────────────────────────────
current_users = get_env_value("WHATSAPP_ALLOWED_USERS") or ""
@ -1619,6 +1664,12 @@ def cmd_whatsapp(args):
session_dir.mkdir(parents=True, exist_ok=True)
print(" ✓ Session cleared")
else:
# Existing pairing — ensure WHATSAPP_ENABLED reflects that.
# (Older installs may have lost the env var; covers re-runs
# where the user picked "no, keep my session" but the var
# was never set or got removed.)
if (get_env_value("WHATSAPP_ENABLED") or "").lower() != "true":
save_env_value("WHATSAPP_ENABLED", "true")
print("\n✓ WhatsApp is configured and paired!")
print(" Start the gateway with: hermes gateway")
return
@ -1647,6 +1698,11 @@ def cmd_whatsapp(args):
# ── Step 7: Post-pairing ─────────────────────────────────────────────
print()
if (session_dir / "creds.json").exists():
# Only enable WhatsApp now that pairing actually succeeded. If the
# user Ctrl+C'd at any earlier step, WHATSAPP_ENABLED stays unset
# and `hermes gateway` skips it cleanly instead of paying a 30s
# bridge timeout + queueing the platform for indefinite retries.
save_env_value("WHATSAPP_ENABLED", "true")
print("✓ WhatsApp paired successfully!")
print()
if wa_mode == "bot":
@ -1677,6 +1733,24 @@ def cmd_setup(args):
run_setup_wizard(args)
def cmd_postinstall(args):
    """One-shot bootstrap for pip users: non-Python deps, then setup if needed.

    Calls ``ensure_dependency`` for node, browser, ripgrep and ffmpeg in
    that order, ignoring the results. It then runs ``cmd_setup(args)`` when
    no provider is configured yet, or prints a completion line otherwise.

    Args:
        args: Parsed CLI arguments, forwarded unchanged to ``cmd_setup``.
    """
    from hermes_cli.dep_ensure import ensure_dependency

    print("⚕ Hermes post-install bootstrap")
    print()
    for dependency in ("node", "browser", "ripgrep", "ffmpeg"):
        ensure_dependency(dependency)

    needs_setup = not _has_any_provider_configured()
    print()
    if needs_setup:
        cmd_setup(args)
        return
    print("✓ Post-install complete.")
def cmd_model(args):
"""Select default model — starts with provider selection, then model picker."""
_require_tty("model")
@ -1932,6 +2006,8 @@ def select_provider_and_model(args=None):
_model_flow_nous(config, current_model, args=args)
elif selected_provider == "openai-codex":
_model_flow_openai_codex(config, current_model)
elif selected_provider == "xai-oauth":
_model_flow_xai_oauth(config, current_model)
elif selected_provider == "qwen-oauth":
_model_flow_qwen_oauth(config, current_model)
elif selected_provider == "minimax-oauth":
@ -2813,6 +2889,87 @@ def _model_flow_openai_codex(config, current_model=""):
print("No change.")
def _model_flow_xai_oauth(_config, current_model=""):
    """Interactive xAI Grok OAuth (SuperGrok Subscription) model flow.

    Makes sure the user is logged in (offering reuse, re-authentication or
    cancel when credentials already exist), resolves a base URL, then shows
    the model picker and persists the selection.

    Args:
        _config: Unused by this flow; present for signature parity with the
            other ``_model_flow_*`` functions.
        current_model: Model to preselect in the picker. When empty, the
            first catalog entry (or ``"grok-4.3"``) is preselected instead.
    """
    from hermes_cli.auth import (
        get_xai_oauth_auth_status,
        _prompt_model_selection,
        _save_model_choice,
        _update_config_for_provider,
        resolve_xai_oauth_runtime_credentials,
        _login_xai_oauth,
        DEFAULT_XAI_OAUTH_BASE_URL,
        PROVIDER_REGISTRY,
    )
    from hermes_cli.models import _PROVIDER_MODELS

    auth_state = get_xai_oauth_auth_status()

    if not auth_state.get("logged_in"):
        # No stored credentials: go straight into the login flow.
        print("Not logged into xAI Grok OAuth (SuperGrok Subscription). Starting login...")
        print()
        try:
            _login_xai_oauth(argparse.Namespace(), PROVIDER_REGISTRY["xai-oauth"])
        except SystemExit:
            print("Login cancelled or failed.")
            return
        except Exception as exc:
            print(f"Login failed: {exc}")
            return
    else:
        print(" xAI Grok OAuth (SuperGrok Subscription) credentials: ✓")
        print()
        print(" 1. Use existing credentials")
        print(" 2. Reauthenticate (new OAuth login)")
        print(" 3. Cancel")
        print()
        try:
            answer = input(" Choice [1/2/3]: ").strip()
        except (KeyboardInterrupt, EOFError):
            # Interrupted / closed stdin falls back to the existing credentials.
            answer = "1"

        if answer == "3":
            return
        if answer == "2":
            print("Starting a fresh xAI OAuth login...")
            print()
            try:
                _login_xai_oauth(
                    argparse.Namespace(),
                    PROVIDER_REGISTRY["xai-oauth"],
                    force_new_login=True,
                )
            except SystemExit:
                print("Login cancelled or failed.")
                return
            except Exception as exc:
                print(f"Login failed: {exc}")
                return
        # Any other answer keeps the existing credentials.

    # ``resolve_xai_oauth_runtime_credentials`` only reads the auth.json
    # singleton, while credentials may live solely in the pool (e.g. after
    # ``hermes auth add xai-oauth``). In that case keep the default base URL
    # so the picker still completes instead of failing with
    # ``Could not resolve xAI OAuth credentials``.
    resolved_url = DEFAULT_XAI_OAUTH_BASE_URL
    try:
        runtime = resolve_xai_oauth_runtime_credentials()
        candidate = (runtime.get("base_url") or "").strip().rstrip("/")
        if candidate:
            resolved_url = candidate
    except Exception:
        pass

    catalog = list(_PROVIDER_MODELS.get("xai-oauth") or _PROVIDER_MODELS.get("xai") or [])
    if current_model:
        preselected = current_model
    elif catalog:
        preselected = catalog[0]
    else:
        preselected = "grok-4.3"

    picked = _prompt_model_selection(catalog, current_model=preselected)
    if not picked:
        print("No change.")
        return

    _save_model_choice(picked)
    _update_config_for_provider("xai-oauth", resolved_url)
    print(f"Default model set to: {picked} (via xAI Grok OAuth — SuperGrok Subscription)")
_DEFAULT_QWEN_PORTAL_MODELS = [
"qwen3-coder-plus",
"qwen3-coder",
@ -7089,17 +7246,24 @@ def _update_node_dependencies() -> None:
if not (path / "package.json").exists():
continue
# Stream npm output (no `--silent`, no `capture_output`) so any
# optional dependency postinstall scripts (e.g. `agent-browser`'s
# Chromium fetch on first install) print progress instead of
# appearing to hang silently for minutes (#18840). The
# `_UpdateOutputStream` wrapper installed by the updater mirrors
# streamed output to ``~/.hermes/logs/update.log`` so nothing is lost.
result = _run_npm_install_deterministic(
npm,
path,
extra_args=("--silent", "--no-fund", "--no-audit", "--progress=false"),
extra_args=("--no-fund", "--no-audit", "--progress=false"),
capture_output=False,
)
if result.returncode == 0:
print(f"{label}")
continue
print(f" ⚠ npm install failed in {label}")
stderr = (result.stderr or "").strip()
stderr = (result.stderr or "").strip() if result.stderr else ""
if stderr:
print(f" {stderr.splitlines()[-1]}")
@ -7282,6 +7446,22 @@ def _finalize_update_output(state):
def _cmd_update_check():
"""Implement ``hermes update --check``: fetch and report without installing."""
from hermes_cli.config import detect_install_method
method = detect_install_method(PROJECT_ROOT)
if method == "pip":
from hermes_cli.config import recommended_update_command
from hermes_cli.banner import check_via_pypi
result = check_via_pypi()
if result is None:
print("✗ Could not reach PyPI to check for updates.")
sys.exit(1)
elif result == 0:
print("✓ Already up to date.")
else:
print("⚕ Update available on PyPI.")
print(f" Run '{recommended_update_command()}' to install.")
return
git_dir = PROJECT_ROOT / ".git"
if not git_dir.exists():
print("✗ Not a git repository — cannot check for updates.")
@ -7559,6 +7739,28 @@ def cmd_update(args):
_finalize_update_output(_update_io_state)
def _cmd_update_pip(args):
"""Update Hermes via pip (for PyPI installs)."""
from hermes_cli import __version__
print(f"→ Current version: {__version__}")
print("→ Checking PyPI for updates...")
uv = shutil.which("uv")
if uv:
cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"]
else:
cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"]
print(f"→ Running: {' '.join(cmd)}")
result = subprocess.run(cmd)
if result.returncode != 0:
print("✗ Update failed")
sys.exit(1)
print("✓ Update complete! Restart hermes to use the new version.")
def _cmd_update_impl(args, gateway_mode: bool):
"""Body of ``cmd_update`` — kept separate so the wrapper can always
restore stdio even on ``sys.exit``."""
@ -7586,6 +7788,11 @@ def _cmd_update_impl(args, gateway_mode: bool):
if sys.platform == "win32":
use_zip_update = True
else:
from hermes_cli.config import detect_install_method
method = detect_install_method(PROJECT_ROOT)
if method == "pip":
_cmd_update_pip(args)
return
print("✗ Not a git repository. Please reinstall:")
print(
" curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash"
@ -9400,7 +9607,7 @@ def _build_provider_choices() -> list[str]:
except Exception:
# Fallback: static list guarantees the CLI always works
return [
"auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot",
"auto", "openrouter", "nous", "openai-codex", "xai-oauth", "copilot-acp", "copilot",
"anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry",
"ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn",
"stepfun", "minimax", "minimax-cn", "kilocode", "novita", "xiaomi", "arcee",
@ -9424,7 +9631,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
"config", "cron", "curator", "dashboard", "debug", "doctor",
"dump", "fallback", "gateway", "hooks", "import", "insights",
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
"model", "pairing", "plugins", "profile", "proxy", "sessions", "setup",
"model", "pairing", "plugins", "postinstall", "profile", "proxy", "sessions", "setup",
"skills", "slack", "status", "tools", "uninstall", "update",
"version", "webhook", "whatsapp", "chat",
# Help-ish invocations — plugin commands not being listed in
@ -9863,6 +10070,17 @@ def main():
)
setup_parser.set_defaults(func=cmd_setup)
# =========================================================================
# postinstall command
# =========================================================================
postinstall_parser = subparsers.add_parser(
"postinstall",
help="Bootstrap non-Python deps for pip installs (node, browser, ripgrep, ffmpeg)",
description="One-shot post-install for pip users. Installs system "
"dependencies that pip cannot provide, then runs setup if needed.",
)
postinstall_parser.set_defaults(func=cmd_postinstall)
# =========================================================================
# whatsapp command
# =========================================================================
@ -9921,6 +10139,12 @@ def main():
)
slack_parser.set_defaults(func=cmd_slack)
# =========================================================================
# send command — pipe shell-script output to any configured platform
# =========================================================================
from hermes_cli.send_cmd import register_send_subparser
register_send_subparser(subparsers)
# =========================================================================
# login command
# =========================================================================
@ -9931,7 +10155,7 @@ def main():
)
login_parser.add_argument(
"--provider",
choices=["nous", "openai-codex"],
choices=["nous", "openai-codex", "xai-oauth"],
default=None,
help="Provider to authenticate with (default: nous)",
)
@ -9977,7 +10201,7 @@ def main():
)
logout_parser.add_argument(
"--provider",
choices=["nous", "openai-codex", "spotify"],
choices=["nous", "openai-codex", "xai-oauth", "spotify"],
default=None,
help="Provider to log out from (default: active provider)",
)
@ -11715,6 +11939,20 @@ Examples:
action="store_true",
help="Run interactive Hermes provider/model setup for ACP terminal auth",
)
acp_parser.add_argument(
"--setup-browser",
action="store_true",
help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ "
"for browser tool support (idempotent).",
)
acp_parser.add_argument(
"--yes",
"-y",
action="store_true",
dest="assume_yes",
help="Accept all prompts (used by --setup-browser to skip the "
"~400 MB Chromium download confirmation).",
)
def cmd_acp(args):
"""Launch Hermes Agent as an ACP server."""
@ -11728,6 +11966,10 @@ Examples:
acp_argv.append("--check")
if getattr(args, "setup", False):
acp_argv.append("--setup")
if getattr(args, "setup_browser", False):
acp_argv.append("--setup-browser")
if getattr(args, "assume_yes", False):
acp_argv.append("--yes")
acp_main(acp_argv)
except ImportError:
print("ACP dependencies not installed.", file=sys.stderr)

View file

@ -25,6 +25,7 @@ from hermes_cli.config import (
)
from hermes_cli.colors import Colors, color
from hermes_constants import display_hermes_home
from tools.mcp_tool import _ENV_VAR_PATTERN
logger = logging.getLogger(__name__)
@ -551,7 +552,7 @@ def cmd_mcp_test(args):
for k, v in headers.items():
if isinstance(v, str) and ("key" in k.lower() or "auth" in k.lower()):
# Mask the value
resolved = _interpolate_value(v)
resolved = _ENV_VAR_PATTERN.sub(lambda m: os.getenv(m.group(1), ""), v)
if len(resolved) > 8:
masked = resolved[:4] + "***" + resolved[-4:]
else:
@ -581,13 +582,6 @@ def cmd_mcp_test(args):
print()
def _interpolate_value(value: str) -> str:
"""Resolve ``${ENV_VAR}`` references in a string."""
def _replace(m):
return os.getenv(m.group(1), "")
return re.sub(r"\$\{(\w+)\}", _replace, value)
# ─── hermes mcp login ────────────────────────────────────────────────────────
def cmd_mcp_login(args):

View file

@ -1688,7 +1688,11 @@ def list_authenticated_providers(
continue
# Live model discovery from custom provider endpoints (matches
# Section 3 behavior for user ``providers:`` entries).
if api_url and api_key:
# Also probes when no api_key is set (e.g. local llama.cpp /
# Ollama servers) — the /models endpoint often works without
# auth. The CLI's _model_flow_named_custom always probes, so
# the Telegram/Discord picker should do the same for parity.
if api_url:
try:
from hermes_cli.models import fetch_api_models

View file

@ -116,13 +116,23 @@ def _codex_curated_models() -> list[str]:
# (grok-4, grok-4-0709, grok-4-fast{,-reasoning,-non-reasoning},
# grok-4-1-fast{,-reasoning,-non-reasoning}, grok-code-fast-1 → grok-4.3).
_XAI_STATIC_FALLBACK: list[str] = [
"grok-4.3",
"grok-4.20-0309-reasoning",
"grok-4.20-0309-non-reasoning",
"grok-4.20-multi-agent-0309",
"grok-4.3",
]
_XAI_TOP_MODEL = "grok-4.3"
def _xai_promote_top(ids: list[str]) -> list[str]:
"""Pin the headline xAI model to the top of the curated list."""
if _XAI_TOP_MODEL in ids:
return [_XAI_TOP_MODEL] + [m for m in ids if m != _XAI_TOP_MODEL]
return ids
def _xai_curated_models() -> list[str]:
"""Derive the xAI-direct curated list from models.dev disk cache.
@ -142,7 +152,7 @@ def _xai_curated_models() -> list[str]:
if isinstance(models, dict) and models:
ids = [mid for mid in models.keys() if isinstance(mid, str)]
if ids:
return sorted(ids)
return _xai_promote_top(sorted(ids))
except Exception:
# Any failure (missing file, malformed JSON, import error)
# falls through to the static list.
@ -190,6 +200,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"gpt-4o-mini",
],
"openai-codex": _codex_curated_models(),
"xai-oauth": _xai_curated_models(),
"copilot-acp": [
"copilot-acp",
],
@ -918,6 +929,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("alibaba", "Qwen Cloud", "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("xai-oauth", "xAI Grok OAuth (SuperGrok Subscription)", "xAI Grok OAuth (SuperGrok Subscription)"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
ProviderEntry("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
@ -1036,6 +1048,10 @@ _PROVIDER_ALIASES = {
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
"grok": "xai",
"grok-oauth": "xai-oauth",
"xai-oauth": "xai-oauth",
"x-ai-oauth": "xai-oauth",
"xai-grok-oauth": "xai-oauth",
"x-ai": "xai",
"x.ai": "xai",
"nim": "nvidia",
@ -2166,6 +2182,8 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
except Exception:
access_token = None
return get_codex_model_ids(access_token=access_token)
if normalized == "xai-oauth":
return list(_PROVIDER_MODELS.get("xai-oauth", _PROVIDER_MODELS.get("xai", [])))
if normalized in {"copilot", "copilot-acp"}:
try:
live = _fetch_github_models(_resolve_copilot_catalog_api_key())
@ -2507,6 +2525,7 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
return (
normalized.startswith(COPILOT_BASE_URL)
or normalized.startswith("https://models.github.ai/inference")
or normalized.startswith("https://models.inference.ai.azure.com")
)
@ -3444,14 +3463,14 @@ def validate_requested_model(
"message": message,
}
# OpenAI Codex has its own catalog path; /v1/models probing is not the right validation path.
if normalized == "openai-codex":
# Providers with non-standard catalog validation — /v1/models probing is not the right path.
if normalized in {"openai-codex", "xai-oauth"}:
try:
codex_models = provider_model_ids("openai-codex")
catalog_models = provider_model_ids(normalized)
except Exception:
codex_models = []
if codex_models:
if requested_for_lookup in set(codex_models):
catalog_models = []
if catalog_models:
if requested_for_lookup in set(catalog_models):
return {
"accepted": True,
"persist": True,
@ -3459,7 +3478,7 @@ def validate_requested_model(
"message": None,
}
# Auto-correct if the top match is very similar (e.g. typo)
auto = get_close_matches(requested_for_lookup, codex_models, n=1, cutoff=0.9)
auto = get_close_matches(requested_for_lookup, catalog_models, n=1, cutoff=0.9)
if auto:
return {
"accepted": True,
@ -3468,17 +3487,18 @@ def validate_requested_model(
"corrected_model": auto[0],
"message": f"Auto-corrected `{requested}` → `{auto[0]}`",
}
suggestions = get_close_matches(requested_for_lookup, codex_models, n=3, cutoff=0.5)
suggestions = get_close_matches(requested_for_lookup, catalog_models, n=3, cutoff=0.5)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
provider_label = "OpenAI Codex" if normalized == "openai-codex" else "xAI Grok OAuth (SuperGrok Subscription)"
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in the OpenAI Codex model listing. "
"It may still work if your ChatGPT/Codex account has access to a newer or hidden model ID."
f"Note: `{requested}` was not found in the {provider_label} model listing. "
"It may still work if your account has access to a newer or hidden model ID."
f"{suggestion_text}"
),
}
@ -3702,13 +3722,12 @@ def validate_requested_model(
# Static-catalog fallback: when the /models probe was unreachable,
# validate against the curated list from provider_model_ids() — same
# pattern as the openai-codex and minimax branches above. This fixes
# /model switches in the gateway for providers like opencode-go and
# opencode-zen whose /models endpoint returns 404 against the HTML
# marketing site. Without this block, validate_requested_model would
# reject every model on such providers, switch_model() would return
# success=False, and the gateway would never write to
# _session_model_overrides.
# pattern as the openai-codex and minimax branches above. This keeps
# /model switches working in the gateway for providers whose /models
# endpoint is temporarily unreachable or returns a non-JSON payload.
# Without this block, validate_requested_model would reject every model
# on such providers, switch_model() would return success=False, and
# the gateway would never write to _session_model_overrides.
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
try:
catalog_models = provider_model_ids(normalized)

View file

@ -325,8 +325,15 @@ class PluginContext:
is_async: bool = False,
description: str = "",
emoji: str = "",
override: bool = False,
) -> None:
"""Register a tool in the global registry **and** track it as plugin-provided."""
"""Register a tool in the global registry **and** track it as plugin-provided.
Pass ``override=True`` to replace an existing built-in tool with the
same name (e.g. swap the default ``browser_navigate`` for a custom
CDP-backed implementation). Without it, attempting to register a name
already claimed by a different toolset is rejected.
"""
from tools.registry import registry
registry.register(
@ -339,9 +346,13 @@ class PluginContext:
is_async=is_async,
description=description,
emoji=emoji,
override=override,
)
self._manager._plugin_tool_names.add(name)
logger.debug("Plugin %s registered tool: %s", self.manifest.name, name)
logger.debug(
"Plugin %s registered tool: %s%s",
self.manifest.name, name, " (override)" if override else "",
)
# -- message injection --------------------------------------------------

View file

@ -708,55 +708,85 @@ def _plugin_exists(name: str) -> bool:
def _discover_all_plugins() -> list:
"""Return a list of (name, version, description, source, dir_path) for
every plugin the loader can see user + bundled + project.
"""Return a list of (key, version, description, source, dir_path) for
every plugin the loader can see user + bundled.
Matches the ordering/dedup of ``PluginManager.discover_and_load``:
bundled first, then user, then project; user overrides bundled on
name collision.
Mirrors :meth:`PluginManager._scan_directory_level` so category-namespaced
plugins (``observability/langfuse``, ``image_gen/openai``) surface here
just like flat ones (``disk-cleanup``). A subdirectory with no
``plugin.yaml`` of its own is treated as a category and recursed into
one level deeper (depth capped at 2, same as the loader).
The returned ``key`` is the path-derived registry key the value the
user types into ``hermes plugins enable <key>``. For category-namespaced
plugins that's ``<category>/<dirname>``; for flat plugins it's the
manifest's ``name`` (or the directory name if the manifest omits it).
User entries override bundled on key collision, matching
``PluginManager.discover_and_load``.
"""
try:
import yaml
except ImportError:
yaml = None
seen: dict = {} # name -> (name, version, description, source, path)
seen: dict = {} # key -> (key, version, description, source, path)
# Bundled (<repo>/plugins/<name>/), excluding memory/ and context_engine/
from hermes_cli.plugins import get_bundled_plugins_dir
repo_plugins = get_bundled_plugins_dir()
for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")):
def _scan(base: Path, source: str, prefix: str, depth: int) -> None:
if not base.is_dir():
continue
return
for d in sorted(base.iterdir()):
if not d.is_dir():
continue
if source == "bundled" and d.name in {"memory", "context_engine"}:
if (
depth == 0
and source == "bundled"
and d.name in {"memory", "context_engine"}
):
continue
manifest_file = d / "plugin.yaml"
if not manifest_file.exists():
manifest_file = d / "plugin.yml"
if not manifest_file.exists():
if manifest_file.exists():
manifest_name = d.name
version = ""
description = ""
if yaml:
try:
with open(manifest_file, encoding="utf-8") as f:
manifest = yaml.safe_load(f) or {}
manifest_name = manifest.get("name", d.name)
version = manifest.get("version", "")
description = manifest.get("description", "")
except Exception:
pass
# Path-derived key, intentionally ignoring the manifest
# ``name:`` field for category-namespaced plugins — mirrors
# ``PluginManager._parse_manifest`` in plugins.py:1027-1028
# so renaming a directory (without touching plugin.yaml) shifts
# the registry key in both places consistently.
key = f"{prefix}/{d.name}" if prefix else manifest_name
src_label = source
if source == "user" and (d / ".git").exists():
src_label = "git"
# Bundled is scanned before user, so the user pass overwrites
# bundled entries with the same key — matches
# PluginManager.discover_and_load's "user wins" semantics.
seen[key] = (key, version, description, src_label, d)
continue
name = d.name
version = ""
description = ""
if yaml:
try:
with open(manifest_file, encoding="utf-8") as f:
manifest = yaml.safe_load(f) or {}
name = manifest.get("name", d.name)
version = manifest.get("version", "")
description = manifest.get("description", "")
except Exception:
pass
# User plugins override bundled on name collision.
if name in seen and source == "bundled":
# No manifest at this level — treat as a category namespace and
# recurse one level deeper. Cap at depth 2 (same as the loader).
if depth >= 1:
continue
src_label = source
if source == "user" and (d / ".git").exists():
src_label = "git"
seen[name] = (name, version, description, src_label, d)
sub_prefix = f"{prefix}/{d.name}" if prefix else d.name
_scan(d, source, sub_prefix, depth + 1)
from hermes_cli.plugins import get_bundled_plugins_dir
_scan(get_bundled_plugins_dir(), "bundled", "", 0)
_scan(_plugins_dir(), "user", "", 0)
return list(seen.values())

View file

@ -60,6 +60,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
auth_type="oauth_external",
base_url_override="https://chatgpt.com/backend-api/codex",
),
"xai-oauth": HermesOverlay(
transport="codex_responses",
auth_type="oauth_external",
base_url_override="https://api.x.ai/v1",
base_url_env_var="XAI_BASE_URL",
),
"qwen-oauth": HermesOverlay(
transport="openai_chat",
auth_type="oauth_external",
@ -244,6 +250,10 @@ ALIASES: Dict[str, str] = {
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
"grok-oauth": "xai-oauth",
"xai-oauth": "xai-oauth",
"x-ai-oauth": "xai-oauth",
"xai-grok-oauth": "xai-oauth",
# nvidia
"nim": "nvidia",

View file

@ -15,12 +15,14 @@ from hermes_cli.auth import (
AuthError,
DEFAULT_CODEX_BASE_URL,
DEFAULT_QWEN_BASE_URL,
DEFAULT_XAI_OAUTH_BASE_URL,
PROVIDER_REGISTRY,
_agent_key_is_usable,
format_auth_error,
resolve_provider,
resolve_nous_runtime_credentials,
resolve_codex_runtime_credentials,
resolve_xai_oauth_runtime_credentials,
resolve_qwen_runtime_credentials,
resolve_gemini_oauth_runtime_credentials,
resolve_api_key_provider_credentials,
@ -102,8 +104,10 @@ def _auto_detect_local_model(base_url: str) -> str:
model_id = models[0].get("id", "")
if model_id:
return model_id
except Exception:
pass
except Exception as exc:
# Log instead of silently swallowing — aids debugging when
# local model auto-detection fails unexpectedly.
logger.debug("Auto-detect model from %s failed: %s", base_url, exc)
return ""
@ -236,6 +240,9 @@ def _resolve_runtime_from_pool_entry(
if provider == "openai-codex":
api_mode = "codex_responses"
base_url = base_url or DEFAULT_CODEX_BASE_URL
elif provider == "xai-oauth":
api_mode = "codex_responses"
base_url = base_url or DEFAULT_XAI_OAUTH_BASE_URL
elif provider == "qwen-oauth":
api_mode = "chat_completions"
base_url = base_url or DEFAULT_QWEN_BASE_URL
@ -1130,6 +1137,24 @@ def resolve_runtime_provider(
logger.info("Auto-detected Codex provider but credentials failed; "
"falling through to next provider.")
if provider == "xai-oauth":
try:
creds = resolve_xai_oauth_runtime_credentials()
return {
"provider": "xai-oauth",
"api_mode": "codex_responses",
"base_url": (creds.get("base_url") or "").rstrip("/") or DEFAULT_XAI_OAUTH_BASE_URL,
"api_key": creds.get("api_key", ""),
"source": creds.get("source", "hermes-auth-store"),
"last_refresh": creds.get("last_refresh"),
"requested_provider": requested_provider,
}
except AuthError:
if requested_provider != "auto":
raise
logger.info("Auto-detected xAI OAuth provider but credentials failed; "
"falling through to next provider.")
if provider == "qwen-oauth":
try:
creds = resolve_qwen_runtime_credentials()

445
hermes_cli/send_cmd.py Normal file
View file

@ -0,0 +1,445 @@
"""CLI subcommand: ``hermes send`` — pipe text from shell scripts to any
configured messaging platform (Telegram, Discord, Slack, Signal, SMS, etc.).

This is a thin wrapper around ``tools.send_message_tool.send_message_tool``
that exposes its functionality as a standalone CLI entry point so ops
scripts, cron jobs, CI hooks, and monitoring daemons can reuse the gateway's
already-configured credentials without having to reimplement each platform's
REST API client.

Design notes:

* No LLM, no agent loop — the subcommand just resolves arguments, reads the
  message body, calls the shared tool function, and prints/returns the
  result. It is intentionally fast, cheap, and side-effect-only.
* For platforms that send via bot token (Telegram, Discord, Slack, Signal,
  SMS, WhatsApp-CloudAPI, …) no running gateway is required. The tool
  talks directly to each platform's REST endpoint. For platforms that rely
  on a persistent adapter connection (plugin platforms, Matrix in some
  modes, …) a live gateway is needed; the underlying tool surfaces that
  error to the caller.
* Exit codes follow the classic Unix convention:

    0 — delivery (or list) succeeded
    1 — delivery failed at the platform level
    2 — usage / argument / config error (argparse already uses 2)
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
from typing import Optional
# Process exit codes used by ``hermes send``.
_USAGE_EXIT = 2  # usage / argument / config error
_FAILURE_EXIT = 1  # delivery failed at the platform level
_SUCCESS_EXIT = 0  # delivery (or list) succeeded
def _read_message_body(
positional: Optional[str],
file_path: Optional[str],
) -> Optional[str]:
"""Resolve the message body from (in order):
1. An explicit positional message argument.
2. ``--file PATH`` or ``--file -`` (where ``-`` means stdin).
3. Piped stdin when it is not attached to a TTY.
Returns ``None`` when nothing is available callers must treat that as
a usage error.
"""
if positional:
return positional
if file_path:
if file_path == "-":
return sys.stdin.read()
try:
return Path(file_path).read_text(encoding="utf-8")
except (OSError, UnicodeDecodeError) as exc:
print(f"hermes send: cannot read {file_path}: {exc}", file=sys.stderr)
sys.exit(_USAGE_EXIT)
# Piped input: only consume stdin when it is not a TTY. Reading from a
# TTY would block the user in a half-broken "type your message" state,
# which is a poor default for an ops CLI.
if not sys.stdin.isatty():
data = sys.stdin.read()
if data:
return data
return None
def _resolve_target(arg_to: Optional[str]) -> Optional[str]:
"""Return a cleaned ``--to`` value, or ``None`` when nothing is set."""
if arg_to and arg_to.strip():
return arg_to.strip()
return None
def _emit_result(
result_json: str,
*,
json_mode: bool,
quiet: bool,
) -> int:
"""Print the tool result in the requested format and return the exit code.
The underlying ``send_message_tool`` always returns a JSON string. We
parse it, decide success/failure, and format accordingly.
"""
try:
payload = json.loads(result_json) if result_json else {}
except json.JSONDecodeError:
# Shouldn't happen with the shared tool, but be defensive — pass the
# raw string through so the user can still see what went wrong.
payload = {"error": "invalid JSON from send_message_tool", "raw": result_json}
if json_mode:
print(json.dumps(payload, indent=2))
elif quiet:
pass
else:
if payload.get("error"):
print(f"hermes send: {payload['error']}", file=sys.stderr)
elif payload.get("success"):
note = payload.get("note")
if note:
print(note)
else:
print("sent")
else:
# Unknown shape — dump it so nothing is silently dropped.
print(json.dumps(payload, indent=2))
if payload.get("error"):
return _FAILURE_EXIT
if payload.get("skipped"):
return _SUCCESS_EXIT
if payload.get("success"):
return _SUCCESS_EXIT
# Unknown / unexpected — treat as failure so scripts notice.
return _FAILURE_EXIT
def _list_targets(platform_filter: Optional[str], *, json_mode: bool) -> int:
    """Print the configured send targets and return an exit code.

    The directory is obtained from ``load_directory()``. With ``json_mode``
    the (optionally filtered) platform mapping is dumped as JSON. Otherwise
    the unfiltered view is rendered by ``format_directory_for_display()`` and
    a filtered view is rendered here, one ``platform:name`` line per channel.

    Args:
        platform_filter: Platform name to restrict the listing to (matched
            case-insensitively after stripping), or ``None`` for everything.
        json_mode: Emit JSON instead of the human-readable listing.

    Returns:
        ``_SUCCESS_EXIT`` on success; ``_FAILURE_EXIT`` when the directory
        cannot be imported or read, or when the filter matches no platform.
    """
    try:
        from gateway.channel_directory import (
            format_directory_for_display,
            load_directory,
        )
    except Exception as exc:
        print(f"hermes send: failed to load channel directory: {exc}", file=sys.stderr)
        return _FAILURE_EXIT

    try:
        directory = load_directory()
    except Exception as exc:
        print(f"hermes send: failed to read channel directory: {exc}", file=sys.stderr)
        return _FAILURE_EXIT

    platforms = dict(directory.get("platforms") or {})

    if platform_filter:
        wanted = platform_filter.strip().lower()
        matches = {}
        for plat_key, plat_channels in platforms.items():
            if plat_key.lower() == wanted:
                matches[plat_key] = plat_channels
        if not matches:
            print(
                f"hermes send: no targets found for platform '{platform_filter}'. "
                f"Configured: {', '.join(sorted(platforms)) or '(none)'}",
                file=sys.stderr,
            )
            return _FAILURE_EXIT
        platforms = matches

    if json_mode:
        print(json.dumps({"platforms": platforms}, indent=2, default=str))
        return _SUCCESS_EXIT

    if not any(platforms.values()):
        print("No messaging platforms configured or no channels discovered yet.")
        print("Set one up with `hermes gateway setup`, or run the gateway once so")
        print("channel discovery can populate ~/.hermes/channel_directory.json.")
        return _SUCCESS_EXIT

    # Unfiltered human output reuses the shared formatter; the filtered view
    # is built by hand below.
    if platform_filter is None:
        print(format_directory_for_display())
        return _SUCCESS_EXIT

    for plat_name in sorted(platforms):
        channels = platforms[plat_name]
        print(f"{plat_name}:")
        if not channels:
            print(" (no channels discovered yet)")
            continue
        for entry in channels:
            label = entry.get("name", "?")
            ident = entry.get("id") or entry.get("chat_id") or ""
            if ident and ident != label:
                suffix = f" [{ident}]"
            else:
                suffix = ""
            print(f" {plat_name}:{label}{suffix}")
        print()
    return _SUCCESS_EXIT
def _load_hermes_env() -> None:
"""Populate ``os.environ`` from ``~/.hermes/.env`` AND bridge top-level
``config.yaml`` keys into the environment so the underlying gateway
config loader sees platform credentials and home channel IDs.
``send_message_tool`` reads tokens and home-channel IDs via
``os.getenv(...)`` on each call. The gateway process does two things at
startup that ``hermes send`` must replicate when invoked standalone:
1. ``load_dotenv(~/.hermes/.env)`` brings bot tokens into the env.
2. Bridge top-level simple values from ``~/.hermes/config.yaml`` into
``os.environ`` (without overriding existing env vars). This is where
``TELEGRAM_HOME_CHANNEL`` and friends live when the user saved them
via ``hermes config set``.
See ``gateway/run.py`` for the canonical version of this bridge we
intentionally reimplement the minimum needed here so ``hermes send``
doesn't pull in the full gateway module just to resolve a home channel.
"""
# Step 1: dotenv
try:
from dotenv import load_dotenv
except Exception:
load_dotenv = None # type: ignore[assignment]
try:
from hermes_cli.config import get_hermes_home
home = get_hermes_home()
except Exception:
return
env_path = home / ".env"
if load_dotenv and env_path.exists():
try:
load_dotenv(str(env_path), override=True, encoding="utf-8")
except UnicodeDecodeError:
try:
load_dotenv(str(env_path), override=True, encoding="latin-1")
except Exception:
pass
except Exception:
pass
# Step 2: bridge top-level config.yaml values into the environment so
# gateway.config.load_gateway_config() sees them. Scalars only; don't
# override values already in the env.
import os
config_path = home / "config.yaml"
if not config_path.exists():
return
try:
import yaml # type: ignore[import-not-found]
except Exception:
return
try:
with open(config_path, "r", encoding="utf-8") as fh:
raw = yaml.safe_load(fh) or {}
except Exception:
return
try:
from hermes_cli.config import _expand_env_vars
raw = _expand_env_vars(raw)
except Exception:
pass
if not isinstance(raw, dict):
return
for key, val in raw.items():
if not isinstance(val, (str, int, float, bool)):
continue
if key in os.environ:
continue
os.environ[key] = str(val)
def cmd_send(args: argparse.Namespace) -> None:
    """Run ``hermes send``; always terminates the process via ``sys.exit``.

    Flow: load credentials into the environment, handle ``--list`` if
    requested, otherwise resolve the target, read the message body, optionally
    prepend ``--subject``, dispatch through ``send_message_tool`` and exit
    with the code produced by ``_emit_result``. A missing target or an
    empty message exits with ``_USAGE_EXIT`` after printing to stderr.
    """
    # Bridge ~/.hermes/.env and ~/.hermes/config.yaml into os.environ so the
    # gateway config loader (invoked downstream by send_message_tool and by
    # the channel directory) can see platform credentials and home channels.
    _load_hermes_env()

    json_mode = getattr(args, "json", False)
    positional = getattr(args, "message", None)

    # --list short-circuits everything else. With `--list telegram`, argparse
    # stores "telegram" in the `message` positional (the flag itself takes no
    # argument), so the positional doubles as the platform filter.
    if getattr(args, "list_targets", False):
        sys.exit(_list_targets(positional, json_mode=json_mode))

    target = _resolve_target(getattr(args, "to", None))
    if not target:
        print(
            "hermes send: --to PLATFORM[:channel[:thread]] is required\n"
            "Examples:\n"
            " hermes send --to telegram \"hello\"\n"
            " hermes send --to discord:#ops --file report.md\n"
            " hermes send --list # list available targets",
            file=sys.stderr,
        )
        sys.exit(_USAGE_EXIT)

    message = _read_message_body(positional, getattr(args, "file", None))
    if message is None or not message.strip():
        print(
            "hermes send: no message provided. Pass text as a positional "
            "argument, use --file PATH, or pipe data via stdin.",
            file=sys.stderr,
        )
        sys.exit(_USAGE_EXIT)

    # Optional subject header, handy for alerting scripts that want a
    # consistent first line without inlining it into every call.
    subject = getattr(args, "subject", None)
    if subject:
        message = f"{subject}\n\n{message.lstrip()}"

    # Imported lazily so `hermes send --help` stays fast and does not pull in
    # the full tool registry / gateway config stack.
    from tools.send_message_tool import send_message_tool

    # send_message_tool takes the standard tool-call dict and returns a JSON
    # string; platform routing happens inside it.
    raw_result = send_message_tool(
        {"action": "send", "target": target, "message": message}
    )
    sys.exit(
        _emit_result(
            raw_result,
            json_mode=json_mode,
            quiet=getattr(args, "quiet", False),
        )
    )
def register_send_subparser(subparsers) -> argparse.ArgumentParser:
    """Register the ``send`` subcommand on ``subparsers`` and return its parser.

    Lives in its own function so the top-level parser builder can wire it in
    next to the other messaging subcommands without cluttering
    ``_parser.py`` or ``main.py``. The returned parser has ``func`` defaulted
    to ``cmd_send``.
    """
    description = (
        "Pipe text from any shell script to any messaging platform Hermes "
        "is already configured for. Reuses the gateway's platform "
        "credentials (~/.hermes/.env + ~/.hermes/config.yaml) — no LLM, "
        "no agent loop, no running gateway required for bot-token "
        "platforms like Telegram/Discord/Slack/Signal."
    )
    epilog = (
        "Examples:\n"
        " hermes send --to telegram \"deploy finished\"\n"
        " echo \"RAM 92%\" | hermes send --to telegram:-1001234567890\n"
        " hermes send --to discord:#ops --file /tmp/report.md\n"
        " hermes send --to slack:#eng --subject \"[CI]\" --file build.log\n"
        " hermes send --list # all platforms\n"
        " hermes send --list telegram # filter by platform\n"
        "\n"
        "Exit codes: 0 ok, 1 delivery/backend error, 2 usage error."
    )
    send_parser = subparsers.add_parser(
        "send",
        help="Send a message to a configured platform (scripts, cron jobs, CI).",
        description=description,
        epilog=epilog,
        # Raw formatter keeps the hand-laid-out epilog lines intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = send_parser.add_argument

    add(
        "-t",
        "--to",
        metavar="TARGET",
        default=None,
        help=(
            "Delivery target. Format: 'platform' (home channel), "
            "'platform:chat_id', 'platform:chat_id:thread_id', or "
            "'platform:#channel-name'. Examples: telegram, "
            "telegram:-1001234567890:17585, discord:#ops, slack:C0123ABCD, "
            "signal:+15551234567."
        ),
    )
    # Single optional positional: the message text (or the --list filter).
    add(
        "message",
        nargs="?",
        default=None,
        help="Message text. If omitted, read from --file or stdin.",
    )
    # Legacy / convenience positional removed — use --to for clarity.
    add(
        "-f",
        "--file",
        metavar="PATH",
        default=None,
        help="Read message body from PATH. Use '-' to force stdin.",
    )
    add(
        "-s",
        "--subject",
        metavar="LINE",
        default=None,
        help="Prepend a subject/header line before the message body.",
    )
    add(
        "-l",
        "--list",
        dest="list_targets",
        action="store_true",
        default=False,
        help="List available targets. Optional positional filter: `hermes send --list telegram`.",
    )
    add(
        "-q",
        "--quiet",
        action="store_true",
        default=False,
        help="Suppress stdout on success (exit code only).",
    )
    add(
        "--json",
        action="store_true",
        default=False,
        help="Emit raw JSON result instead of human-readable output.",
    )

    send_parser.set_defaults(func=cmd_send)
    return send_parser
__all__ = ["cmd_send", "register_send_subparser"]

316
hermes_cli/session_recap.py Normal file
View file

@ -0,0 +1,316 @@
"""Session recap — summarize what's happened in the current session.
Inspired by Claude Code's `/recap` command (v2.1.114, April 2026), which
shows a one-line summary of what happened while a terminal was unfocused
so users juggling multiple sessions can re-orient quickly.
Source: https://code.claude.com/docs/en/whats-new/2026-w17
Differences from Claude Code:
- Pure local computation from the in-memory conversation history. No
LLM call, no auxiliary model, no prompt-cache invalidation. A
recap should be instant and free.
- Works unchanged on CLI and every gateway platform (Telegram,
Discord, Slack, …) because both call into the same ``build_recap``
helper. Claude Code only shows this on the CLI.
- Tailored to hermes-agent's tool vocabulary (``terminal``, ``patch``,
``write_file``, ``delegate_task``, ``browser_*``, ``web_*``) — the
recap surfaces which classes of work were most active.
"""
from __future__ import annotations
import os
from collections import Counter
from typing import Any, Iterable, List, Mapping, Optional, Sequence, Tuple
# How many recent user/assistant turns we consider "recent activity".
_RECENT_TURN_WINDOW = 20
# How many characters of the latest user prompt to show.
_PROMPT_PREVIEW_CHARS = 140
# How many characters of the latest assistant text to show.
_ASSISTANT_PREVIEW_CHARS = 200
# How many recently-touched files to list.
_MAX_FILES_LISTED = 5
# Tool names whose calls reference a file path (edits and reads alike),
# mapped to the argument key that holds the path.
_FILE_EDIT_TOOLS: Mapping[str, str] = {
"write_file": "path",
"patch": "path",
"read_file": "path",
"skill_manage": "file_path",
"skill_view": "file_path",
}
def _coerce_text(value: Any) -> str:
"""Flatten assistant/user ``content`` into a plain string.
Content can be a string or a list of content blocks (for multimodal
or reasoning models). We concatenate every text-like block and
ignore the rest.
"""
if value is None:
return ""
if isinstance(value, str):
return value
if isinstance(value, list):
parts: List[str] = []
for block in value:
if isinstance(block, str):
parts.append(block)
continue
if isinstance(block, Mapping):
text = block.get("text")
if isinstance(text, str) and text:
parts.append(text)
return "\n".join(parts)
return str(value)
def _tool_call_name_and_args(tool_call: Any) -> Tuple[str, Mapping[str, Any]]:
"""Extract ``(name, arguments_dict)`` from a tool_call entry.
``arguments`` may be a JSON string or a dict depending on provider.
Return an empty dict if it cannot be parsed.
"""
if not isinstance(tool_call, Mapping):
return "", {}
fn = tool_call.get("function") or {}
if not isinstance(fn, Mapping):
return "", {}
name = str(fn.get("name") or "") or ""
raw_args = fn.get("arguments")
if isinstance(raw_args, Mapping):
return name, raw_args
if isinstance(raw_args, str) and raw_args:
try:
import json
parsed = json.loads(raw_args)
if isinstance(parsed, Mapping):
return name, parsed
except Exception:
return name, {}
return name, {}
def _iter_assistant_tool_calls(
    messages: Sequence[Mapping[str, Any]],
) -> Iterable[Tuple[str, Mapping[str, Any]]]:
    """Yield ``(tool_name, arguments)`` for each named tool call made by an
    assistant message, in conversation order.

    Non-mapping messages, non-assistant roles, ``tool_calls`` values that
    are not lists, and calls without a name are skipped.
    """
    for message in messages:
        is_assistant = isinstance(message, Mapping) and message.get("role") == "assistant"
        if not is_assistant:
            continue
        calls = message.get("tool_calls") or []
        if not isinstance(calls, list):
            continue
        for call in calls:
            tool_name, tool_args = _tool_call_name_and_args(call)
            if tool_name:
                yield tool_name, tool_args
def _count_visible_turns(
messages: Sequence[Mapping[str, Any]],
) -> Tuple[int, int, int]:
"""Return ``(user_turn_count, assistant_turn_count, tool_message_count)``."""
users = assistants = tools = 0
for msg in messages:
if not isinstance(msg, Mapping):
continue
role = msg.get("role")
if role == "user":
users += 1
elif role == "assistant":
assistants += 1
elif role == "tool":
tools += 1
return users, assistants, tools
def _latest_user_prompt(
    messages: Sequence[Mapping[str, Any]],
) -> Optional[str]:
    """Return the stripped text of the most recent user message that has
    non-blank text, or ``None`` when no such message exists."""
    for entry in reversed(messages):
        if not isinstance(entry, Mapping) or entry.get("role") != "user":
            continue
        stripped = _coerce_text(entry.get("content")).strip()
        if stripped:
            return stripped
    return None
def _latest_assistant_text(
    messages: Sequence[Mapping[str, Any]],
) -> Optional[str]:
    """Return the stripped text of the most recent assistant message that
    has non-blank text (tool-call-only replies are passed over), or ``None``
    when no such message exists."""
    for entry in reversed(messages):
        if isinstance(entry, Mapping) and entry.get("role") == "assistant":
            body = _coerce_text(entry.get("content")).strip()
            if body:
                return body
    return None
def _recent_window(
    messages: Sequence[Mapping[str, Any]], window: int = _RECENT_TURN_WINDOW
) -> List[Mapping[str, Any]]:
    """Return the tail of ``messages`` spanning at most ``window`` user and
    assistant messages.

    Scanning backwards, user/assistant messages are counted; once the count
    reaches ``window`` the slice starts at that message, so every message
    after it (tool messages included) is kept. When the history holds fewer
    than ``window`` such messages, a copy of the whole history is returned.
    """
    seen = 0
    for index in reversed(range(len(messages))):
        entry = messages[index]
        if not (isinstance(entry, Mapping) and entry.get("role") in ("user", "assistant")):
            continue
        seen += 1
        if seen >= window:
            return list(messages[index:])
    return list(messages)
def _shortened_path(path: str) -> str:
"""Show a path relative to cwd when possible, otherwise with ~ expansion."""
if not path:
return path
try:
abs_path = os.path.abspath(os.path.expanduser(path))
cwd = os.getcwd()
if abs_path == cwd:
return "."
if abs_path.startswith(cwd + os.sep):
return abs_path[len(cwd) + 1 :]
home = os.path.expanduser("~")
if abs_path.startswith(home + os.sep):
return "~/" + abs_path[len(home) + 1 :]
return abs_path
except Exception:
return path
def _summarise_tool_activity(
    tool_calls: Sequence[Tuple[str, Mapping[str, Any]]],
) -> Tuple[List[Tuple[str, int]], List[str]]:
    """Return ``(tool_counts_sorted, recently_touched_files)``.

    ``tool_counts_sorted`` lists ``(tool_name, count)`` pairs, descending by
    count with ties broken alphabetically; the full list is returned so
    callers can truncate for display.

    ``recently_touched_files`` lists distinct display paths, most recent
    first, taken from calls to the tools named in ``_FILE_EDIT_TOOLS``.
    Paths are de-duplicated *after* shortening, so the same file referenced
    through different spellings (``./a.py`` vs ``a.py``) is listed once.
    """
    counter: Counter[str] = Counter()
    files_seen: List[str] = []
    shown: set[str] = set()

    # Walk newest -> oldest so the first time a file is met is its most
    # recent use; Counter totals are unaffected by iteration order.
    for name, args in reversed(list(tool_calls)):
        counter[name] += 1
        arg_key = _FILE_EDIT_TOOLS.get(name)
        if not arg_key:
            continue
        path = args.get(arg_key)
        if not isinstance(path, str) or not path:
            continue
        display = _shortened_path(path)
        if display not in shown:
            shown.add(display)
            files_seen.append(display)

    tool_counts = sorted(counter.items(), key=lambda kv: (-kv[1], kv[0]))
    return tool_counts, files_seen
def _truncate(text: str, limit: int) -> str:
text = " ".join(text.split()) # collapse newlines for a compact one-liner
if len(text) <= limit:
return text
return text[: limit - 1].rstrip() + ""
def build_recap(
    messages: Sequence[Mapping[str, Any]],
    *,
    session_title: Optional[str] = None,
    session_id: Optional[str] = None,
    platform: Optional[str] = None,
) -> str:
    """Build a multi-line, plain-text recap of recent session activity.

    Args:
        messages: full conversation history as chat-completion-style dicts
            (``role``, ``content``, ``tool_calls``, ...).
        session_title: optional human title; shown in the header when set.
        session_id: optional session id; its first 8 characters are shown in
            the header when no title is given.
        platform: optional hint (``"cli"``, ``"telegram"``, ...). Accepted
            for forward compatibility; it does not affect the output.

    Returns:
        Newline-joined lines: a header, then either a "nothing to recap"
        note (empty history) or a scope line followed by the tools-used,
        files-touched, last-ask and last-reply lines that apply.
    """
    del platform  # reserved for future use

    title_parts: List[str] = ["Session recap"]
    if session_title:
        title_parts.append(f"{session_title}")
    elif session_id:
        title_parts.append(f"{session_id[:8]}")
    out: List[str] = [" ".join(title_parts)]

    if not messages:
        out.append(" (nothing to recap — no messages yet)")
        return "\n".join(out)

    # NOTE(review): tool_results is counted over the whole history while the
    # user/assistant figures on the same line come from the recent window —
    # confirm this is intended.
    total_users, total_assistants, tool_results = _count_visible_turns(messages)
    recent = _recent_window(messages)
    recent_users, recent_assistants = _count_visible_turns(recent)[:2]

    turn_plural = "s" if recent_users != 1 else ""
    reply_ending = "ies" if recent_assistants != 1 else "y"
    scope = (
        f"{recent_users} user turn{turn_plural} / "
        f"{recent_assistants} assistant repl{reply_ending}"
    )
    if (total_users, total_assistants) != (recent_users, recent_assistants):
        scope += f" (of {total_users}/{total_assistants} total)"
    result_plural = "s" if tool_results != 1 else ""
    out.append(f" Recent: {scope}, {tool_results} tool result{result_plural}")

    tool_counts, files = _summarise_tool_activity(
        list(_iter_assistant_tool_calls(recent))
    )

    if tool_counts:
        summary = ", ".join(f"{name}×{count}" for name, count in tool_counts[:5])
        hidden_tools = len(tool_counts) - 5
        if hidden_tools > 0:
            summary += f" (+{hidden_tools} more)"
        out.append(f" Tools used: {summary}")

    if files:
        visible = files[:_MAX_FILES_LISTED]
        listing = ", ".join(visible)
        hidden_files = len(files) - len(visible)
        if hidden_files > 0:
            listing += f" (+{hidden_files} more)"
        out.append(f" Files touched: {listing}")

    last_ask = _latest_user_prompt(recent)
    if last_ask:
        out.append(f" Last ask: {_truncate(last_ask, _PROMPT_PREVIEW_CHARS)}")

    last_reply = _latest_assistant_text(recent)
    if last_reply:
        out.append(f" Last reply: {_truncate(last_reply, _ASSISTANT_PREVIEW_CHARS)}")

    if len(out) == 2:
        # Only the header + scope line — nothing substantive to show.
        out.append(" (no assistant activity yet in this window)")
    return "\n".join(out)
__all__ = ["build_recap"]

View file

@ -1091,6 +1091,58 @@ def _install_kittentts_deps() -> bool:
return False
def _xai_oauth_logged_in_for_setup() -> bool:
"""True iff xAI Grok OAuth credentials are already stored locally.
Lets TTS / STT setup skip the API-key prompt for users who logged in
through ``hermes model`` -> xAI Grok OAuth (SuperGrok Subscription).
"""
try:
from hermes_cli.auth import get_xai_oauth_auth_status
return bool(get_xai_oauth_auth_status().get("logged_in"))
except Exception:
return False
def _run_xai_oauth_login_from_setup() -> bool:
    """Drive the xAI Grok OAuth loopback login from inside the setup wizard.

    On success the tokens are saved and the config is updated for the
    ``xai-oauth`` provider. Returns True on success and False when the auth
    helpers cannot be imported or the login/save step raises; in both
    failure cases a warning is printed and the caller falls back to whatever
    the user picks next (e.g. Edge TTS).
    """
    try:
        from hermes_cli.auth import (
            DEFAULT_XAI_OAUTH_BASE_URL,
            _is_remote_session,
            _save_xai_oauth_tokens,
            _update_config_for_provider,
            _xai_oauth_loopback_login,
        )
    except Exception as import_error:
        print_warning(f"xAI Grok OAuth helpers unavailable: {import_error}")
        return False

    # The browser is opened only when the session is not reported as remote.
    launch_browser = not _is_remote_session()

    print()
    print_info("Signing in to xAI Grok OAuth (SuperGrok Subscription)...")

    try:
        credentials = _xai_oauth_loopback_login(open_browser=launch_browser)
        _save_xai_oauth_tokens(
            credentials["tokens"],
            discovery=credentials.get("discovery"),
            redirect_uri=credentials.get("redirect_uri", ""),
            last_refresh=credentials.get("last_refresh"),
        )
        base_url = credentials.get("base_url", DEFAULT_XAI_OAUTH_BASE_URL)
        _update_config_for_provider("xai-oauth", base_url)
    except Exception as login_error:
        print_warning(f"xAI Grok OAuth login failed: {login_error}")
        return False
    return True
def _setup_tts_provider(config: dict):
"""Interactive TTS provider selection with install flow for NeuTTS."""
tts_config = config.get("tts", {})
@ -1125,7 +1177,7 @@ def _setup_tts_provider(config: dict):
"Edge TTS (free, cloud-based, no setup needed)",
"ElevenLabs (premium quality, needs API key)",
"OpenAI TTS (good quality, needs API key)",
"xAI TTS (Grok voices, needs API key)",
"xAI TTS (Grok voices — OAuth login or API key)",
"MiniMax TTS (high quality with voice cloning, needs API key)",
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
@ -1199,21 +1251,59 @@ def _setup_tts_provider(config: dict):
selected = "edge"
elif selected == "xai":
existing = get_env_value("XAI_API_KEY")
if not existing:
# Resolution order: existing OAuth tokens (free for SuperGrok subscribers
# via the Hermes auth store) > existing XAI_API_KEY > prompt the user.
# When neither is configured, offer both options instead of forcing the
# API-key path — xAI TTS works fine with OAuth bearer tokens too.
oauth_logged_in = _xai_oauth_logged_in_for_setup()
existing_api_key = get_env_value("XAI_API_KEY")
if oauth_logged_in:
print_success(
"xAI TTS will use your xAI Grok OAuth (SuperGrok Subscription) "
"credentials"
)
elif existing_api_key:
print_success("xAI TTS will use your existing XAI_API_KEY")
else:
print()
api_key = prompt("xAI API key for TTS", password=True)
if api_key:
save_env_value("XAI_API_KEY", api_key)
print_success("xAI TTS API key saved")
choice_idx = prompt_choice(
"How do you want xAI TTS to authenticate?",
choices=[
"Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login",
"Paste an xAI API key (console.x.ai)",
"Skip → fallback to Edge TTS",
],
default=0,
)
if choice_idx == 0:
if _run_xai_oauth_login_from_setup():
print_success(
"Logged in — xAI TTS will use these OAuth credentials"
)
else:
print_warning(
"xAI Grok OAuth login did not complete. "
"Falling back to Edge TTS."
)
selected = "edge"
elif choice_idx == 1:
api_key = prompt("xAI API key for TTS", password=True)
if api_key:
save_env_value("XAI_API_KEY", api_key)
print_success("xAI TTS API key saved")
else:
from hermes_constants import display_hermes_home as _dhh
print_warning(
"No xAI API key provided for TTS. Configure XAI_API_KEY "
f"via hermes setup model or {_dhh()}/.env to use xAI TTS. "
"Falling back to Edge TTS."
)
selected = "edge"
else:
from hermes_constants import display_hermes_home as _dhh
print_warning(
"No xAI API key provided for TTS. Configure XAI_API_KEY via "
f"hermes setup model or {_dhh()}/.env to use xAI TTS. "
"Falling back to Edge TTS."
)
print_warning("xAI TTS skipped. Falling back to Edge TTS.")
selected = "edge"
if selected == "xai":
print()
voice_id = prompt("xAI voice_id (Enter for 'eve', or paste a custom voice ID)")

View file

@ -61,6 +61,7 @@ CONFIGURABLE_TOOLSETS = [
("video", "🎬 Video Analysis", "video_analyze (requires video-capable model)"),
("image_gen", "🎨 Image Generation", "image_generate"),
("video_gen", "🎬 Video Generation", "video_generate (text-to-video + image-to-video)"),
("x_search", "🐦 X (Twitter) Search", "x_search (requires xAI OAuth or XAI_API_KEY)"),
("moa", "🧠 Mixture of Agents", "mixture_of_agents"),
("tts", "🔊 Text-to-Speech", "text_to_speech"),
("skills", "📚 Skills", "list, view, manage"),
@ -86,7 +87,12 @@ CONFIGURABLE_TOOLSETS = [
# Video gen is off by default — it's a niche, paid, slow feature. Users
# who want it opt in via `hermes tools` → Video Generation, which walks
# them through provider + model selection.
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "spotify", "discord", "discord_admin", "video", "video_gen"}
#
# X search is off by default — gated on xAI credentials (SuperGrok OAuth
# or XAI_API_KEY). Users opt in via `hermes tools` → X (Twitter) Search,
# which walks them through credential setup. The tool's check_fn means
# the schema won't appear to the model even if enabled without credentials.
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "spotify", "discord", "discord_admin", "video", "video_gen", "x_search"}
# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
# these platforms, and only resolve/save for these platforms. A toolset
@ -194,11 +200,10 @@ TOOL_CATEGORIES = {
},
{
"name": "xAI TTS",
"tag": "Grok voices - requires xAI API key",
"env_vars": [
{"key": "XAI_API_KEY", "prompt": "xAI API key", "url": "https://console.x.ai/"},
],
"tag": "Grok voices — uses xAI Grok OAuth or XAI_API_KEY",
"env_vars": [],
"tts_provider": "xai",
"post_setup": "xai_grok",
},
{
"name": "ElevenLabs",
@ -309,6 +314,39 @@ TOOL_CATEGORIES = {
# converge image_gen toward.
"providers": [],
},
"x_search": {
"name": "X (Twitter) Search",
"setup_title": "Select xAI Credential Source",
"setup_note": (
"Hermes routes X searches through xAI's built-in x_search "
"Responses tool. Both credential sources hit the same "
"https://api.x.ai/v1/responses endpoint — pick whichever you "
"already have. SuperGrok OAuth is preferred when both are set "
"(uses your subscription quota instead of API spend)."
),
"icon": "🐦",
"providers": [
{
"name": "xAI Grok OAuth (SuperGrok Subscription)",
"badge": "subscription",
"tag": "Browser login at accounts.x.ai — no API key required",
"env_vars": [],
"post_setup": "xai_grok",
},
{
"name": "xAI API key",
"badge": "paid",
"tag": "Direct xAI API billing via XAI_API_KEY",
"env_vars": [
{
"key": "XAI_API_KEY",
"prompt": "xAI API key",
"url": "https://console.x.ai/",
},
],
},
],
},
"browser": {
"name": "Browser Automation",
"icon": "🌐",
@ -423,31 +461,6 @@ TOOL_CATEGORIES = {
},
],
},
"langfuse": {
"name": "Langfuse Observability",
"icon": "📊",
"providers": [
{
"name": "Langfuse Cloud",
"tag": "Hosted Langfuse (cloud.langfuse.com)",
"env_vars": [
{"key": "HERMES_LANGFUSE_PUBLIC_KEY", "prompt": "Langfuse public key (pk-lf-...)", "url": "https://cloud.langfuse.com"},
{"key": "HERMES_LANGFUSE_SECRET_KEY", "prompt": "Langfuse secret key (sk-lf-...)", "url": "https://cloud.langfuse.com"},
],
"post_setup": "langfuse",
},
{
"name": "Langfuse Self-Hosted",
"tag": "Self-hosted Langfuse instance",
"env_vars": [
{"key": "HERMES_LANGFUSE_PUBLIC_KEY", "prompt": "Langfuse public key (pk-lf-...)"},
{"key": "HERMES_LANGFUSE_SECRET_KEY", "prompt": "Langfuse secret key (sk-lf-...)"},
{"key": "HERMES_LANGFUSE_BASE_URL", "prompt": "Langfuse server URL (e.g. http://localhost:3000)", "default": "http://localhost:3000"},
],
"post_setup": "langfuse",
},
],
},
}
# Simple env-var requirements for toolsets NOT in TOOL_CATEGORIES.
@ -772,21 +785,35 @@ def _run_post_setup(post_setup_key: str):
camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camofox-browser"
_npm_bin = shutil.which("npm")
if not camofox_dir.exists() and _npm_bin:
_print_info(" Installing Camofox browser server...")
_print_info(" Installing Camofox browser package...")
_print_info(" First run downloads the Camoufox engine (~300MB) — this can take several minutes.")
import subprocess
# Absolute npm path so .cmd shim executes on Windows.
result = subprocess.run(
[_npm_bin, "install", "--silent"],
capture_output=True, text=True, cwd=str(PROJECT_ROOT)
)
if result.returncode == 0:
_print_success(" Camofox installed")
else:
_print_warning(" npm install failed - run manually: npm install")
# Install @askjo/camofox-browser on-demand. It is NOT in
# package.json so that `hermes update` does not silently pull
# the ~300MB Camoufox Firefox-fork binary for every user.
# Stream output (no capture, no --silent) so the long-running
# postinstall download is visible instead of looking frozen.
try:
result = subprocess.run(
[_npm_bin, "install", "@askjo/camofox-browser@^1.5.2",
"--no-fund", "--no-audit", "--progress=false"],
cwd=str(PROJECT_ROOT),
)
if result.returncode == 0:
_print_success(" Camofox installed")
else:
_print_warning(
" npm install failed — run manually: "
"npm install @askjo/camofox-browser"
)
except Exception as exc:
_print_warning(f" Camofox install failed: {exc}")
_print_info(
" Run manually: npm install @askjo/camofox-browser"
)
if camofox_dir.exists():
_print_info(" Start the Camofox server:")
_print_info(" npx @askjo/camofox-browser")
_print_info(" First run downloads the Camoufox engine (~300MB)")
_print_info(" Or use Docker: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
elif not shutil.which("npm"):
_print_warning(" Node.js not found. Install Camofox via Docker:")
@ -895,35 +922,72 @@ def _run_post_setup(post_setup_key: str):
_print_warning(f" Spotify login failed: {exc}")
_print_info(" Run manually: hermes auth spotify")
elif post_setup_key == "langfuse":
# Install the langfuse SDK.
elif post_setup_key == "xai_grok":
# Shared credential bootstrap for any picker entry that talks to xAI
# (TTS, Video Gen, future Image Gen, etc.). Accepts either a
# SuperGrok-tier OAuth bearer token (preferred — billed against the
# user's existing subscription) or a raw XAI_API_KEY from
# console.x.ai. The picker entries declare empty env_vars so we
# drive the full auth UX here.
try:
__import__("langfuse")
_print_success(" langfuse SDK already installed")
except ImportError:
_print_info(" Installing langfuse SDK...")
result = _pip_install(["langfuse", "--quiet"], timeout=120)
if result.returncode == 0:
_print_success(" langfuse SDK installed")
else:
_print_warning(" langfuse SDK install failed — run manually: uv pip install langfuse")
# Opt the bundled observability/langfuse plugin into plugins.enabled.
# The plugin ships in the repo but doesn't load until the user enables
# it (standalone plugins are opt-in).
from hermes_cli.auth import get_xai_oauth_auth_status
oauth_logged_in = bool(get_xai_oauth_auth_status().get("logged_in"))
except Exception:
oauth_logged_in = False
existing_api_key = get_env_value("XAI_API_KEY")
if oauth_logged_in:
_print_success(
" xAI will use your xAI Grok OAuth (SuperGrok Subscription) credentials"
)
return
if existing_api_key:
_print_success(" xAI will use your existing XAI_API_KEY")
return
_print_info(" xAI needs credentials. Choose one:")
try:
from hermes_cli.plugins_cmd import _get_enabled_set, _save_enabled_set
enabled = _get_enabled_set()
if "observability/langfuse" in enabled or "langfuse" in enabled:
_print_success(" Plugin observability/langfuse already enabled")
else:
enabled.add("observability/langfuse")
_save_enabled_set(enabled)
_print_success(" Plugin observability/langfuse enabled")
from hermes_cli.setup import (
_run_xai_oauth_login_from_setup,
prompt_choice,
prompt as _setup_prompt,
)
from hermes_cli.config import save_env_value
except Exception as exc:
_print_warning(f" Could not enable plugin automatically: {exc}")
_print_info(" Run manually: hermes plugins enable observability/langfuse")
_print_info(" Restart Hermes for tracing to take effect.")
_print_info(" Verify: hermes plugins list")
_print_warning(f" Could not load setup helpers: {exc}")
_print_info(" Run later: hermes auth add xai-oauth (or set XAI_API_KEY)")
return
idx = prompt_choice(
" How do you want xAI to authenticate?",
choices=[
"Sign in with xAI Grok OAuth (SuperGrok Subscription) — browser login",
"Paste an xAI API key (console.x.ai)",
"Skip — configure later via `hermes auth add xai-oauth`",
],
default=0,
)
if idx == 0:
if _run_xai_oauth_login_from_setup():
_print_success(
" Logged in — xAI will use these OAuth credentials"
)
else:
_print_warning(
" xAI Grok OAuth login did not complete. "
"Run later: hermes auth add xai-oauth"
)
elif idx == 1:
api_key = _setup_prompt(" xAI API key", password=True)
if api_key:
save_env_value("XAI_API_KEY", api_key)
_print_success(" XAI_API_KEY saved")
else:
_print_warning(
" No API key provided. Run later: hermes auth add xai-oauth"
)
else:
_print_info(" xAI will remain inactive until credentials are configured.")
# ─── Platform / Toolset Helpers ───────────────────────────────────────────────
@ -1439,15 +1503,16 @@ def _plugin_image_gen_providers() -> list[dict]:
continue
if not isinstance(schema, dict):
continue
rows.append(
{
"name": schema.get("name", provider.display_name),
"badge": schema.get("badge", ""),
"tag": schema.get("tag", ""),
"env_vars": schema.get("env_vars", []),
"image_gen_plugin_name": provider.name,
}
)
row = {
"name": schema.get("name", provider.display_name),
"badge": schema.get("badge", ""),
"tag": schema.get("tag", ""),
"env_vars": schema.get("env_vars", []),
"image_gen_plugin_name": provider.name,
}
if schema.get("post_setup"):
row["post_setup"] = schema["post_setup"]
rows.append(row)
return rows
@ -1476,15 +1541,16 @@ def _plugin_video_gen_providers() -> list[dict]:
continue
if not isinstance(schema, dict):
continue
rows.append(
{
"name": schema.get("name", provider.display_name),
"badge": schema.get("badge", ""),
"tag": schema.get("tag", ""),
"env_vars": schema.get("env_vars", []),
"video_gen_plugin_name": provider.name,
}
)
row = {
"name": schema.get("name", provider.display_name),
"badge": schema.get("badge", ""),
"tag": schema.get("tag", ""),
"env_vars": schema.get("env_vars", []),
"video_gen_plugin_name": provider.name,
}
if schema.get("post_setup"):
row["post_setup"] = schema["post_setup"]
rows.append(row)
return rows
@ -1748,6 +1814,11 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
image_cfg = config.get("image_gen", {})
return isinstance(image_cfg, dict) and image_cfg.get("provider") == plugin_name
video_plugin_name = provider.get("video_gen_plugin_name")
if video_plugin_name:
video_cfg = config.get("video_gen", {})
return isinstance(video_cfg, dict) and video_cfg.get("provider") == video_plugin_name
managed_feature = provider.get("managed_nous_feature")
if managed_feature:
features = get_nous_subscription_features(config)

View file

@ -21,6 +21,7 @@ Public API (signatures preserved from the original 2,400-line version):
"""
import json
import re
import asyncio
import logging
import threading
@ -485,6 +486,48 @@ _AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
_READ_SEARCH_TOOLS = {"read_file", "search_files"}
# =========================================================================
# Tool error sanitization
# =========================================================================
#
# Tool exceptions can carry arbitrary text into the model's context as the
# `tool` message content. json.dumps() handles quote/backslash escaping so a
# raw injection of `</tool_call>` won't break message framing, but the model
# still *reads* those tokens and they can confuse downstream tool-call
# parsing or, in adversarial cases, nudge it toward role-confusion framing.
#
# This helper strips structural framing tokens (XML role tags, CDATA,
# markdown code fences) and caps the message at a sane upper bound before it
# becomes part of the conversation. It's defense-in-depth — the json layer
# already prevents framing escape — but cheap and worth having.
#
# Ported from ironclaw#1639.
_TOOL_ERROR_ROLE_TAG_RE = re.compile(
r'</?(?:tool_call|function_call|result|response|output|input|system|assistant|user)>',
re.IGNORECASE,
)
_TOOL_ERROR_FENCE_OPEN_RE = re.compile(r'^\s*```(?:json|xml|html|markdown)?\s*', re.MULTILINE)
_TOOL_ERROR_FENCE_CLOSE_RE = re.compile(r'\s*```\s*$', re.MULTILINE)
_TOOL_ERROR_CDATA_RE = re.compile(r'<!\[CDATA\[.*?\]\]>', re.DOTALL)
_TOOL_ERROR_MAX_LEN = 2000
def _sanitize_tool_error(error_msg: str) -> str:
"""Strip structural framing tokens from a tool error before showing it to the model.
See _TOOL_ERROR_ROLE_TAG_RE docstring above for rationale.
"""
if not error_msg:
return "[TOOL_ERROR] "
sanitized = _TOOL_ERROR_ROLE_TAG_RE.sub("", error_msg)
sanitized = _TOOL_ERROR_FENCE_OPEN_RE.sub("", sanitized)
sanitized = _TOOL_ERROR_FENCE_CLOSE_RE.sub("", sanitized)
sanitized = _TOOL_ERROR_CDATA_RE.sub("", sanitized)
if len(sanitized) > _TOOL_ERROR_MAX_LEN:
sanitized = sanitized[:_TOOL_ERROR_MAX_LEN - 3] + "..."
return f"[TOOL_ERROR] {sanitized}"
# =========================================================================
# Tool argument type coercion
# =========================================================================
@ -824,7 +867,7 @@ def handle_function_call(
except Exception as e:
error_msg = f"Error executing {function_name}: {str(e)}"
logger.exception(error_msg)
return json.dumps({"error": error_msg}, ensure_ascii=False)
return json.dumps({"error": _sanitize_tool_error(error_msg)}, ensure_ascii=False)
# =============================================================================

View file

@ -0,0 +1,309 @@
---
name: pinggy-tunnel
description: Zero-install localhost tunnels over SSH via Pinggy.
version: 0.1.0
author: Teknium (teknium1), Hermes Agent
license: MIT
platforms: [linux, macos, windows]
metadata:
hermes:
tags: [Pinggy, Tunnel, Networking, SSH, Webhook, Localhost]
related_skills: [cloudflared-quick-tunnel, webhook-subscriptions]
---
# Pinggy Tunnel Skill
Expose a local service (dev server, webhook receiver, MCP endpoint, demo) to the public internet using a Pinggy SSH reverse tunnel. No daemon to install — the user's stock SSH client connects to `a.pinggy.io:443` and Pinggy hands back a public HTTP/HTTPS URL.
Free tier: 60-minute tunnels, random subdomain, no signup. Pro tier ($3/mo) is opt-in and requires a token.
## When to Use
- User asks to "expose this locally", "share my dev server", "make this URL public", "tunnel port N", "get a public URL for a webhook"
- Need to receive a webhook callback during a local task (Stripe, GitHub, Discord, AgentMail)
- Sharing a one-off HTTP demo (MCP server, Ollama/vLLM endpoint, dashboard) with a remote party
- The host has SSH but no `cloudflared` / `ngrok` binary, and installing one would be overkill
If the host already has `cloudflared` configured, prefer the `cloudflared-quick-tunnel` skill — Cloudflare quick tunnels don't expire after 60 minutes.
## Prerequisites
- `ssh` on PATH (`ssh -V`). Default on Linux, macOS, and Windows 10+. No other install.
- A local service listening on `127.0.0.1:<port>` before the tunnel starts. Pinggy will return URLs but they'll 502 until the local origin is up.
Optional:
- `PINGGY_TOKEN` env var for paid Pro features (persistent subdomain, custom domain, multiple tunnels, no 60-minute cap). Free tier needs no credentials.
## Quick Reference
```bash
# Plain HTTP/HTTPS tunnel for port 8000 (free tier)
ssh -p 443 -o StrictHostKeyChecking=no -o ServerAliveInterval=30 \
-R0:localhost:8000 free@a.pinggy.io
# TCP tunnel (databases, raw SSH, etc.)
ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:5432 tcp@a.pinggy.io
# TLS tunnel (Pinggy can't decrypt — bring your own certs at origin)
ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:443 tls@a.pinggy.io
# Basic auth gate (b:user:pass)
ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
"b:admin:secret+free@a.pinggy.io"
# Bearer token gate (k:token)
ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
"k:mysecrettoken+free@a.pinggy.io"
# IP whitelist (w:CIDR)
ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
"w:203.0.113.0/24+free@a.pinggy.io"
# Enable CORS + force HTTPS redirect
ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 \
"co+x:https+free@a.pinggy.io"
# Pro tier (persistent URL, no 60-min cap)
ssh -p 443 -o StrictHostKeyChecking=no -R0:localhost:8000 "$PINGGY_TOKEN@a.pinggy.io"
```
## Procedure — Start a Tunnel and Get the URL
The model SHOULD use the `terminal` tool. The tunnel must stay alive for the duration of the share, so run it as a background process and parse the public URL from stdout.
### 1. Confirm a local origin is up
```bash
curl -sI http://127.0.0.1:8000/ | head -1
# expect HTTP/1.x 200 (or any non-connection-refused response)
```
If nothing is listening yet, start it first (e.g. `python3 -m http.server 8000 --bind 127.0.0.1`). Pinggy will happily return a URL pointed at nothing — the user will see 502 until the origin comes up.
### 2. Launch the tunnel as a background process
Use `terminal(background=True)` and capture output to a logfile (Pinggy prints the URLs on stdout, then keeps the connection open):
```bash
LOG=/tmp/pinggy-8000.log
nohup ssh -p 443 \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=30 \
-o ServerAliveCountMax=3 \
-R0:localhost:8000 free@a.pinggy.io \
> "$LOG" 2>&1 &
echo $! > /tmp/pinggy-8000.pid
```
`StrictHostKeyChecking=no` + `UserKnownHostsFile=/dev/null` skips the first-run host-key prompt. `ServerAliveInterval=30` keeps the SSH session from getting torn down by an idle NAT.
### 3. Parse the URL out of the log
```bash
sleep 4
grep -oE 'https://[a-z0-9.-]+\.pinggy\.link' /tmp/pinggy-8000.log | head -1
```
Expected output looks like:
```
You are not authenticated.
Your tunnel will expire in 60 minutes.
http://yqycl-98-162-69-48.a.free.pinggy.link
https://yqycl-98-162-69-48.a.free.pinggy.link
```
Hand the `https://...pinggy.link` URL to the user.
### 4. Verify
```bash
curl -sI https://<the-url>/ | head -3
# expect 200/302/whatever the local origin actually returns
```
If you get `502 Bad Gateway`, the SSH session is up but the local origin isn't listening — fix step 1 first.
### 5. Teardown
```bash
kill "$(cat /tmp/pinggy-8000.pid)"
# or, if the pid file got lost:
pkill -f 'ssh -p 443 .* free@a\.pinggy\.io'
```
If you have a session_id from `terminal(background=True)`, prefer `process(action='kill', session_id=...)`.
## Access Control via Username Keywords
Pinggy stacks control flags into the SSH username separated by `+`. Always quote the whole `user@host` argument when it contains a `+`:
| Keyword | Effect |
|---------|--------|
| `b:user:pass` | HTTP Basic auth gate |
| `k:token` | Bearer-token header gate (`Authorization: Bearer <token>`) |
| `w:CIDR` | IP whitelist (single IP or CIDR, repeatable) |
| `co` | Add `Access-Control-Allow-Origin: *` (CORS) |
| `x:https` | Force HTTPS — auto-redirect HTTP to HTTPS |
| `a:Name:Value` | Add request header |
| `u:Name:Value` | Update request header |
| `r:Name` | Remove request header |
| `qr` | Print a QR code of the URL to stdout (handy for mobile sharing) |
Combine freely: `"b:admin:secret+co+x:https+free@a.pinggy.io"`.
## Web Debugger (optional)
Pinggy can mirror the inbound traffic to `localhost:4300` for inspection. Add a local forward to the SSH command:
```bash
ssh -p 443 -L4300:localhost:4300 -R0:localhost:8000 free@a.pinggy.io
```
Then open `http://localhost:4300` in a browser to see live request/response pairs.
## Pitfalls
- **60-minute hard cap on the free tier.** The SSH session terminates at the 60-minute mark; the URL goes dead. For longer shares, either use `PINGGY_TOKEN` (Pro) or auto-restart with a shell loop (note that the URL changes on every restart for free-tier).
- **Free-tier URL is random and changes on restart.** Don't bookmark it, don't paste it into a config file. Re-parse from the log each time.
- **Concurrent free tunnels are limited to one per source IP.** Starting a second tunnel from the same machine usually kills the first. Pro tier lifts this.
- **`+` in usernames must be quoted.** Bare `ssh ... b:admin:secret+free@a.pinggy.io` works in bash but breaks under shells that treat `+` specially or when assembled programmatically. Always wrap in double quotes.
- **Don't tunnel anything sensitive without an access-control flag.** A bare HTTP tunnel is reachable by anyone with the URL. Use `b:`, `k:`, or `w:` for non-public services.
- **`process(action='log')` may miss SSH banner output.** Pinggy prints the URLs and then the SSH session goes interactive. Always redirect to a logfile and `grep` the file directly — same pattern as `cloudflared-quick-tunnel`.
- **Host-key prompt on first run.** Default OpenSSH config asks the user to accept Pinggy's host key. Always pass `-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null` for unattended runs.
- **TCP and TLS tunnels return a `<subdomain>.a.pinggy.online:<port>` pair, not an https URL.** Parse with a different regex (`tcp://` and a port). Don't assume every Pinggy tunnel is HTTP.
- **Pro mode requires the token as the username, not a flag.** Use `"$PINGGY_TOKEN@a.pinggy.io"` (the token takes the place of `free` as the SSH user). With a token you can also add `:persistent` for a stable subdomain — see `pinggy.io/docs/`.
## Recipes
Composite patterns combining a local origin with a Pinggy tunnel. Each recipe is self-contained — start the origin, start the tunnel, parse the URL, hand it back to the user.
### Recipe 1 — Receive a webhook callback
Use this when an external service (Stripe, GitHub, Discord, AgentMail, etc.) needs to POST to a publicly reachable URL during a local task.
```bash
# 1. Tiny capturing server: every request gets appended to /tmp/webhook-hits.log
cat >/tmp/webhook-server.py <<'PY'
import http.server, json, datetime, pathlib
LOG = pathlib.Path("/tmp/webhook-hits.log")
class H(http.server.BaseHTTPRequestHandler):
def _capture(self):
n = int(self.headers.get("content-length") or 0)
body = self.rfile.read(n).decode("utf-8", "replace") if n else ""
rec = {"t": datetime.datetime.utcnow().isoformat(), "path": self.path,
"method": self.command, "headers": dict(self.headers), "body": body}
with LOG.open("a") as f: f.write(json.dumps(rec) + "\n")
self.send_response(200); self.send_header("content-type","application/json")
self.end_headers(); self.wfile.write(b'{"ok":true}\n')
def do_GET(self): self._capture()
def do_POST(self): self._capture()
def log_message(self,*a,**k): pass
http.server.HTTPServer(("127.0.0.1", 18080), H).serve_forever()
PY
nohup python3 /tmp/webhook-server.py >/tmp/webhook-server.log 2>&1 &
echo $! >/tmp/webhook-server.pid
# 2. Tunnel — bearer-token-gate so randos can't pollute the capture log
nohup ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=30 \
-R0:localhost:18080 "k:$(openssl rand -hex 12)+free@a.pinggy.io" \
>/tmp/webhook-pinggy.log 2>&1 &
echo $! >/tmp/webhook-pinggy.pid
sleep 5
URL=$(grep -oE 'https://[a-z0-9.-]+\.pinggy\.link' /tmp/webhook-pinggy.log | head -1)
echo "Webhook URL: $URL"
# 3. While the agent works, watch hits land
tail -f /tmp/webhook-hits.log
```
Hand `$URL` to the service that needs to call you. Teardown: `kill $(cat /tmp/webhook-server.pid) $(cat /tmp/webhook-pinggy.pid)`.
### Recipe 2 — Expose an MCP server over HTTP/SSE
Use when a remote MCP client (Claude Desktop on another machine, a teammate's editor, etc.) needs to reach an MCP server running on the local box. Only works for MCP servers that speak HTTP transport — stdio-mode servers can't be tunneled.
```bash
# 1. Start the MCP server in HTTP mode (example: a FastMCP server on port 8765)
nohup python3 my_mcp_server.py --transport http --port 8765 \
>/tmp/mcp-server.log 2>&1 &
echo $! >/tmp/mcp-server.pid
# 2. Tunnel with a bearer token — MCP traffic should not be open to the internet
TOKEN=$(openssl rand -hex 16)
nohup ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=30 \
-R0:localhost:8765 "k:$TOKEN+free@a.pinggy.io" \
>/tmp/mcp-pinggy.log 2>&1 &
echo $! >/tmp/mcp-pinggy.pid
sleep 5
URL=$(grep -oE 'https://[a-z0-9.-]+\.pinggy\.link' /tmp/mcp-pinggy.log | head -1)
echo "MCP URL: $URL"
echo "Bearer token: $TOKEN"
```
The remote client connects to `$URL` with `Authorization: Bearer $TOKEN`. Hermes' own native MCP client config: `{"transport": "http", "url": "<URL>", "headers": {"Authorization": "Bearer <TOKEN>"}}`.
### Recipe 3 — Expose a local LLM endpoint (Ollama / vLLM / llama.cpp)
Share a local model with a remote caller (another agent, a phone, a teammate). Ollama listens on `:11434`, vLLM and llama.cpp typically on `:8000`.
```bash
# Pre-req: the model server is already running on 127.0.0.1:11434 (Ollama default)
TOKEN=$(openssl rand -hex 16)
nohup ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=30 \
-R0:localhost:11434 "k:$TOKEN+co+free@a.pinggy.io" \
>/tmp/llm-pinggy.log 2>&1 &
echo $! >/tmp/llm-pinggy.pid
sleep 5
URL=$(grep -oE 'https://[a-z0-9.-]+\.pinggy\.link' /tmp/llm-pinggy.log | head -1)
echo "Endpoint: $URL"
echo "Token: $TOKEN"
# Verify
curl -s "$URL/api/tags" -H "Authorization: Bearer $TOKEN" | head
```
`co` enables CORS so a browser caller can hit the endpoint. Drop `co` for backend-only callers. For an OpenAI-compatible vLLM/llama.cpp endpoint, callers use base URL `$URL/v1` with `Authorization: Bearer $TOKEN` — but note Pinggy strips/replaces nothing in the body, so the model server itself sees Pinggy's token; the local server should be configured to ignore auth (it's already on `127.0.0.1`) and let Pinggy do the gating.
### Recipe 4 — Share a dev server with a one-shot password
The fastest "let a teammate poke at my running app" pattern. Random password, prints once, dies when you Ctrl-C.
```bash
PASS=$(openssl rand -base64 12 | tr -d '+/=' | head -c 12)
echo "Dev server password: $PASS"
ssh -p 443 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-o ServerAliveInterval=30 \
-R0:localhost:3000 "b:dev:$PASS+co+x:https+free@a.pinggy.io"
# URL prints to the terminal. Share URL + password. Ctrl-C to tear down.
```
`b:dev:$PASS` gates the URL with HTTP Basic auth. `x:https` forces TLS. `co` adds CORS for SPA frontends.
## Verification
```bash
# End-to-end: spin up a trivial origin, tunnel it, hit it, tear down
python3 -m http.server 18000 --bind 127.0.0.1 >/tmp/origin.log 2>&1 &
ORIGIN_PID=$!
nohup ssh -p 443 \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
-R0:localhost:18000 free@a.pinggy.io >/tmp/pinggy-verify.log 2>&1 &
SSH_PID=$!
sleep 5
URL=$(grep -oE 'https://[a-z0-9.-]+\.pinggy\.link' /tmp/pinggy-verify.log | head -1)
echo "URL: $URL"
curl -sI "$URL/" | head -1
kill "$SSH_PID" "$ORIGIN_PID"
```
Expected: a `pinggy.link` URL and `HTTP/2 200` on the curl head.

View file

@ -0,0 +1,199 @@
---
name: darwinian-evolver
description: Evolve prompts/regex/SQL/code with Imbue's evolution loop.
version: 0.1.0
author: Bihruze (Asahi0x), Hermes Agent
license: MIT
platforms: [linux, macos]
metadata:
hermes:
tags: [evolution, optimization, prompt-engineering, research]
related_skills: [arxiv, jupyter-live-kernel]
---
# Darwinian Evolver
Run Imbue's [darwinian_evolver](https://github.com/imbue-ai/darwinian_evolver) — an
LLM-driven evolutionary search loop — to optimize a **prompt, regex, SQL query,
or small code snippet** against a fitness function.
Status: thin wrapper around the upstream tool. The skill installs it, walks the
agent through writing a `Problem` definition (organism + evaluator + mutator),
and drives the loop via the upstream CLI or a small custom Python driver.
**License:** the upstream tool is **AGPL-3.0**. The skill ONLY ever invokes it
via the upstream CLI or a `subprocess`/`uv run` call (mere aggregation). Do NOT
import upstream classes into Hermes itself.
## When to Use
- User says "optimize this prompt", "evolve a regex for X", "auto-improve this
code/SQL", "search for a better instruction".
- You have a scorer (exact match, regex pass-rate, unit test, LLM-judge, runtime
metric) AND a starting candidate (organism). If you don't have a scorer, stop
and define one first — that's the hard part.
- Cost is OK: a typical run is 50–500 LLM calls. On gpt-4o-mini that's pennies;
on Claude Sonnet it can be a few dollars.
Do **not** use this when:
- The optimization target is differentiable (use gradient descent / DSPy).
- You only need to try 2–3 variants — just write them by hand.
- The fitness signal is purely subjective with no measurable criterion.
## Prerequisites
- Python ≥3.11
- `git`, `uv` (or `pip`)
- One of: `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, or `OPENAI_API_KEY`
The skill ships a small `parrot_openrouter.py` driver that uses `OPENROUTER_API_KEY`
via the OpenAI SDK, so any model on OpenRouter works. The upstream CLI itself
hardcodes Anthropic and needs `ANTHROPIC_API_KEY`.
## Install (One-Time)
Run via the `terminal` tool:
```bash
mkdir -p ~/.hermes/cache/darwinian-evolver && cd ~/.hermes/cache/darwinian-evolver
[ -d darwinian_evolver ] || git clone --depth 1 https://github.com/imbue-ai/darwinian_evolver.git
cd darwinian_evolver && uv sync
```
Verify:
```bash
cd ~/.hermes/cache/darwinian-evolver/darwinian_evolver \
&& uv run darwinian_evolver --help | head -5
```
## Quick Start — The Built-In Parrot Example
Tiny smoke test (requires `ANTHROPIC_API_KEY`):
```bash
cd ~/.hermes/cache/darwinian-evolver/darwinian_evolver
uv run darwinian_evolver parrot \
--num_iterations 2 \
--num_parents_per_iteration 2 \
--mutator_concurrency 2 --evaluator_concurrency 2 \
--output_dir /tmp/parrot_demo
```
Outputs:
- `/tmp/parrot_demo/snapshots/iteration_N.pkl` — pickled population per iteration
- `/tmp/parrot_demo/<jsonl>` — per-iteration JSON log (path printed at end)
Open `~/.hermes/cache/darwinian-evolver/darwinian_evolver/darwinian_evolver/lineage_visualizer.html`
in a browser and load the JSON log to see the evolutionary tree.
## Quick Start — OpenRouter Driver (No Anthropic Key)
The skill ships `scripts/parrot_openrouter.py` — same parrot problem, but the
LLM call goes through OpenRouter so any provider works.
```bash
# From wherever the skill is installed:
SKILL_DIR=~/.hermes/skills/research/darwinian-evolver
DE_DIR=~/.hermes/cache/darwinian-evolver/darwinian_evolver
cd "$DE_DIR" && \
EVOLVER_MODEL='openai/gpt-4o-mini' \
uv run --with openai python "$SKILL_DIR/scripts/parrot_openrouter.py" \
--num_iterations 3 --num_parents_per_iteration 2 \
--output_dir /tmp/parrot_or
```
Inspect the result with `scripts/show_snapshot.py`:
```bash
uv run --with openai python "$SKILL_DIR/scripts/show_snapshot.py" \
/tmp/parrot_or/snapshots/iteration_3.pkl
```
Expected output: 7 evolved prompt templates ranked by score, with the best
landing around 0.6–0.8 (the seed `Say {{ phrase }}` scored 0.000).
## Defining a Custom Problem
The skill ships `templates/custom_problem_template.py` — copy, edit, run.
Three things you must define:
1. **`Organism`** — a Pydantic `BaseModel` subclass holding the artifact being
evolved (`prompt_template: str`, `regex_pattern: str`, `sql_query: str`,
`code_block: str`, etc.). Add a `run(*args)` method that exercises it.
2. **`Evaluator`** — `.evaluate(organism) -> EvaluationResult(score=..., trainable_failure_cases=[...], holdout_failure_cases=[...], is_viable=True)`.
- **`score`** is in `[0, 1]`. Higher is better.
- **`trainable_failure_cases`** — what the mutator sees. Include enough
context (input, expected, actual) for the LLM to diagnose.
- **`holdout_failure_cases`** — kept out of the mutator's view. Use these
to detect overfitting.
- **`is_viable=True`** unless the organism is completely broken (raises,
returns None, etc.). A 0-score viable organism is fine — it just gets
down-weighted in parent selection.
3. **`Mutator`** — `.mutate(organism, failure_cases, learning_log_entries) -> list[Organism]`.
Typically: build an LLM prompt that includes the current organism + a
failure case + an ask to propose a fix; parse the LLM's response; return
a new `Organism`. Return `[]` on parse failure — the loop handles it.
Then write a driver script that wires `Problem(initial_organism, evaluator, [mutators])`
into `EvolveProblemLoop` and iterates over `loop.run(num_iterations=N)` — the
shipped `scripts/parrot_openrouter.py` is the reference.
## Hyperparameters That Actually Matter
| flag | default | when to change |
|---|---|---|
| `--num_iterations` | 5 | bump to 10–20 once you trust the evaluator |
| `--num_parents_per_iteration` | 4 | drop to 2 for cheap exploration |
| `--mutator_concurrency` | 10 | drop to 2–4 to avoid rate limits |
| `--evaluator_concurrency` | 10 | same; evaluator hits the LLM too |
| `--batch_size` | 1 | raise to 3–5 once your mutator handles multiple failures |
| `--verify_mutations` | off | turn on once mutator is wasteful (>10× cost saving on later runs per Imbue) |
| `--midpoint_score` | `p75` | leave alone unless scores cluster |
| `--sharpness` | 10 | leave alone |
## Pitfalls
1. **`Initial organism must be viable`** — set `is_viable=True` in your
`EvaluationResult` even on a 0-score seed. The loop refuses non-viable
organisms because they imply the loop has nothing to evolve from.
2. **Provider content filters kill runs.** Azure-backed OpenRouter models
reject phrases like "ignore previous instructions" with HTTP 400. Wrap
the LLM call in `try/except` and return `f"<LLM_ERROR: {e}>"` — the
evolver will just score that organism 0 and move on.
3. **`loop.run()` is a generator** — calling it doesn't run anything until
you iterate. Use `for snap in loop.run(num_iterations=N):`.
4. **Snapshots are nested pickles.** `iteration_N.pkl` contains a dict with
`population_snapshot` (more pickled bytes). To unpickle you must have the
`Organism` class importable under the same dotted path it was pickled at.
5. **Concurrency defaults are aggressive.** 10/10 will hit rate limits on
most providers. Start with 2/2.
6. **CLI is hardcoded to Anthropic.** `uv run darwinian_evolver <problem>`
reaches for `ANTHROPIC_API_KEY` and uses Claude Sonnet. To use any other
provider, write a driver like `parrot_openrouter.py`.
7. **AGPL.** Never `from darwinian_evolver import ...` inside Hermes core.
Custom driver scripts under `~/.hermes/skills/...` are user-side and fine.
8. **No PyPI package.** `pip install darwinian-evolver` will pull the wrong
thing. Always install from the GitHub repo.
## Verification
After install + a parrot run, exit code 0 from this is sufficient:
```bash
DE_DIR=~/.hermes/cache/darwinian-evolver/darwinian_evolver
ls "$DE_DIR/darwinian_evolver/lineage_visualizer.html" >/dev/null && \
cd "$DE_DIR" && uv run darwinian_evolver --help >/dev/null && \
echo "darwinian-evolver: OK"
```
## References
- [Imbue research post](https://imbue.com/research/2026-02-27-darwinian-evolver/)
- [ARC-AGI-2 results](https://imbue.com/research/2026-02-27-arc-agi-2-evolution/)
- [imbue-ai/darwinian_evolver](https://github.com/imbue-ai/darwinian_evolver) (AGPL-3.0)
- [Darwin Gödel Machines](https://arxiv.org/abs/2505.22954)
- [PromptBreeder](https://arxiv.org/abs/2309.16797)

View file

@ -0,0 +1,218 @@
"""
parrot_openrouter: same as the upstream `parrot` example but the LLM call goes
through OpenRouter (OpenAI SDK) instead of Anthropic native. Lets us run an
end-to-end evolution with whatever model the user already has paid access to.
Run with:
uv --project darwinian_evolver run python parrot_openrouter.py \
--num_iterations 3 --output_dir /tmp/parrot_out
Reads `OPENROUTER_API_KEY` from the environment.
"""
from __future__ import annotations
import argparse
import os
import sys
from pathlib import Path
import jinja2
from openai import OpenAI
# Vendored problem types from upstream (AGPL — only run via subprocess in production)
from darwinian_evolver.cli_common import build_hyperparameter_config_from_args
from darwinian_evolver.cli_common import register_hyperparameter_args
from darwinian_evolver.cli_common import parse_learning_log_view_type
from darwinian_evolver.evolve_problem_loop import EvolveProblemLoop
from darwinian_evolver.learning_log import LearningLogEntry
from darwinian_evolver.problem import EvaluationFailureCase
from darwinian_evolver.problem import EvaluationResult
from darwinian_evolver.problem import Evaluator
from darwinian_evolver.problem import Mutator
from darwinian_evolver.problem import Organism
from darwinian_evolver.problem import Problem
DEFAULT_MODEL = os.environ.get("EVOLVER_MODEL", "openai/gpt-4o-mini")
def _client() -> OpenAI:
key = os.environ.get("OPENROUTER_API_KEY")
if not key:
sys.exit("OPENROUTER_API_KEY is not set")
return OpenAI(api_key=key, base_url="https://openrouter.ai/api/v1")
def _prompt_llm(prompt: str) -> str:
try:
r = _client().chat.completions.create(
model=DEFAULT_MODEL,
max_tokens=1024,
messages=[{"role": "user", "content": prompt}],
)
return r.choices[0].message.content or ""
except Exception as e:
# Treat any provider error (rate limit, content filter, schema reject)
# as a failed response. The evolver will simply see this as a low score
# on this organism and move on — much friendlier than killing the run.
return f"<LLM_ERROR: {type(e).__name__}: {e}>"
class ParrotOrganism(Organism):
    """Organism whose evolved artifact is a Jinja2 prompt template."""

    prompt_template: str

    def run(self, phrase: str) -> str:
        """Render the template with ``phrase`` and return the LLM's reply.

        Args:
            phrase: Value bound to the ``phrase`` variable in the template.

        Returns:
            The text from ``_prompt_llm`` for the rendered prompt; an
            ``"Error rendering prompt: ..."`` string when the template fails
            to compile or render; ``""`` when the rendered prompt is empty.
        """
        # Imported locally so this method does not rely on the file-level
        # import list exposing the sandbox submodule.
        from jinja2.sandbox import SandboxedEnvironment

        try:
            # After the first mutation the template text is written by an LLM,
            # i.e. untrusted input. Render it in Jinja2's sandbox so a template
            # cannot reach unsafe attributes; a sandbox violation raises
            # SecurityError, which is a TemplateError and is handled below.
            template = SandboxedEnvironment().from_string(self.prompt_template)
            prompt = template.render(phrase=phrase)
        except jinja2.exceptions.TemplateError as e:
            return f"Error rendering prompt: {e}"
        if not prompt:
            return ""
        return _prompt_llm(prompt)
class ParrotEvaluationFailureCase(EvaluationFailureCase):
    """A single phrase the organism failed to echo, plus what it returned instead."""

    # The phrase the organism was asked to repeat.
    phrase: str
    # The value returned by `organism.run(phrase)`, which differed from `phrase`.
    response: str
class ImproveParrotMutator(Mutator[ParrotOrganism, ParrotEvaluationFailureCase]):
    """Mutator that asks the LLM to repair a prompt template from one failure case."""

    # Rendered with `organism` and `failure_case`; the reply is expected to
    # carry the new template in its last triple-backtick block.
    IMPROVEMENT_PROMPT_TEMPLATE = """
We want to build a prompt that causes an LLM to repeat back a given phrase verbatim.
The current prompt template is:
```
{{ organism.prompt_template }}
```
Unfortunately, on this phrase:
```
{{ failure_case.phrase }}
```
the LLM responded with:
```
{{ failure_case.response }}
```
Diagnose what went wrong, then propose an improved prompt template. Put the new
template in the LAST triple-backtick block of your response.
""".strip()

    def mutate(
        self,
        organism: ParrotOrganism,
        failure_cases: list[ParrotEvaluationFailureCase],
        learning_log_entries: list[LearningLogEntry],
    ) -> list[ParrotOrganism]:
        """Propose an improved organism based on the first failure case.

        Args:
            organism: The parent whose template should be improved.
            failure_cases: Failures to learn from; only the first is used.
            learning_log_entries: Accepted for interface compatibility; unused.

        Returns:
            A one-element list holding the new organism, or ``[]`` when there
            is no failure case, the reply has no complete fenced block, or an
            error occurs while producing the mutation.
        """
        # Without a failure case there is nothing to put in the prompt;
        # indexing an empty list here used to raise IndexError outside the try.
        if not failure_cases:
            return []
        fc = failure_cases[0]
        prompt = jinja2.Template(self.IMPROVEMENT_PROMPT_TEMPLATE).render(
            organism=organism, failure_case=fc
        )
        try:
            resp = _prompt_llm(prompt)
            parts = resp.split("```")
            # A complete fenced block needs at least text/fence/body/fence/text,
            # i.e. three segments after splitting on the fence marker.
            if len(parts) < 3:
                return []
            # The segment before the final fence is the body of the last block.
            new_tpl = parts[-2].strip()
            return [ParrotOrganism(prompt_template=new_tpl)]
        except Exception as e:
            print(f"mutate error: {e}", file=sys.stderr)
            return []
class ParrotEvaluator(Evaluator[ParrotOrganism, EvaluationResult, ParrotEvaluationFailureCase]):
    """Scores an organism by how many fixed phrases it echoes back exactly."""

    # Phrases whose failures are reported as trainable failure cases.
    TRAINABLE_PHRASES = [
        "Hello world.",
        "bla",
        "Bla",
        "bla.",
        '"bla bla".',
        "Just say 'foo' once with no extra words.",
    ]
    # Phrases whose failures are reported as holdout failure cases.
    HOLDOUT_PHRASES = [
        "bla, but only once.",
        "'bla'",
    ]

    def evaluate(self, organism: ParrotOrganism) -> EvaluationResult:
        """Run the organism on every phrase and collect exact-match failures.

        The score is the fraction of all phrases (trainable + holdout) whose
        response equals the phrase. The result is always marked viable.
        """
        trainable_misses: list[ParrotEvaluationFailureCase] = [
            ParrotEvaluationFailureCase(
                phrase=phrase, response=reply, data_point_id=f"trainable_{idx}")
            for idx, phrase in enumerate(self.TRAINABLE_PHRASES)
            if (reply := organism.run(phrase)) != phrase
        ]
        holdout_misses: list[ParrotEvaluationFailureCase] = [
            ParrotEvaluationFailureCase(
                phrase=phrase, response=reply, data_point_id=f"holdout_{idx}")
            for idx, phrase in enumerate(self.HOLDOUT_PHRASES)
            if (reply := organism.run(phrase)) != phrase
        ]

        total = len(self.TRAINABLE_PHRASES) + len(self.HOLDOUT_PHRASES)
        passed = total - len(trainable_misses) - len(holdout_misses)
        return EvaluationResult(
            score=passed / total,
            trainable_failure_cases=trainable_misses,
            holdout_failure_cases=holdout_misses,
            # A 0-score organism is still reported as viable so that it can
            # be handed to the mutator.
            is_viable=True,
        )
def make_problem() -> Problem:
    """Assemble the parrot problem: evaluator, single mutator and the seed organism."""
    parrot_problem = Problem[ParrotOrganism, EvaluationResult, ParrotEvaluationFailureCase]
    evaluator = ParrotEvaluator()
    mutators = [ImproveParrotMutator()]
    # Seed template the evolution starts from.
    seed = ParrotOrganism(prompt_template="Say {{ phrase }}")
    return parrot_problem(
        evaluator=evaluator,
        mutators=mutators,
        initial_organism=seed,
    )
def main() -> int:
    """Parse CLI arguments, run the evolution loop, and persist each iteration.

    For every snapshot yielded by the loop this writes
    ``<output_dir>/snapshots/iteration_<n>.pkl``, prints a one-line summary and
    appends a JSON record to ``<output_dir>/results.jsonl``.

    Returns:
        0 once the loop has finished.
    """
    import json

    parser = argparse.ArgumentParser()
    register_hyperparameter_args(parser.add_argument_group("hyperparameters"))
    parser.add_argument("--num_iterations", type=int, default=3)
    parser.add_argument("--mutator_concurrency", type=int, default=4)
    parser.add_argument("--evaluator_concurrency", type=int, default=4)
    parser.add_argument("--output_dir", type=str, required=True)
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    hyper = build_hyperparameter_config_from_args(args)
    view_type = parse_learning_log_view_type(hyper.learning_log_view_type)
    loop = EvolveProblemLoop(
        problem=make_problem(),
        learning_log_view_type=view_type,
        num_parents_per_iteration=hyper.num_parents_per_iteration,
        mutator_concurrency=args.mutator_concurrency,
        evaluator_concurrency=args.evaluator_concurrency,
        fixed_midpoint_score=hyper.fixed_midpoint_score,
        midpoint_score_percentile=hyper.midpoint_score_percentile,
        sharpness=hyper.sharpness,
        novelty_weight=hyper.novelty_weight,
        batch_size=hyper.batch_size,
        should_verify_mutations=hyper.verify_mutations,
    )

    results_file = output_dir / "results.jsonl"
    snapshot_dir = output_dir / "snapshots"
    snapshot_dir.mkdir(exist_ok=True)

    print("Evaluating initial organism...")
    # loop.run() is consumed lazily: each yielded snapshot is persisted before
    # the next iteration is requested.
    for snap in loop.run(num_iterations=args.num_iterations):
        snapshot_path = snap_path = snapshot_dir / f"iteration_{snap.iteration}.pkl"
        snap_path.write_bytes(snap.snapshot)

        _best_organism, best_eval = snap.best_organism_result
        print(f"iter={snap.iteration} pop={snap.population_size} "
              f"best_score={best_eval.score:.3f}")

        record = {
            "iteration": snap.iteration,
            "best_score": best_eval.score,
            "pop_size": snap.population_size,
            "score_percentiles": {str(k): v for k, v in snap.score_percentiles.items()},
        }
        # Append mode: one JSON line per iteration.
        with results_file.open("a") as log_file:
            log_file.write(json.dumps(record) + "\n")

    print(f"\nDone. Results in: {output_dir}")
    return 0
if __name__ == "__main__":
sys.exit(main())

View file

@ -0,0 +1,69 @@
"""
show_snapshot.py – Dump the population from a darwinian-evolver snapshot pickle.
Usage:
python show_snapshot.py PATH/TO/iteration_N.pkl [--field prompt_template]
The script is intentionally Organism-agnostic: for each organism it walks `vars(org)`
and prints the first public str field it finds (skipping `id`). Pass --field to target
a specific attribute instead (e.g. `prompt_template`, `regex_pattern`, `sql_query`, `code_block`).
"""
from __future__ import annotations
import argparse
import pickle
import sys
from pathlib import Path
def main() -> int:
    """Print the organisms stored in a snapshot pickle, best score first.

    Command-line arguments:
        snapshot: Path to an ``iteration_N.pkl`` file.
        --field:  Organism attribute to print. When omitted, the first public
                  str attribute of each organism (other than ``id``) is used.
        --top:    Only print the N highest-scoring organisms.

    Returns:
        0 on success. Exits via ``sys.exit`` with a message when the file is
        missing or is not a dict with a ``population_snapshot`` key.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("snapshot", type=Path)
    ap.add_argument(
        "--field",
        default=None,
        help="Organism attribute to display. Defaults to the first str field found.",
    )
    ap.add_argument("--top", type=int, default=None, help="Show only top N by score.")
    args = ap.parse_args()

    if not args.snapshot.exists():
        sys.exit(f"snapshot not found: {args.snapshot}")

    # SECURITY: unpickling can execute arbitrary code. Only point this script
    # at snapshot files you produced yourself.
    #
    # The outer pickle wraps a dict; the inner pickle contains the actual organism
    # objects, which must be importable under their original dotted path. If you
    # ran a custom driver, make sure its module is on sys.path before calling this.
    outer = pickle.loads(args.snapshot.read_bytes())
    if not isinstance(outer, dict) or "population_snapshot" not in outer:
        sys.exit("not a darwinian-evolver snapshot (no population_snapshot key)")
    inner = pickle.loads(outer["population_snapshot"])
    pairs = inner["organisms"]  # list of (Organism, EvaluationResult)
    print(f"# organisms: {len(pairs)}\n")

    # Missing or None scores sort as 0.
    ranked = sorted(pairs, key=lambda p: getattr(p[1], "score", 0) or 0, reverse=True)
    if args.top:
        ranked = ranked[: args.top]

    for i, (org, res) in enumerate(ranked):
        score = getattr(res, "score", None)
        if score is None:
            # The sort key above tolerates a None score; formatting must too,
            # otherwise ":.3f" raises TypeError.
            score = float("nan")
        print(f"=== rank {i} score={score:.3f} ===")
        # pick field
        field = args.field
        if field is None:
            for k, v in vars(org).items():
                if isinstance(v, str) and not k.startswith("_") and k not in ("id",):
                    field = k
                    break
        val = getattr(org, field, None) if field else None
        if val is None:
            print(f" (no string field; org fields: {list(vars(org).keys())})")
        else:
            # --field may name a non-str attribute; stringify it so len() and
            # splitlines() below cannot fail.
            text = val if isinstance(val, str) else str(val)
            print(f" {field} ({len(text)} chars):")
            # Cap the dump at 30 lines per organism.
            for ln in text.splitlines()[:30]:
                print(f" {ln}")
        print()
    return 0
if __name__ == "__main__":
sys.exit(main())

View file

@ -0,0 +1,240 @@
"""
Template: a custom darwinian-evolver problem.
Copy this file, fill in the THREE marked spots (Organism, Evaluator, Mutator),
then run it as a driver script. The skeleton handles all the wiring so you only
write the domain-specific logic.
To run:
cd ~/.hermes/cache/darwinian-evolver/darwinian_evolver
OPENROUTER_API_KEY=... uv run --with openai python /path/to/this_file.py \
--num_iterations 3 --num_parents_per_iteration 2 \
--output_dir /tmp/my_problem
The pattern mirrors `scripts/parrot_openrouter.py` (the working reference).
"""
from __future__ import annotations
import argparse
import os
import sys
from pathlib import Path
from openai import OpenAI
# Upstream types (AGPL — invoked via subprocess in production; importing here
# is fine for skill-side driver scripts the user owns).
from darwinian_evolver.cli_common import (
build_hyperparameter_config_from_args,
parse_learning_log_view_type,
register_hyperparameter_args,
)
from darwinian_evolver.evolve_problem_loop import EvolveProblemLoop
from darwinian_evolver.learning_log import LearningLogEntry
from darwinian_evolver.problem import (
EvaluationFailureCase,
EvaluationResult,
Evaluator,
Mutator,
Organism,
Problem,
)
DEFAULT_MODEL = os.environ.get("EVOLVER_MODEL", "openai/gpt-4o-mini")
def _client() -> OpenAI:
key = os.environ.get("OPENROUTER_API_KEY")
if not key:
sys.exit("OPENROUTER_API_KEY is not set")
return OpenAI(api_key=key, base_url="https://openrouter.ai/api/v1")
def _prompt_llm(prompt: str, max_tokens: int = 1024) -> str:
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the sole "user" message of the request.
        max_tokens: Completion-length cap forwarded to the API.

    Returns:
        The content of the first choice, or "" when that content is empty.
        If the request or the response handling raises an Exception, the
        exception is rendered into an "<LLM_ERROR: ...>" string and returned
        instead, so one failed call does not abort the whole run.
    """
    try:
        response = _client().chat.completions.create(
            model=DEFAULT_MODEL,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        text = response.choices[0].message.content
        return text if text else ""
    except Exception as err:
        # Deliberate best-effort: report the failure in-band as text.
        return f"<LLM_ERROR: {type(err).__name__}: {err}>"
# ---------------------------------------------------------------------------
# 1. ORGANISM — what you are evolving.
# ---------------------------------------------------------------------------
class MyOrganism(Organism):
    # The thing being evolved, stored as a single string.
    # TODO: rename this field to match your artifact. Typical choices:
    #   prompt_template: str
    #   regex_pattern: str
    #   sql_query: str
    #   code_block: str
    artifact: str

    def run(self, *inputs) -> str:
        """Apply the organism to test input(s) and return the value to score.

        TODO: implement this for your domain. Prompt evolution usually renders
        the artifact against the input and calls `_prompt_llm`; regex or SQL
        problems would instead run `re.findall(self.artifact, input)`, execute
        the query, and so on.

        Raises:
            NotImplementedError: always, until the template is filled in.
        """
        raise NotImplementedError()
# ---------------------------------------------------------------------------
# 2. EVALUATOR — score organisms and surface failures the mutator can learn from.
# ---------------------------------------------------------------------------
class MyFailureCase(EvaluationFailureCase):
    # One failed data point, as handed to the mutator.
    # TODO: carry enough context for the LLM to diagnose what went wrong.
    input: str     # the test input the organism was run on
    expected: str  # the output the dataset asked for
    actual: str    # the output the organism actually produced
class MyEvaluator(Evaluator[MyOrganism, EvaluationResult, MyFailureCase]):
    # The dataset is split in two. Mutator only sees trainable; holdout
    # detects overfitting.
    TRAINABLE = [
        # TODO: list of (input, expected) tuples
        #   ("input1", "expected1"),
    ]
    HOLDOUT = [
        # TODO: separate set the mutator never sees
    ]

    @staticmethod
    def _failures(organism: MyOrganism, dataset, label: str) -> list[MyFailureCase]:
        """Run `organism` over `dataset` and return one failure case per mismatch.

        `label` prefixes each `data_point_id` ("<label>_<index>").
        """
        failed: list[MyFailureCase] = []
        for index, (given, wanted) in enumerate(dataset):
            produced = organism.run(given)
            if produced == wanted:
                continue
            failed.append(
                MyFailureCase(
                    input=given,
                    expected=wanted,
                    actual=produced,
                    data_point_id=f"{label}_{index}",
                )
            )
        return failed

    def evaluate(self, organism: MyOrganism) -> EvaluationResult:
        """Score `organism` as the fraction of all data points it gets exactly right.

        Both splits count toward the score; their failures are reported in
        separate lists. With an empty dataset the score is 0.0.
        """
        # Trainable points are evaluated first, then holdout.
        trainable_failures = self._failures(organism, self.TRAINABLE, "trainable")
        holdout_failures = self._failures(organism, self.HOLDOUT, "holdout")

        total = len(self.TRAINABLE) + len(self.HOLDOUT)
        passed = total - len(trainable_failures) - len(holdout_failures)
        score = passed / total if total else 0.0

        return EvaluationResult(
            score=score,
            trainable_failure_cases=trainable_failures,
            holdout_failure_cases=holdout_failures,
            # Always-viable. The evolver only blocks completely-broken organisms;
            # a 0-score organism is fine and will simply be sampled less often.
            is_viable=True,
        )
# ---------------------------------------------------------------------------
# 3. MUTATOR — LLM proposes an improved organism from a failure case.
# ---------------------------------------------------------------------------
class MyMutator(Mutator[MyOrganism, MyFailureCase]):
    # Prompt sent to the LLM for one failure case. The reply is expected to end
    # with a triple-backtick block holding the proposed replacement artifact.
    PROMPT = """
The current artifact is:
```
{artifact}
```
On this input:
```
{input}
```
it produced:
```
{actual}
```
but we wanted:
```
{expected}
```
Diagnose what went wrong, then propose an improved version of the artifact.
Put the new version in the LAST triple-backtick block of your response.
""".strip()

    @staticmethod
    def _strip_language_tag(block: str) -> str:
        """Return the content of a fenced block without its language tag.

        `block` is the raw text between an opening and a closing fence. A tag
        such as "python" or "sql" can only sit on the fence line itself, i.e.
        before the first newline of `block`. When the fence is followed
        directly by a newline there is no tag, and the first content line must
        be kept even if it is short (e.g. "SELECT *").
        """
        info, newline, body = block.partition("\n")
        tag = info.strip()
        looks_like_tag = (
            bool(newline)
            and bool(tag)
            and len(tag) < 20
            and not any(ch.isspace() for ch in tag)
        )
        return body.strip() if looks_like_tag else block.strip()

    def mutate(
        self,
        organism: MyOrganism,
        failure_cases: list[MyFailureCase],
        learning_log_entries: list[LearningLogEntry],
    ) -> list[MyOrganism]:
        """Ask the LLM for an improved artifact based on the first failure case.

        Args:
            organism: The parent whose artifact should be improved.
            failure_cases: Failures of `organism`; only the first one is used.
            learning_log_entries: Accepted for interface compatibility; unused.

        Returns:
            A one-element list with the proposed organism, or an empty list
            when there is no failure case to learn from or the reply contains
            no usable fenced block (this includes "<LLM_ERROR: ...>" replies,
            which carry no fences).
        """
        if not failure_cases:
            # Nothing to diagnose; avoid an IndexError on failure_cases[0].
            return []
        fc = failure_cases[0]
        prompt = self.PROMPT.format(
            artifact=organism.artifact,
            input=fc.input,
            actual=fc.actual,
            expected=fc.expected,
        )
        resp = _prompt_llm(prompt)
        parts = resp.split("```")
        if len(parts) < 3:
            # Fewer than two fences: no complete code block in the reply.
            return []
        # parts[-2] is the text between the last two fences.
        new_artifact = self._strip_language_tag(parts[-2])
        if not new_artifact:
            # An empty block is not a proposal.
            return []
        return [MyOrganism(artifact=new_artifact)]
# ---------------------------------------------------------------------------
# Driver — fills in the EvolveProblemLoop boilerplate. You shouldn't need to
# touch anything below this line for a typical run.
# ---------------------------------------------------------------------------
def make_problem() -> Problem:
    """Assemble the Problem: seed organism, evaluator, and the single mutator."""
    seed = MyOrganism(artifact="TODO: starting artifact here")  # TODO
    problem_type = Problem[MyOrganism, EvaluationResult, MyFailureCase]
    return problem_type(
        evaluator=MyEvaluator(),
        mutators=[MyMutator()],
        initial_organism=seed,
    )
def main() -> int:
    """Parse CLI arguments, run the evolution loop, and write one snapshot per iteration.

    Snapshots are written to <output_dir>/snapshots/iteration_<n>.pkl and a
    one-line progress summary is printed for each. Returns 0 on completion.
    """
    parser = argparse.ArgumentParser()
    register_hyperparameter_args(parser.add_argument_group("hyperparameters"))
    parser.add_argument("--num_iterations", type=int, default=3)
    parser.add_argument("--mutator_concurrency", type=int, default=2)
    parser.add_argument("--evaluator_concurrency", type=int, default=2)
    parser.add_argument("--output_dir", type=str, required=True)
    args = parser.parse_args()

    # Create the output directory first, then its snapshots/ subdirectory.
    out = Path(args.output_dir)
    snapshot_dir = out / "snapshots"
    out.mkdir(parents=True, exist_ok=True)
    snapshot_dir.mkdir(exist_ok=True)

    hyper = build_hyperparameter_config_from_args(args)
    loop = EvolveProblemLoop(
        problem=make_problem(),
        learning_log_view_type=parse_learning_log_view_type(hyper.learning_log_view_type),
        num_parents_per_iteration=hyper.num_parents_per_iteration,
        mutator_concurrency=args.mutator_concurrency,
        evaluator_concurrency=args.evaluator_concurrency,
        fixed_midpoint_score=hyper.fixed_midpoint_score,
        midpoint_score_percentile=hyper.midpoint_score_percentile,
        sharpness=hyper.sharpness,
        novelty_weight=hyper.novelty_weight,
        batch_size=hyper.batch_size,
        should_verify_mutations=hyper.verify_mutations,
    )

    print("Evaluating initial organism...")
    for snap in loop.run(num_iterations=args.num_iterations):
        snapshot_path = snapshot_dir / f"iteration_{snap.iteration}.pkl"
        snapshot_path.write_bytes(snap.snapshot)
        # best_organism_result is an (organism, result) pair; only the result is reported.
        best = snap.best_organism_result[1]
        print(f"iter={snap.iteration} pop={snap.population_size} best_score={best.score:.3f}")
    print(f"\nDone. Results in: {out}")
    return 0


if __name__ == "__main__":
    sys.exit(main())

View file

@ -0,0 +1,277 @@
---
name: osint-investigation
description: Public-records OSINT investigation framework — SEC EDGAR filings, USAspending contracts, Senate lobbying, OFAC sanctions, ICIJ offshore leaks, NYC property records (ACRIS), OpenCorporates registries, CourtListener court records, Wayback Machine archives, Wikipedia + Wikidata, GDELT news monitoring. Entity resolution across sources, cross-link analysis, timing correlation, evidence chains. Python stdlib only.
version: 0.1.0
platforms: [linux, macos, windows]
author: Hermes Agent (adapted from ShinMegamiBoson/OpenPlanter, MIT)
metadata:
hermes:
tags: [osint, investigation, public-records, sec, sanctions, corporate-registry, property, courts, due-diligence, journalism]
category: research
related_skills: [domain-intel, arxiv]
---
# OSINT Investigation — Public Records Cross-Reference
Investigative framework for public-records OSINT: government contracts,
corporate filings, lobbying, sanctions, offshore leaks, property records,
court records, web archives, knowledge bases, and global news. Resolve
entities across heterogeneous sources, build cross-links with explicit
confidence, run statistical timing tests, and produce structured evidence
chains.
**Python stdlib only.** Zero install. Works on Linux, macOS, Windows. Most
sources work with no API key (OpenCorporates has an optional free token
that raises rate limits).
Adapted from the MIT-licensed ShinMegamiBoson/OpenPlanter project; expanded
to cover identity / property / litigation / archives / news sources that
the original didn't address.
## When to use this skill
Use when the user asks for:
- "follow the money" — government contracts, lobbying → legislation, sanctions
- corporate due diligence — who controls company X, where are they
incorporated, who serves on their boards, what filings have they made
- sanctions screening — is entity X on OFAC SDN, ICIJ offshore leaks
- pay-to-play investigation — contractors with offshore ties, lobbying
clients winning awards
- property ownership — find recorded deeds/mortgages by name or address
(NYC; for other counties point users at the relevant recorder)
- litigation history — find federal + state court opinions and PACER dockets
- multi-source entity resolution where naming varies (LLC suffixes, abbreviations)
- evidence-chain construction with explicit confidence levels
- "what's been said about X" — international news (GDELT) + Wikipedia
narrative + Wayback Machine to recover dead URLs
Do NOT use this skill for:
- general web research → `web_search` / `web_extract`
- domain/infrastructure OSINT → `domain-intel` skill
- academic literature → `arxiv` skill
- social-media profile discovery → `sherlock` skill (optional)
- US **federal** campaign finance — FEC is intentionally NOT covered here
(the API is unreliable for ad-hoc contributor-name queries on the free
DEMO_KEY tier). For federal donations, point users at
https://www.fec.gov/data/ directly.
## Workflow
The agent runs scripts via the `terminal` tool. `SKILL_DIR` is the directory
holding this SKILL.md.
### 1. Identify which sources apply
Read the data-source wiki entries to plan the investigation:
```
ls SKILL_DIR/references/sources/
# Federal financial / regulatory
cat SKILL_DIR/references/sources/sec-edgar.md # corporate filings
cat SKILL_DIR/references/sources/usaspending.md # federal contracts
cat SKILL_DIR/references/sources/senate-ld.md # lobbying
cat SKILL_DIR/references/sources/ofac-sdn.md # sanctions
cat SKILL_DIR/references/sources/icij-offshore.md # offshore leaks
# Identity / property / litigation / archives / news
cat SKILL_DIR/references/sources/nyc-acris.md # NYC property records
cat SKILL_DIR/references/sources/opencorporates.md # global corporate registry
cat SKILL_DIR/references/sources/courtlistener.md # court records (federal + state)
cat SKILL_DIR/references/sources/wayback.md # Wayback Machine archives
cat SKILL_DIR/references/sources/wikipedia.md # Wikipedia + Wikidata
cat SKILL_DIR/references/sources/gdelt.md # global news monitoring
```
Each entry follows a 9-section template: summary, access, schema, coverage,
cross-reference potential, data quality, acquisition, legal, references.
The **cross-reference potential** section maps join keys between sources — read
those first to pick the right pair.
### 2. Acquire data
Each source has a stdlib-only fetch script in `SKILL_DIR/scripts/`:
**Federal financial / regulatory**
```bash
# SEC EDGAR filings (corporate disclosures)
python3 SKILL_DIR/scripts/fetch_sec_edgar.py --cik 0000320193 \
--types 10-K,10-Q --out data/edgar_filings.csv
# USAspending federal contracts
python3 SKILL_DIR/scripts/fetch_usaspending.py --recipient "EXAMPLE CORP" \
--fy 2024 --out data/contracts.csv
# Senate LD-1 / LD-2 lobbying disclosures
python3 SKILL_DIR/scripts/fetch_senate_ld.py --client "EXAMPLE CORP" \
--year 2024 --out data/lobbying.csv
# OFAC SDN sanctions list (full snapshot)
python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --out data/ofac_sdn.csv
# ICIJ Offshore Leaks — downloads ~70 MB bulk CSV on first use,
# then searches it locally. Cached for 30 days under
# $HERMES_OSINT_CACHE/icij/ (default: ~/.cache/hermes-osint/icij/).
python3 SKILL_DIR/scripts/fetch_icij_offshore.py --entity "EXAMPLE CORP" \
--out data/icij.csv
```
**Identity / property / litigation / archives / news**
```bash
# NYC property records (deeds, mortgages, liens) — ACRIS via Socrata
python3 SKILL_DIR/scripts/fetch_nyc_acris.py --name "SMITH, JOHN" \
--out data/acris.csv
python3 SKILL_DIR/scripts/fetch_nyc_acris.py --address "571 HUDSON" \
--out data/acris_addr.csv
# OpenCorporates — 130+ jurisdiction corporate registry
# (free token required; set OPENCORPORATES_API_TOKEN or pass --token)
python3 SKILL_DIR/scripts/fetch_opencorporates.py --query "Example Corp" \
--jurisdiction us_ny --out data/opencorporates.csv
# CourtListener — federal + state court opinions, PACER dockets
python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Smith v. Example Corp" \
--type opinions --out data/courts.csv
# Wayback Machine — historical web captures
python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
--match host --collapse digest --out data/wayback.csv
# Wikipedia + Wikidata — narrative bio + structured facts
# Set HERMES_OSINT_UA=your-app/1.0 (your@email) to identify yourself
python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Bill Gates" \
--out data/wp.csv
# GDELT — global news in 100+ languages, ~2015→present
python3 SKILL_DIR/scripts/fetch_gdelt.py --query '"Example Corp"' \
--timespan 1y --out data/gdelt.csv
```
All outputs are normalized CSV with a header row. Re-run scripts idempotently.
When a private individual won't be in a source (e.g. SEC EDGAR for a non-public-
company person, USAspending for someone who isn't a federal contractor, Senate
LDA for someone who isn't a lobbying client), the script returns 0 rows with a
clear warning rather than silently writing an empty CSV. EDGAR specifically
flags when the company-name resolver matched an individual Form 3/4/5 filer
rather than a corporate registrant.
Rate-limit notes are in each source's wiki entry. Default fetchers sleep
politely between paginated requests. **API keys raise rate limits** for
sources that support them (`SEC_USER_AGENT`, `SENATE_LDA_TOKEN`,
`OPENCORPORATES_API_TOKEN`, `COURTLISTENER_TOKEN`). All scripts surface
429 responses immediately with the upstream's quota message so the user
knows to slow down or supply a key.
### 3. Resolve entities across sources
Normalize names and find matches between two CSV files:
```bash
# Match lobbying clients (Senate LDA) against contract recipients (USAspending)
python3 SKILL_DIR/scripts/entity_resolution.py \
--left data/lobbying.csv --left-name-col client_name \
--right data/contracts.csv --right-name-col recipient_name \
--out data/cross_links.csv
```
Three matching tiers with explicit confidence:
| Tier | Method | Confidence |
|------|--------|------------|
| `exact` | Normalized strings equal after suffix/punctuation strip | high |
| `fuzzy` | Sorted-token equality (word-bag match) | medium |
| `token_overlap` | ≥60% token overlap, ≥2 shared tokens, tokens ≥4 chars | low |
Output `cross_links.csv` columns: `match_type, confidence, left_name,
right_name, left_normalized, right_normalized, left_row, right_row`.
### 4. Statistical timing correlation (optional)
Test whether two time series cluster suspiciously close together — e.g.
lobbying filings near contract awards — using a permutation test:
```bash
python3 SKILL_DIR/scripts/timing_analysis.py \
--donations data/lobbying.csv --donation-date-col filing_date \
--donation-amount-col income --donation-donor-col client_name \
--donation-recipient-col registrant_name \
--contracts data/contracts.csv --contract-date-col award_date \
--contract-vendor-col recipient_name \
--cross-links data/cross_links.csv \
--permutations 1000 \
--out data/timing.json
```
The script's column flags are intentionally generic — the original tool was
written for donations vs awards, but it works for any (event, payee) time
series joined through cross-links. Null hypothesis: event timing is
independent of award dates. One-tailed p-value = fraction of permutations
with mean nearest-award distance ≤ observed. Minimum 3 events per (payer,
vendor) pair to run the test.
### 5. Build the findings JSON (evidence chain)
```bash
python3 SKILL_DIR/scripts/build_findings.py \
--cross-links data/cross_links.csv \
--timing data/timing.json \
--out data/findings.json
```
Every finding has `id, title, severity, confidence, summary, evidence[], sources[]`.
Each evidence item points back to a specific row in a source CSV. The user (or a
follow-up agent) can verify every claim against its source.
## Confidence and evidence discipline
This is the load-bearing rule of the skill. Tell the user:
- Every claim must trace to a record. No naked assertions.
- Confidence tier travels with the claim. `match_type=fuzzy` is "probable",
not "confirmed."
- Entity resolution produces candidates, NOT conclusions. A `fuzzy` match
between "ACME LLC" and "Acme Holdings Group" is a lead, not a fact.
- Statistical significance ≠ wrongdoing. p < 0.05 means the timing pattern
is unlikely under the null. It does not establish corruption.
- All data sources here are public records. They may still contain
inaccuracies, stale info, or redactions (GDPR, sealed records).
## Adding a new data source
Use the template:
```bash
cp SKILL_DIR/templates/source-template.md \
SKILL_DIR/references/sources/<your-source>.md
```
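Fill in all 9 sections. Write a `fetch_<source>.py` script in `scripts/` that
uses stdlib only and writes a normalized CSV. Update the source lists in
Workflow steps 1 and 2 above, and add a bullet to the "When to use this
skill" section if the source enables a new kind of request.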
## Tools and their limits
- `entity_resolution.py` does NOT use external fuzzy libraries (no rapidfuzz,
no jellyfish). Token-bag matching is the upper bound here. If you need
Levenshtein, transliteration, or phonetic matching, pip-install separately.
- `timing_analysis.py` uses Python's `random` for permutations. For
reproducibility, pass `--seed N`.
- `fetch_*.py` scripts use `urllib.request` and respect `Retry-After`. Heavy
bulk usage may still violate ToS — read each source's legal section first.
## Legal note
All Phase-1 sources are public records. Bulk acquisition is permitted under
their respective access terms (FOIA, public records law, ICIJ explicit
publication, OFAC public data). However:
- Some sources rate-limit aggressively. Respect their headers.
- Some redact registrant info (GDPR on WHOIS, sealed filings).
- Cross-referencing public records to identify private individuals can have
ethical implications. The skill produces evidence chains, not accusations.

View file

@ -0,0 +1,98 @@
# CourtListener — Free Law Project
## 1. Summary
CourtListener (Free Law Project) aggregates court opinions, dockets, oral
arguments, and judge data. Covers ~10M federal and state court opinions
back to colonial America, plus PACER docket data from RECAP submissions.
## 2. Access Methods
- **REST API v4:** `https://www.courtlistener.com/api/rest/v4/`
- **Auth:** Anonymous reads allowed on most endpoints; token raises rate
limits and unlocks bulk export
- **Rate limit:** ~5,000 req/hour unauthenticated for search; higher with token
Set `COURTLISTENER_TOKEN` env var. Get a free token at
https://www.courtlistener.com/sign-in/ then create an API key.
## 3. Data Schema
Key fields emitted by `fetch_courtlistener.py`:
| Column | Type | Description |
|--------|------|-------------|
| `case_name` | str | Case name |
| `court` | str | Court name |
| `court_id` | str | Court ID (e.g. `nysd`, `scotus`, `ca9`) |
| `date_filed` | str | YYYY-MM-DD |
| `docket_number` | str | Court docket number |
| `judge` | str | Judge name(s) |
| `citation` | str | Reporter citation(s) |
| `result_type` | str | opinions / dockets / oral / people |
| `snippet` | str | Search-match snippet (up to 500 chars) |
| `absolute_url` | str | Direct CourtListener URL |
## 4. Coverage
- Federal: all circuit and district courts, SCOTUS
- State: all 50 state supreme/appellate courts, many trial courts
- Opinions: ~10M back to 1600s (colonial), full coverage 1950 → present
- Dockets via RECAP: ~3M+ from user-submitted PACER PDFs
- Updated continuously
## 5. Cross-Reference Potential
- **OpenCorporates** ↔ `case_name` (corporate litigation)
- **SEC EDGAR** ↔ `case_name` (securities class actions)
- **OFAC SDN** ↔ `case_name` (sanctions-related civil/criminal cases)
Join key: party name from `case_name`. Note: `case_name` often abbreviates
("Smith v. Jones" rather than full party names) — use the full case URL
to get all parties.
## 6. Data Quality
- Older opinions (pre-1990) often lack docket numbers and judges
- State coverage is more uneven than federal
- PACER docket coverage depends on RECAP user submissions — not exhaustive
- Sealed documents are excluded
- Party names in case captions don't always match filing names exactly
## 7. Acquisition Script
Path: `scripts/fetch_courtlistener.py`
```bash
# Search opinions for a party / keyword
python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Example Corp" \
--out data/cl.csv
# PACER dockets (best for recent litigation)
python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Example Corp" \
--type dockets --out data/cl_dockets.csv
# Restrict to a court
python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Microsoft" \
--court ca9 --out data/cl_9th.csv
# Date range
python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Example Corp" \
--date-from 2020-01-01 --date-to 2024-12-31 --out data/cl.csv
```
Pass `--token` or set `COURTLISTENER_TOKEN`.
## 8. Legal & Licensing
- Court opinions are public domain
- Free Law Project provides the data under CC0 / public domain dedication
- No commercial use restrictions on opinion text or metadata
- Some PACER PDFs have copyright on layout (not text) — fair use applies
## 9. References
- API docs: https://www.courtlistener.com/help/api/rest/
- Court IDs: https://www.courtlistener.com/api/jurisdictions/
- RECAP archive: https://www.courtlistener.com/recap/
- Bulk data: https://www.courtlistener.com/help/api/bulk-data/

View file

@ -0,0 +1,104 @@
# GDELT — Global News Monitoring
## 1. Summary
GDELT (Global Database of Events, Language, and Tone) monitors world news
in 100+ languages with full-text indexing. Updated every 15 minutes.
~2015 → present, ~1B+ articles indexed. Free anonymous access.
GDELT is wider than Google News (more international, more long-tail
sources) and indexed by tone/sentiment, themes (CAMEO codes), people, and
organizations.
## 2. Access Methods
- **DOC 2.0 API:** `https://api.gdeltproject.org/api/v2/doc/doc`
- **Events / GKG 2.0:** `https://api.gdeltproject.org/api/v2/events/events`
- **Auth:** None
- **Rate limit:** **1 request per 5 seconds** for the DOC API — strict
The fetch script automatically retries after a 6-second sleep when a
429 is received.
## 3. Data Schema
Key fields emitted by `fetch_gdelt.py`:
| Column | Type | Description |
|--------|------|-------------|
| `title` | str | Article title |
| `url` | str | Article URL |
| `seen_date` | str | When GDELT first saw the article (UTC) |
| `domain` | str | Publisher domain |
| `language` | str | Source language |
| `source_country` | str | 2-letter country code |
| `tone` | str | GDELT-computed tone score (negative = negative coverage) |
| `social_image` | str | Open Graph image URL when available |
## 4. Coverage
- Worldwide news in 100+ languages
- ~2015 → present (Events back to 1979 via a separate stream)
- Update frequency: 15 minutes
- Bias: heavily Anglophone in volume but very wide source list overall
## 5. Cross-Reference Potential
- **All sources** ↔ `title` / `url` (news context for any subject)
- **Wikipedia** ↔ event timeline for notable entities
- **Wayback Machine** ↔ recover articles whose URLs have died
- **OFAC SDN** ↔ news context for sanctions designations
- **SEC EDGAR** ↔ news context for 8-K material events
Join key: entity name appearing in article title or full-text. GDELT also
extracts named entities into a separate stream (GKG) not exposed by this
fetcher — query GDELT directly for entity-level filtering.
## 6. Data Quality
- Title extraction is automated and can be wrong (sometimes captures the
site name + delimiter + article title; sometimes a generic page title)
- Sentiment / tone is computed by GDELT, not source-supplied
- Some domains are oversampled (newswires, aggregators)
- Source country is inferred from domain registration / TLD — can be
wrong for international news sites with country-neutral domains
- Article URLs can rot — pair with Wayback Machine to preserve content
## 7. Acquisition Script
Path: `scripts/fetch_gdelt.py`
```bash
# Recent news mentioning an entity
python3 SKILL_DIR/scripts/fetch_gdelt.py --query "Nous Research" \
--timespan 6m --out data/gdelt.csv
# Phrase-exact (use double quotes inside single quotes for the shell)
python3 SKILL_DIR/scripts/fetch_gdelt.py --query '"Dillon Rolnick"' \
--timespan 1y --out data/gdelt.csv
# Filter to a country / language
python3 SKILL_DIR/scripts/fetch_gdelt.py --query "Microsoft" \
--source-country US --source-lang English --out data/gdelt.csv
# Date range
python3 SKILL_DIR/scripts/fetch_gdelt.py --query "Microsoft" \
--start 2024-01-01 --end 2024-12-31 --out data/gdelt.csv
```
GDELT supports its own query operators: phrase quoting, AND/OR/NOT,
`sourcecountry:US`, `theme:ECON_BANKRUPTCY`, `tone<-5`, etc.
See https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/ for syntax.
## 8. Legal & Licensing
- GDELT data is provided free for academic and journalistic use
- Article URLs link out to original publishers — copyright remains with
the publisher
- GDELT is NOT a content archive; it's a metadata index
## 9. References
- DOC 2.0 API: https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/
- Themes & query syntax: https://blog.gdeltproject.org/gkg-2-0-our-global-knowledge-graph-2-0-amazing-data-at-your-fingertips/
- Project home: https://www.gdeltproject.org/

View file

@ -0,0 +1,104 @@
# ICIJ Offshore Leaks Database
## 1. Summary
The International Consortium of Investigative Journalists (ICIJ) publishes a
combined database of offshore entities from the Panama Papers, Paradise Papers,
Pandora Papers, Bahamas Leaks, and Offshore Leaks. ~800,000+ offshore entities
with their officers, intermediaries, and addresses.
## 2. Access Methods
- **Bulk download (primary):** `https://offshoreleaks-data.icij.org/offshoreleaks/csv/full-oldb.LATEST.zip` (~70 MB ZIP, refreshed periodically)
- **Search UI (human):** `https://offshoreleaks.icij.org/`
- **Auth:** None
- **Note:** The previous Open Refine reconciliation endpoint at
`/reconcile` now returns 404. ICIJ has removed it. The bulk ZIP is the
remaining stable access path. The skill's `fetch_icij_offshore.py` caches
the ZIP locally (default `~/.cache/hermes-osint/icij/`, refreshes after
30 days) and searches it offline.
## 3. Data Schema
Key fields emitted by `fetch_icij_offshore.py`:
| Column | Type | Description |
|--------|------|-------------|
| `node_id` | int | ICIJ canonical node ID |
| `name` | str | Entity / officer / intermediary name |
| `node_type` | str | entity / officer / intermediary / address |
| `country_codes` | str | Semicolon-separated ISO codes |
| `countries` | str | Country names |
| `jurisdiction` | str | Offshore jurisdiction (BVI, Panama, etc.) |
| `incorporation_date` | str | YYYY-MM-DD |
| `inactivation_date` | str | YYYY-MM-DD (if struck) |
| `source` | str | Panama Papers / Paradise Papers / Pandora Papers / etc. |
| `entity_url` | str | Link to ICIJ page |
| `connections` | str | Semicolon-separated node IDs of related entities |
## 4. Coverage
- Worldwide offshore entity records
- Earliest records: 1970s (Bahamas Leaks). Most data 1990–2018.
- NOT updated in real-time — new leaks added when ICIJ publishes them
- ~810,000 offshore entities + ~750,000 officers + ~150,000 intermediaries
## 5. Cross-Reference Potential
- **SEC EDGAR** ↔ `name` (public companies with offshore arms)
- **USAspending** ↔ `name` (federal contractors with offshore structure)
- **OFAC SDN** ↔ `name` (sanctioned entities using offshore vehicles)
Join key: normalized entity/officer name. `node_id` is canonical for cross-
referencing within ICIJ. Connections graph traversal is in-script (BFS over
`connections`).
## 6. Data Quality
- Offshore entity names sometimes appear in multiple leaks with slight variations
- Officers may be nominees (front persons), not beneficial owners
- Some entries have minimal info (just a name + jurisdiction)
- The connections graph is incomplete — some relationships are documented in
source materials but not in the structured database
- Inactive/struck-off entities are still included with `inactivation_date`
## 7. Acquisition Script
Path: `scripts/fetch_icij_offshore.py`
```bash
# Search by entity name (case-insensitive substring across the bulk DB)
python3 SKILL_DIR/scripts/fetch_icij_offshore.py --entity "EXAMPLE CORP" \
--out data/icij.csv
# Search by officer (individual person)
python3 SKILL_DIR/scripts/fetch_icij_offshore.py --officer "SMITH JOHN" \
--out data/icij.csv
# Search by jurisdiction (filter on cached results)
python3 SKILL_DIR/scripts/fetch_icij_offshore.py --officer "SMITH" \
--jurisdiction "BRITISH VIRGIN ISLANDS" --out data/icij_bvi.csv
# Force a fresh download (default refresh window is 30 days)
python3 SKILL_DIR/scripts/fetch_icij_offshore.py --entity "EXAMPLE CORP" \
--force-refresh --out data/icij.csv
```
First call downloads the ~70 MB ZIP under `~/.cache/hermes-osint/icij/`
(or `$HERMES_OSINT_CACHE/icij/`). Subsequent calls reuse the cache for 30 days.
## 8. Legal & Licensing
- Public record as published by ICIJ under explicit publication
- No copyright on the underlying facts (entity names, jurisdictions)
- ICIJ asks for attribution if used in derivative reporting
- **Ethical note**: Presence in this database does NOT imply wrongdoing. Many
offshore structures are legal. The database is a research tool, not a list of
criminals.
## 9. References
- Database: https://offshoreleaks.icij.org/
- About the data: https://offshoreleaks.icij.org/pages/about
- Methodology: https://www.icij.org/investigations/panama-papers/
- API hints: the Open Refine reconciliation endpoint at `https://offshoreleaks.icij.org/reconcile` has been removed (it now returns 404; see section 2) — use the bulk download instead

View file

@ -0,0 +1,90 @@
# NYC ACRIS — NYC Real Property Records
## 1. Summary
The Automated City Register Information System (ACRIS) is NYC's index of
recorded property documents: deeds, mortgages, satisfactions, liens, UCC
filings. Covers Manhattan, Bronx, Brooklyn, Queens, Staten Island.
Published as 4 linked Socrata datasets on the NYC Open Data portal.
## 2. Access Methods
- **Socrata API:** `https://data.cityofnewyork.us/resource/636b-3b5g.json` (Parties)
- **Other datasets:** `bnx9-e6tj` (Master), `8h5j-fqxa` (Legal), `uqqa-hym2` (References)
- **Auth:** None for read access (Socrata `$app_token` raises rate limits if needed)
- **Rate limit:** Generous (~1000 req/hour unauthenticated)
## 3. Data Schema
Key fields emitted by `fetch_nyc_acris.py` (Parties joined to Master):
| Column | Type | Description |
|--------|------|-------------|
| `document_id` | str | ACRIS document ID |
| `name` | str | Party name as recorded (often "LAST, FIRST" but varies) |
| `party_type` | str | 1=grantor, 2=grantee, 3=other |
| `party_role` | str | Human-readable role label |
| `address_1` | str | Property or party address line 1 |
| `city`, `state`, `zip`, `country` | str | Address parts |
| `doc_type` | str | DEED, MTGE (mortgage), SAT (satisfaction), AGMT, etc. |
| `doc_date`, `recorded_date` | str | YYYY-MM-DD |
| `borough` | str | Manhattan / Bronx / Brooklyn / Queens / Staten Island |
| `amount` | str | Document amount (USD, when applicable) |
| `filing_url` | str | Direct ACRIS DocumentImageView link |
## 4. Coverage
- NYC 5 boroughs only — other counties have their own recorders
- 1966 → present (older filings exist on microfilm at the County Clerk)
- Updated nightly
- ~70M+ party records cumulative
## 5. Cross-Reference Potential
- **SEC EDGAR** ↔ `name` (insider filers with NYC property)
- **USAspending** ↔ `name` (federal contractors with NYC property)
- **Senate LDA** ↔ `name` (lobbyists / clients with NYC property)
- **ICIJ Offshore** ↔ `name` (NYC properties owned via offshore vehicles)
Join key: normalized party name. NYC property records typically store names
as "LAST, FIRST" or full LLC names — use `entity_resolution.py`.
## 6. Data Quality
- Same person appears with multiple name formats over time
- LLC and trust ownership obscures beneficial owners
- Recording lag can be 2-4 weeks after closing
- Older documents have spottier address data
- Sealed records (e.g. domestic violence shelters) are excluded by law
## 7. Acquisition Script
Path: `scripts/fetch_nyc_acris.py`
```bash
# By party name
python3 SKILL_DIR/scripts/fetch_nyc_acris.py --name "ROLNICK" --out data/acris.csv
# By address (useful when you know the property but not the names)
python3 SKILL_DIR/scripts/fetch_nyc_acris.py --address "571 HUDSON" --out data/acris.csv
# Restrict to grantees (buyers / mortgagees)
python3 SKILL_DIR/scripts/fetch_nyc_acris.py --name "ROLNICK" --party-type 2 \
--out data/acris_buyers.csv
```
The script joins Parties → Master to populate doc_type, dates, borough, and
amount. Pass `--no-enrich` to skip the join (faster, fewer columns).
## 8. Legal & Licensing
- Public record under NYS Real Property Law and NYC Charter
- No commercial use restrictions on the data
- All ACRIS data is public information by statute
## 9. References
- ACRIS portal: https://a836-acris.nyc.gov/CP/
- NYC Open Data: https://data.cityofnewyork.us/
- Parties dataset: https://data.cityofnewyork.us/City-Government/ACRIS-Real-Property-Parties/636b-3b5g
- Document type codes: https://www1.nyc.gov/site/finance/taxes/acris.page

View file

@ -0,0 +1,92 @@
# OFAC SDN — Specially Designated Nationals List
## 1. Summary
The Office of Foreign Assets Control (OFAC) publishes the Specially Designated
Nationals and Blocked Persons List (SDN). US persons are generally prohibited
from dealing with individuals and entities on this list. Also published:
non-SDN consolidated lists (BIS Denied Persons, FSE, etc.).
## 2. Access Methods
- **Full XML:** `https://www.treasury.gov/ofac/downloads/sdn.xml`
- **Delimited:** `https://www.treasury.gov/ofac/downloads/sdn.csv`
- **Consolidated:** `https://www.treasury.gov/ofac/downloads/consolidated/consolidated.xml`
- **Auth:** None
- **Rate limit:** None (static file downloads). Updated continuously.
## 3. Data Schema
Key fields emitted by `fetch_ofac_sdn.py`:
| Column | Type | Description |
|--------|------|-------------|
| `entity_id` | int | OFAC unique ID |
| `name` | str | Primary name |
| `entity_type` | str | individual / entity / vessel / aircraft |
| `program_list` | str | Semicolon-separated sanctions programs (e.g. SDGT;IRAN) |
| `title` | str | For individuals: title/role |
| `nationalities` | str | Semicolon-separated country codes |
| `aka_list` | str | Semicolon-separated "also known as" names |
| `addresses` | str | Semicolon-separated known addresses |
| `dob` | str | Date of birth (individuals) |
| `pob` | str | Place of birth (individuals) |
| `remarks` | str | OFAC's free-text remarks |
| `last_updated` | str | YYYY-MM-DD (publication date) |
## 4. Coverage
- Worldwide — all entities sanctioned by US Treasury
- ~10,000 entries on SDN, ~15,000 on consolidated lists
- Updated continuously (sometimes daily during active enforcement)
- Includes AKAs (very common, can be 10+ per entity)
## 5. Cross-Reference Potential
- **SEC EDGAR** ↔ `name` (public companies sanctioned)
- **USAspending** ↔ `name` (sanctioned entity as federal contractor — should
be impossible but verify)
- **ICIJ Offshore** ↔ `name` (offshore entities also sanctioned)
Join key: normalized name. **CRITICAL**: must match against `aka_list` too.
Many sanctioned entities are caught only via aliases.
## 6. Data Quality
- Names are transliterated from many scripts — multiple romanizations possible
- AKAs often differ wildly from primary name
- Some entries have minimal info (no DOB, no address) for individuals
- Free-text `remarks` contain critical context — read them
- "Specially Designated Global Terrorists" (SDGT) and "Cyber-related" (CYBER2)
programs add and remove entries frequently
## 7. Acquisition Script
Path: `scripts/fetch_ofac_sdn.py`
```bash
# Full snapshot
python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --out data/ofac_sdn.csv
# Filter to specific program
python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --program SDGT --out data/sdn_sdgt.csv
# Entities only (skip individuals, vessels, aircraft)
python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --entity-type entity --out data/sdn_entities.csv
```
## 8. Legal & Licensing
- Public record under Executive Order authority and statutory sanctions programs
- US persons MUST screen against this list — it is enforced
- No restrictions on the data itself; restrictions are on transactions with
the listed entities
- ZERO penalty for "over-matching" — false positives must be cleared but are not
prohibited
## 9. References
- OFAC home: https://ofac.treasury.gov/
- SDN list: https://ofac.treasury.gov/specially-designated-nationals-and-blocked-persons-list-sdn-human-readable-lists
- Data formats: https://ofac.treasury.gov/sdn-list/sanctions-list-search-tool
- Compliance guidance: https://ofac.treasury.gov/recent-actions

View file

@ -0,0 +1,103 @@
# OpenCorporates — Global Corporate Registry
## 1. Summary
OpenCorporates aggregates corporate registry data from 130+ jurisdictions
worldwide (~200M companies). Covers US state-level filings (NY DOS, Delaware
DOC, California SOS, etc.), UK Companies House, EU registries, and most
common-law jurisdictions.
## 2. Access Methods
- **REST API:** `https://api.opencorporates.com/v0.4/`
- **HTML fallback:** `https://opencorporates.com/companies?q=...`
- **Auth:** API token required (free tier 500 calls/month, paid plans available)
- **Rate limit:** Token-bound; un-tokened requests return 401
Set `OPENCORPORATES_API_TOKEN` env var. Get a free token at
https://opencorporates.com/api_accounts/new.
## 3. Data Schema
Key fields emitted by `fetch_opencorporates.py`:
| Column | Type | Description |
|--------|------|-------------|
| `name` | str | Company legal name |
| `company_number` | str | Registry-assigned number |
| `jurisdiction_code` | str | e.g. `us_ny`, `us_de`, `gb` |
| `jurisdiction_name` | str | Human-readable jurisdiction |
| `incorporation_date` | str | YYYY-MM-DD |
| `dissolution_date` | str | YYYY-MM-DD (empty if active) |
| `company_type` | str | Domestic LLC / Foreign Corp / etc. |
| `status` | str | Active / Inactive / Dissolved |
| `registered_address` | str | Registered office address |
| `opencorporates_url` | str | Link to OpenCorporates entity page |
| `officers_count` | str | Total officers on record |
| `source` | str | `api`, `html`, or `html-fallback` |
## 4. Coverage
- US: all 50 states + DC at state level (LLCs, corps, LPs)
- International: UK, EU, Canada, Australia, NZ, many APAC + LATAM jurisdictions
- ~200M company records cumulative
- Update frequency varies by jurisdiction (UK CH is near-realtime; some
state registries lag months)
## 5. Cross-Reference Potential
- **NYC ACRIS** ↔ `name` (LLC/corp owners of NYC property)
- **USAspending** ↔ `name` (corporate federal contractors)
- **SEC EDGAR** ↔ `name` (public companies + their subsidiaries)
- **ICIJ Offshore** ↔ `name` (international corporate structures)
Join key: normalized company name. Some entries have `previous_names` arrays
which are not currently exported by the fetch script — query OC directly
for that.
## 6. Data Quality
- Company-name spellings vary across re-incorporations and renames
- Officer records are spottier than company records (many jurisdictions
don't require officer disclosure)
- Beneficial-ownership data is generally NOT here — most jurisdictions
don't require it. UK Companies House has PSC (people with significant
control) but that's not universal.
- Cross-jurisdictional links (parent / subsidiary) are based on registry
filings only; corporate trees are often incomplete
## 7. Acquisition Script
Path: `scripts/fetch_opencorporates.py`
```bash
# Search globally by name
python3 SKILL_DIR/scripts/fetch_opencorporates.py --query "Example Corp" \
--out data/oc.csv
# Restrict to a jurisdiction
python3 SKILL_DIR/scripts/fetch_opencorporates.py --query "Example Corp" \
--jurisdiction us_ny --out data/oc_ny.csv
# Set token via env or flag
OPENCORPORATES_API_TOKEN=xxx python3 SKILL_DIR/scripts/fetch_opencorporates.py \
--query "Microsoft" --out data/oc.csv
```
Without a token the script falls back to scraping the HTML search page.
The fallback is brittle and only fills in `name`, `jurisdiction_code`,
`opencorporates_url` — set the token for serious work.
## 8. Legal & Licensing
- OpenCorporates aggregates public records — the underlying facts are
public domain
- OpenCorporates' own database is licensed CC-BY-SA-4.0; attribution required
- API ToS prohibits redistributing the full dataset; per-record reference
is fine
## 9. References
- API docs: https://api.opencorporates.com/documentation/API-Reference
- Jurisdiction codes: https://api.opencorporates.com/v0.4/jurisdictions.json
- Schema: https://opencorporates.com/info/our_data

View file

@ -0,0 +1,83 @@
# SEC EDGAR — Corporate Filings
## 1. Summary
EDGAR (Electronic Data Gathering, Analysis, and Retrieval) is the SEC's system
for corporate disclosure filings: 10-K (annual), 10-Q (quarterly), 8-K (current
events), DEF 14A (proxy), Form 4 (insider trading), 13F (institutional holdings).
## 2. Access Methods
- **API:** `https://data.sec.gov/submissions/CIK<10-digit-padded>.json` (no auth)
- **Filing index:** `https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=...`
- **Full-text search:** `https://efts.sec.gov/LATEST/search-index?q=...`
- **Auth:** None — requires `User-Agent` header with contact info per SEC policy
- **Rate limit:** 10 requests/second per IP (enforced)
## 3. Data Schema
Key fields emitted by `fetch_sec_edgar.py` (filings index):
| Column | Type | Description |
|--------|------|-------------|
| `cik` | str | Central Index Key (10-digit padded) |
| `company_name` | str | Registrant name |
| `form_type` | str | 10-K, 10-Q, 8-K, etc. |
| `filing_date` | str | YYYY-MM-DD |
| `accession_number` | str | Filing accession (e.g. 0000320193-24-000123) |
| `primary_document` | str | Filename of main document |
| `filing_url` | str | Direct URL to filing index |
| `reporting_period` | str | Period of report (where applicable) |
## 4. Coverage
- All public US registrants from 1993 → present
- 1993-2000 has spotty coverage of older filings (paper-to-electronic migration)
- ~12M filings cumulative
- Updated within minutes of filing acceptance
## 5. Cross-Reference Potential
- **USAspending** ↔ `company_name` (public companies as federal contractors)
- **Senate LD** ↔ `company_name` (public companies hire lobbyists)
- **OFAC SDN** ↔ `company_name` (sanctions screening of public registrants)
Join key: company name OR CIK if you have it. CIK is canonical and stable.
## 6. Data Quality
- Subsidiaries often filed under parent CIK — be careful with name matches
- Name changes over time (rebrands, acquisitions) — CIK remains constant
- 10-K Item 1A Risk Factors are free-form text — useful for `web_extract`-style
parsing, not structured queries
- Foreign private issuers file 20-F instead of 10-K
## 7. Acquisition Script
Path: `scripts/fetch_sec_edgar.py`
```bash
# By CIK
python3 SKILL_DIR/scripts/fetch_sec_edgar.py --cik 0000320193 \
--types 10-K,10-Q --out data/edgar_filings.csv
# By company name (resolves to CIK first via name search)
python3 SKILL_DIR/scripts/fetch_sec_edgar.py --company "APPLE INC" \
--types 8-K --since 2024-01-01 --out data/edgar_filings.csv
```
Set `SEC_USER_AGENT` env var with your contact email (SEC requirement).
Example: `SEC_USER_AGENT="Research example@example.com"`.
## 8. Legal & Licensing
- Public record under SEC Rule 24b-2 / 17 CFR § 230.401
- No commercial use restrictions on filing content
- SEC asks all bulk users to include a `User-Agent` with contact info and to
respect 10 req/s — failure to do so can result in IP blocking
## 9. References
- Developer docs: https://www.sec.gov/edgar/sec-api-documentation
- EDGAR full-text search: https://efts.sec.gov/LATEST/search-index
- Fair access policy: https://www.sec.gov/os/accessing-edgar-data

View file

@ -0,0 +1,89 @@
# Senate LD — Lobbying Disclosure (LD-1 / LD-2)
## 1. Summary
The Senate Office of Public Records publishes lobbying disclosures under the
Lobbying Disclosure Act of 1995 (LDA, as amended by HLOGA 2007). LD-1 is
registration of a new client-lobbyist relationship; LD-2 is the quarterly
activity report.
## 2. Access Methods
- **API:** `https://lda.senate.gov/api/v1/` (no auth required for read-only)
- **Bulk download:** `https://lda.senate.gov/api/v1/filings/?format=csv` (paginated)
- **Auth:** Token required for >120 req/hour — register at https://lda.senate.gov/api/auth/register/
- **Rate limit:** 120 req/hour unauthenticated, 1,200 req/hour authenticated
## 3. Data Schema
Key fields emitted by `fetch_senate_ld.py`:
| Column | Type | Description |
|--------|------|-------------|
| `filing_uuid` | str | Unique filing ID |
| `filing_type` | str | LD-1, LD-2, LD-203, etc. |
| `filing_year` | int | Year |
| `filing_period` | str | Q1/Q2/Q3/Q4 or annual |
| `registrant_name` | str | Lobbying firm or organization |
| `registrant_id` | str | Senate-assigned registrant ID |
| `client_name` | str | Client being represented |
| `client_id` | str | Senate-assigned client ID |
| `client_general_description` | str | Client industry / business |
| `income` | float | LD-2 income from client this quarter (USD) |
| `expenses` | float | LD-2 expenses (in-house lobbying) |
| `lobbyists` | str | Semicolon-separated lobbyist names |
| `issues` | str | Semicolon-separated issue areas |
| `government_entities` | str | Agencies/chambers contacted |
| `filing_date` | str | YYYY-MM-DD |
## 4. Coverage
- US federal lobbying only (state lobbying handled by individual state ethics offices)
- 1999 → present (full electronic coverage from 2008)
- Quarterly reporting cycle (LD-2)
- ~1M+ filings cumulative
## 5. Cross-Reference Potential
- **USAspending** ↔ `client_name` (clients lobbying for contracts)
- **SEC EDGAR** ↔ `client_name` (public companies as lobbying clients)
- **OFAC SDN** ↔ `client_name` (sanctions screening of lobbying clients)
Join key: normalized client_name. registrant_id and client_id are canonical
when joining Senate-internal records.
## 6. Data Quality
- Many lobbyist names appear in multiple registrants over time (job changes)
- `issues` and `government_entities` are free-text — inconsistent capitalization
- Foreign agents register under FARA (Department of Justice), NOT here
- Income/expenses are reported in $10,000 brackets in some older filings
## 7. Acquisition Script
Path: `scripts/fetch_senate_ld.py`
```bash
# By client
python3 SKILL_DIR/scripts/fetch_senate_ld.py --client "EXAMPLE CORP" \
--year 2024 --out data/lobbying.csv
# By registrant (lobbying firm)
python3 SKILL_DIR/scripts/fetch_senate_ld.py --registrant "BIG K STREET LLP" \
--year 2024 --out data/lobbying.csv
```
Set `SENATE_LDA_TOKEN` env var if you have one (or pass `--token`).
Defaults to anonymous (120 req/hour).
## 8. Legal & Licensing
- Public record under 2 U.S.C. § 1604 (LDA)
- No commercial use restrictions
- Reuse is unconditional — see Senate Public Records Office disclaimer
## 9. References
- API docs: https://lda.senate.gov/api/redoc/v1/
- LDA guidance: https://lobbyingdisclosure.house.gov/ld_guidance.pdf
- Senate Public Records: https://lda.senate.gov/

View file

@ -0,0 +1,97 @@
# USAspending — Federal Government Contracts and Grants
## 1. Summary
USAspending.gov is the official source of federal spending data. Coverage:
contracts, grants, loans, direct payments, sub-awards. Required by the DATA Act
of 2014 — all federal agencies must report to a single schema.
## 2. Access Methods
- **API v2:** `https://api.usaspending.gov/api/v2/` (no auth, no key)
- **Bulk:** `https://files.usaspending.gov/` (CSV / Parquet by award type)
- **Auth:** None
- **Rate limit:** Not strictly enforced, but be polite — keep to <10 req/s
## 3. Data Schema
Key fields emitted by `fetch_usaspending.py` (prime awards):
| Column | Type | Description |
|--------|------|-------------|
| `award_id` | str | Federal award ID (PIID for contracts, FAIN for grants) |
| `recipient_name` | str | Awardee legal name |
| `recipient_uei` | str | Unique Entity Identifier (replaced DUNS in 2022) |
| `recipient_duns` | str | Legacy DUNS number (historical only) |
| `recipient_parent_name` | str | Ultimate parent organization |
| `recipient_state` | str | Recipient state |
| `awarding_agency` | str | Department / agency name |
| `awarding_sub_agency` | str | Sub-tier (e.g. DoD → Army) |
| `award_type` | str | Contract / Grant / Loan / Direct Payment |
| `award_amount` | float | Current total obligation in USD |
| `award_date` | str | Action / signed date YYYY-MM-DD |
| `period_of_performance_start` | str | YYYY-MM-DD |
| `period_of_performance_end` | str | YYYY-MM-DD |
| `naics_code` | str | Industry classification |
| `psc_code` | str | Product / Service Code |
| `competition_extent` | str | Full / limited / sole-source |
| `description` | str | Award description (free-text) |
## 4. Coverage
- US federal awards only (state/local not included)
- FY 2008 → present (full coverage from FY 2017)
- Updated bi-weekly from agency reporting
- ~100M+ transaction records cumulative
## 5. Cross-Reference Potential
- **SEC EDGAR** ↔ `recipient_name` (public companies as contractors)
- **Senate LD** ↔ `recipient_name` (lobbying clients winning contracts)
- **OFAC SDN** ↔ `recipient_name` (sanctions screening of contractors — must be
filtered out by SAM.gov but verify)
- **ICIJ Offshore** ↔ `recipient_name` (offshore-linked contractors)
Join key: normalized recipient name. UEI is canonical when present.
## 6. Data Quality
- DUNS → UEI transition (April 2022) — old records have DUNS, new records have UEI
- Some sub-awards aren't reported (FFATA threshold is $30k)
- Award amount changes over time (mod actions) — fetch script reports current total
- `competition_extent` field is free-text in older records — `fetch_usaspending.py`
normalizes to canonical values
- Recipient name variations are extensive — "ACME LLC", "Acme L.L.C.", "ACME, INC"
all appear. Use `entity_resolution.py`.
## 7. Acquisition Script
Path: `scripts/fetch_usaspending.py`
```bash
# By recipient name
python3 SKILL_DIR/scripts/fetch_usaspending.py --recipient "EXAMPLE CORP" \
--fy 2024 --out data/contracts.csv
# By awarding agency
python3 SKILL_DIR/scripts/fetch_usaspending.py --agency "Department of Defense" \
--fy 2024 --out data/contracts.csv
# Filter to sole-source only
python3 SKILL_DIR/scripts/fetch_usaspending.py --recipient "EXAMPLE CORP" \
--fy 2024 --sole-source-only --out data/contracts.csv
```
## 8. Legal & Licensing
- Public record under the Federal Funding Accountability and Transparency Act
(FFATA, 2006) and DATA Act (2014)
- No commercial use restrictions on the data
- Personal information of award recipients (e.g. small business owners' addresses
in some grants) should be handled per the source agency's privacy notice
## 9. References
- API docs: https://api.usaspending.gov/
- Data dictionary: https://www.usaspending.gov/data-dictionary
- Award schema: https://files.usaspending.gov/docs/Data_Dictionary_Crosswalk.xlsx

View file

@ -0,0 +1,93 @@
# Wayback Machine — Internet Archive CDX
## 1. Summary
The Internet Archive's Wayback Machine has captured ~900B+ web pages since
1996. The CDX server API indexes those captures by URL, timestamp, and
content hash. Free, anonymous, no auth.
## 2. Access Methods
- **CDX server:** `https://web.archive.org/cdx/search/cdx`
- **Wayback URL:** `https://web.archive.org/web/<timestamp>/<url>`
- **Save Page Now (write):** `https://web.archive.org/save/<url>` (different API)
- **Auth:** None
- **Rate limit:** Generous; be polite (~1 req/s)
## 3. Data Schema
Key fields emitted by `fetch_wayback.py`:
| Column | Type | Description |
|--------|------|-------------|
| `url` | str | Original URL captured |
| `timestamp` | str | YYYYMMDDHHMMSS (CDX format) |
| `wayback_url` | str | Direct replay URL |
| `mimetype` | str | Content-type at capture |
| `status` | str | HTTP status (typically 200) |
| `digest` | str | SHA1 of capture content (collapse-friendly) |
| `length` | str | Byte length of capture |
## 4. Coverage
- 1996 → present
- ~900B+ captures across ~700M domains
- Updated continuously by automated crawls + manual saves
- Some domains have aggressive coverage (news), others sparse (private)
## 5. Cross-Reference Potential
- **Wikipedia** ↔ Reverse-lookup pages cited as references that have since
disappeared
- **News URLs** ↔ Original article content when present-day URLs 404
- **Corporate websites** ↔ Historical "About" pages, executive bios that
have been scrubbed
The Wayback CDX is most useful as a **content-recovery** layer when other
sources point to URLs that no longer exist.
## 6. Data Quality
- robots.txt-blocked domains may have spotty or no coverage
- Captures vary in completeness (HTML may be saved without CSS/JS)
- Some content is excluded by domain owner request (DMCA, etc.)
- Coverage of "deep links" (URLs with query strings) is uneven
- Time resolution is per-capture, not continuous — gaps are common
## 7. Acquisition Script
Path: `scripts/fetch_wayback.py`
```bash
# All captures of a specific URL
python3 SKILL_DIR/scripts/fetch_wayback.py --url "https://example.com/page" \
--out data/wb.csv
# All captures of a host
python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
--match host --out data/wb.csv
# All captures of a domain + subdomains
python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
--match domain --out data/wb.csv
# Only unique-content captures within a date window
python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
--match host --collapse digest \
--from-date 2020-01-01 --to-date 2023-12-31 \
--out data/wb.csv
```
## 8. Legal & Licensing
- Internet Archive captures are made under fair-use research provisions
- Replay URLs are stable references — citing them is encouraged
- Internet Archive non-profit terms of use govern content
- Some content is rights-restricted; replay may be blocked even if the
CDX entry shows it as captured
## 9. References
- CDX server docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
- Wayback API: https://archive.org/help/wayback_api.php
- Internet Archive: https://archive.org/

View file

@ -0,0 +1,107 @@
# Wikipedia + Wikidata
## 1. Summary
Wikipedia is the canonical narrative-bio source for notable people, places,
and organizations. Wikidata is its structured-data counterpart: ~110M
items, each with claims, dates, identifiers, and cross-references to
external authorities (VIAF, ISNI, ORCID, GRID, etc.).
Together they're a high-precision entity-resolution layer — the bar for
inclusion is real, but anything past that bar is well-cross-referenced.
## 2. Access Methods
- **Wikipedia OpenSearch:** `https://en.wikipedia.org/w/api.php?action=opensearch`
- **Wikipedia REST summary:** `https://en.wikipedia.org/api/rest_v1/page/summary/<title>`
- **Wikidata Action API:** `https://www.wikidata.org/w/api.php?action=wbgetentities`
- **Wikidata SPARQL:** `https://query.wikidata.org/sparql` (more powerful but aggressively rate-limited)
- **Auth:** None, but **a meaningful User-Agent is required**
Set `HERMES_OSINT_UA` to something identifying (e.g. `your-app/1.0 (you@example.com)`).
Wikimedia returns HTTP 429 to generic UAs.
## 3. Data Schema
Key fields emitted by `fetch_wikipedia.py`:
| Column | Type | Description |
|--------|------|-------------|
| `source` | str | `wikipedia` or `wikipedia+wikidata` |
| `label` | str | Wikipedia article title |
| `description` | str | Short Wikidata description |
| `qid` | str | Wikidata QID (e.g. Q2283 for Microsoft) |
| `wikipedia_title`, `wikipedia_url` | str | Article identifier + URL |
| `wikidata_url` | str | Wikidata entity URL |
| `instance_of` | str | What kind of thing it is (P31) |
| `country` | str | Country (P17 for orgs/places, P27 for people) |
| `occupation` | str | P106 |
| `employer` | str | P108 |
| `date_of_birth` | str | P569, YYYY-MM-DD |
| `place_of_birth` | str | P19 |
| `summary` | str | Wikipedia REST extract (~1000 chars) |
The fetch script uses Wikidata's Action API (NOT SPARQL) for structured
facts — far more lenient on rate limits.
## 4. Coverage
- Wikipedia EN: ~7M articles
- Wikidata: ~110M items, ~1.5B statements
- Updated continuously; abuse filters and bots run constantly
- High notability bar — most private individuals are not in Wikipedia
## 5. Cross-Reference Potential
- **All sources** ↔ `label` (entity identity resolution)
- **SEC EDGAR** ↔ `label` (public companies)
- **CourtListener** ↔ `label` (parties to notable litigation)
- **Wikidata external identifiers** (not currently in this fetcher's output)
link to VIAF, ISNI, ORCID, GRID, GitHub, Twitter, IMDb, ...
Join key: Wikidata QID is canonical. Wikipedia titles are stable for
most articles but can be renamed.
## 6. Data Quality
- Notability filter — only notable entities (criteria vary by topic)
- Recency lag — current events take days to weeks to be reflected
- POV / vandalism — moderated, but edits between sweeps can be bad
- Living-persons biographies have stricter sourcing requirements
- Wikidata claims have qualifiers and references — the fetch script
doesn't currently export them
## 7. Acquisition Script
Path: `scripts/fetch_wikipedia.py`
```bash
# Look up a notable entity
python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Microsoft" --out data/wp.csv
# A specific person
python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Bill Gates" --out data/wp_bg.csv
# Skip the Wikidata enrichment for speed
python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Microsoft" --no-wikidata \
--limit 5 --out data/wp.csv
```
The OpenSearch is fuzzy — `--limit 5` returns the top 5 Wikipedia article
matches. Each is enriched with the QID + structured facts unless
`--no-wikidata` is passed.
## 8. Legal & Licensing
- Wikipedia text: CC-BY-SA-3.0 / GFDL
- Wikidata claims: CC0 (public domain)
- API ToS: respect rate limits, identify your agent
- Commercial use allowed with attribution
## 9. References
- Wikipedia OpenSearch: https://www.mediawiki.org/wiki/API:Opensearch
- Wikipedia REST: https://en.wikipedia.org/api/rest_v1/
- Wikidata Action API: https://www.wikidata.org/wiki/Wikidata:Data_access
- Wikidata SPARQL: https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service
- User-Agent policy: https://meta.wikimedia.org/wiki/User-Agent_policy

View file

@ -0,0 +1,82 @@
"""Tiny stdlib HTTP helper used by fetch_*.py scripts.
Provides polite retry + JSON convenience + User-Agent enforcement.
"""
from __future__ import annotations
import json
import os
import time
import urllib.error
import urllib.parse
import urllib.request
# Fallback User-Agent string. It names the project, links its repository, and
# tells operators to set HERMES_OSINT_UA to identify themselves.
DEFAULT_UA = " ".join(
    (
        "hermes-osint-investigation/0.2",
        "(+https://github.com/NousResearch/hermes-agent;",
        "set HERMES_OSINT_UA env var to identify yourself per",
        "Wikimedia / SEC fair-use guidance)",
    )
)
def get(
    url: str,
    *,
    params: dict | None = None,
    headers: dict | None = None,
    user_agent: str | None = None,
    max_retries: int = 3,
    backoff: float = 1.5,
    timeout: float = 30.0,
) -> bytes:
    """GET ``url`` and return the raw response body, retrying transient failures.

    Args:
        url: Target URL. ``params`` (if any) are URL-encoded and appended,
            using ``&`` when the URL already contains a ``?``.
        params: Optional query parameters.
        headers: Optional extra request headers; they override the default
            ``User-Agent`` entry when they contain one.
        user_agent: Explicit User-Agent. Falls back to the ``HERMES_OSINT_UA``
            environment variable, then to ``DEFAULT_UA``.
        max_retries: Number of retries after the first attempt. Values below
            zero are treated as zero, so one request is always made.
        backoff: Base of the exponential wait (``backoff ** attempt_number``
            seconds) used when the server gives no usable ``Retry-After``.
        timeout: Per-request socket timeout in seconds.

    Returns:
        The response body as bytes.

    Raises:
        RuntimeError: On HTTP 429. Rate limiting is surfaced IMMEDIATELY with
            a clear message -- retrying when the upstream says "you're over
            quota" just wastes time. The caller should slow down or supply
            real credentials.
        urllib.error.HTTPError: For other HTTP errors, or for 500/502/503/504
            once the retries are exhausted.
        OSError: For network-level failures (``URLError``, socket timeouts,
            connection resets) once the retries are exhausted.
    """
    if params:
        sep = "&" if "?" in url else "?"
        url = f"{url}{sep}{urllib.parse.urlencode(params)}"
    h = {"User-Agent": user_agent or os.environ.get("HERMES_OSINT_UA", DEFAULT_UA)}
    if headers:
        h.update(headers)

    # Always make at least one attempt, even if the caller passes a negative
    # retry count.
    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        can_retry = attempt < attempts - 1
        req = urllib.request.Request(url, headers=h)
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                return resp.read()
        except urllib.error.HTTPError as e:
            if e.code == 429:
                # Surface immediately. Read the body so the caller sees the
                # provider's actual message ("OVER_RATE_LIMIT" etc.).
                try:
                    body = e.read(2048).decode("utf-8", errors="replace")
                except Exception:  # noqa: BLE001
                    body = ""
                raise RuntimeError(
                    f"HTTP 429 rate-limited by {urllib.parse.urlsplit(url).netloc}. "
                    f"Slow down or supply a real API key. Body: {body[:300]}"
                ) from e
            if e.code in (500, 502, 503, 504) and can_retry:
                retry_after = e.headers.get("Retry-After") if e.headers else None
                # Only the delta-seconds form of Retry-After is honored; an
                # HTTP-date value falls back to exponential backoff.
                # isdecimal() (not isdigit()) guarantees float() can parse it.
                if retry_after and retry_after.isdecimal():
                    wait = float(retry_after)
                else:
                    wait = backoff ** (attempt + 1)
                time.sleep(wait)
                continue
            raise
        except OSError:
            # URLError is an OSError, and so are the raw socket timeouts and
            # connection resets that escape urlopen()/resp.read() without
            # being wrapped in URLError. All are treated as transient.
            if can_retry:
                time.sleep(backoff ** (attempt + 1))
                continue
            raise
    # Every iteration returns, raises, or continues to a later attempt.
    raise RuntimeError("unreachable")
def get_json(url: str, **kwargs) -> dict | list:
    """Fetch ``url`` with :func:`get` (forwarding ``kwargs``) and parse the UTF-8 body as JSON."""
    raw_body = get(url, **kwargs)
    text = raw_body.decode("utf-8")
    return json.loads(text)

View file

@ -0,0 +1,67 @@
"""Shared entity-name normalization helpers (stdlib-only).
Used by entity_resolution.py and timing_analysis.py.
"""
from __future__ import annotations
import re
# Legal suffixes / corporate boilerplate to strip during normalization.
_SUFFIX_TOKENS = {
    "INC", "INCORPORATED", "LLC", "LLP", "LP", "LTD", "LIMITED",
    "CORP", "CORPORATION", "CO", "COMPANY",
    "GROUP", "GRP", "HOLDINGS", "HOLDING",
    "PARTNERS", "ASSOCIATES",
    "INTERNATIONAL", "INTL",
    "ENTERPRISES", "ENTERPRISE",
    "SERVICES", "SERVICE", "SVCS",
    "SOLUTIONS", "MANAGEMENT", "MGMT", "CONSULTING",
    "TECHNOLOGY", "TECHNOLOGIES", "TECH",
    "INDUSTRIES", "INDUSTRY",
    "AMERICA", "AMERICAN",
    "USA", "US",
    "PLLC", "PC",
    "TRUST", "FOUNDATION",
}

# Dotted initialisms: single capital letters joined by periods, with an
# optional trailing period ("L.L.C.", "U.S.", "J.P"). They are collapsed to one
# token before generic punctuation stripping; otherwise "L.L.C." would become
# the three tokens "L L C" and never match the "LLC" suffix.
_INITIALISM_RE = re.compile(r"\b[A-Z](?:\.[A-Z])+\b\.?")
_PUNCT_RE = re.compile(r"[^\w\s]")
_WS_RE = re.compile(r"\s+")


def _collapse_initialism(match: re.Match) -> str:
    """Drop the periods from a dotted initialism and keep it a separate token."""
    # The trailing space stops the collapsed token from fusing with a word that
    # directly followed the final period (e.g. "A.B.SMITH" -> "AB SMITH").
    return match.group(0).replace(".", "") + " "


def normalize_name(name: str | None) -> str:
    """Standard normalization: uppercase, drop punctuation, strip suffixes.

    Steps, in order:
      1. Uppercase the input.
      2. Collapse dotted initialisms ("L.L.C." -> "LLC", "J.P." -> "JP").
      3. Replace every remaining non-word, non-space character with a space.
      4. Collapse whitespace and drop tokens listed in ``_SUFFIX_TOKENS``.

    Args:
        name: Raw entity name; ``None`` or an empty string is accepted.

    Returns:
        The surviving tokens joined by single spaces. Returns ``""`` when
        ``name`` is falsy or every token is a suffix token.
    """
    if not name:
        return ""
    s = _INITIALISM_RE.sub(_collapse_initialism, name.upper())
    s = _PUNCT_RE.sub(" ", s)
    s = _WS_RE.sub(" ", s).strip()
    tokens = [t for t in s.split() if t not in _SUFFIX_TOKENS]
    return " ".join(tokens)
def normalize_aggressive(name: str | None) -> str:
    """Order-insensitive form of ``normalize_name``.

    Returns the distinct normalized tokens, sorted and space-joined, so names
    that differ only in word order or repetition compare equal. Returns ``""``
    when the normalized name is empty.
    """
    normalized = normalize_name(name)
    if normalized:
        unique_tokens = set(normalized.split())
        return " ".join(sorted(unique_tokens))
    return ""
def name_tokens(name: str | None, min_len: int = 4) -> set[str]:
    """Return the normalized tokens of ``name`` that have at least ``min_len`` characters.

    An empty set is returned when the normalized name is empty.
    """
    selected: set[str] = set()
    normalized = normalize_name(name)
    if normalized:
        for token in normalized.split():
            if len(token) >= min_len:
                selected.add(token)
    return selected
def token_overlap_ratio(left: str | None, right: str | None) -> tuple[float, int]:
    """Compare two names by their ``name_tokens`` sets.

    Returns ``(ratio, shared)`` where ``shared`` is the number of tokens the
    two sets have in common and ``ratio`` is ``shared`` divided by the size of
    their union. ``(0.0, 0)`` is returned when nothing is shared, which
    includes the case where either side has no qualifying tokens.
    """
    left_tokens = name_tokens(left)
    right_tokens = name_tokens(right)
    # An empty side always produces an empty intersection, so one check
    # covers both the "no tokens" and the "no overlap" cases.
    common = left_tokens & right_tokens
    if not common:
        return 0.0, 0
    n_common = len(common)
    return n_common / len(left_tokens | right_tokens), n_common

View file

@ -0,0 +1,221 @@
#!/usr/bin/env python3
"""Build a structured findings.json with evidence chains (stdlib-only).
Aggregates cross_links.csv (entity_resolution output) and an optional
timing.json (timing_analysis output) into a single evidence-chain document.
Output structure:
{
"metadata": {...},
"findings": [
{
"id": "F0001",
"title": "...",
"severity": "HIGH|MEDIUM|LOW",
"confidence": "high|medium|low",
"summary": "...",
"evidence": [
{"source": "cross_links.csv", "row": 12, "fields": {...}},
...
],
"sources": ["cross_links.csv", "timing.json"]
}
]
}
Every finding traces to specific source rows. No naked claims.
"""
from __future__ import annotations
import argparse
import csv
import json
from collections import defaultdict
from pathlib import Path
# Sort ranks: a lower number sorts first (i.e. is "better" / more severe).
CONFIDENCE_ORDER = {
    "high": 0,
    "medium": 1,
    "low": 2,
}
SEVERITY_ORDER = {
    "HIGH": 0,
    "MEDIUM": 1,
    "LOW": 2,
}
def _read_cross_links(path: str) -> list[dict[str, str]]:
with open(path, newline="", encoding="utf-8") as fh:
return list(csv.DictReader(fh))
def build_findings(
    cross_links_path: str,
    timing_path: str | None = None,
    out_path: str = "findings.json",
    bundled_threshold: int = 3,
) -> dict:
    """Aggregate cross-link matches and optional timing results into findings.

    Findings are generated in three passes and then sorted by severity,
    confidence and id:

    1. One finding per distinct ``(left_normalized, right_normalized)`` pair
       in the cross-links CSV, carrying the confidence of its best-ranked row.
    2. One "bundled" finding per right-side entity linked to at least
       ``bundled_threshold`` distinct left names.
    3. One finding per entry of ``timing.json``'s ``results`` list whose
       ``significant`` value is truthy.

    Args:
        cross_links_path: CSV read via ``_read_cross_links``.
        timing_path: Optional JSON file; ignored when falsy or missing on disk.
        out_path: Destination for the JSON payload. Missing parent directories
            are created.
        bundled_threshold: Minimum number of distinct left names for pass 2.

    Returns:
        The payload dict (``metadata`` + ``findings``) that was written.

    Raises:
        OSError: if an input cannot be read or the output cannot be written.
        KeyError: if a significant timing result lacks a field used in its
            title or summary.
    """
    findings: list[dict] = []
    next_id = 1

    # 1. Match-based findings, grouped by (left_normalized, right_normalized).
    matches = _read_cross_links(cross_links_path)
    grouped: dict[tuple[str, str], list[dict[str, str]]] = defaultdict(list)
    for i, row in enumerate(matches):
        # Remember the 0-based data-row index so evidence can cite it.
        row["__row__"] = str(i)
        grouped[(row.get("left_normalized", ""), row.get("right_normalized", ""))].append(row)

    for (left_norm, right_norm), rows in grouped.items():
        if not left_norm or not right_norm:
            continue
        # Use the highest-confidence match for the finding's overall confidence.
        best = min(rows, key=lambda r: CONFIDENCE_ORDER.get(r.get("confidence", "low"), 2))
        finding_id = f"F{next_id:04d}"
        next_id += 1
        evidence = [
            {
                "source": "cross_links.csv",
                "row": int(r["__row__"]),
                "fields": {
                    "match_type": r.get("match_type", ""),
                    "confidence": r.get("confidence", ""),
                    "left_name": r.get("left_name", ""),
                    "right_name": r.get("right_name", ""),
                    "overlap_ratio": r.get("overlap_ratio", ""),
                    "shared_tokens": r.get("shared_tokens", ""),
                },
            }
            for r in rows
        ]
        findings.append(
            {
                "id": finding_id,
                # Separator added: the two names were previously run together.
                "title": f"Entity match: {best.get('left_name', '')} ↔ {best.get('right_name', '')}",
                "severity": "MEDIUM" if best.get("confidence") == "high" else "LOW",
                "confidence": best.get("confidence", "low"),
                "summary": (
                    f"{len(rows)} cross-link record(s) tie "
                    f"'{best.get('left_name', '')}' to "
                    f"'{best.get('right_name', '')}' "
                    f"(best tier: {best.get('match_type', '')})."
                ),
                "evidence": evidence,
                "sources": ["cross_links.csv"],
            }
        )

    # 2. Bundled-donations findings (if cross_links carries donor↔candidate pattern).
    #    Heuristic: many distinct left names sharing the same right name.
    by_right: dict[str, set[str]] = defaultdict(set)
    by_right_rows: dict[str, list[dict[str, str]]] = defaultdict(list)
    for r in matches:
        # csv.DictReader fills missing trailing columns with None, so guard
        # with `or ""` before calling .strip().
        right = r.get("right_normalized") or ""
        left_raw = (r.get("left_name") or "").strip()
        if right and left_raw:
            by_right[right].add(left_raw)
            by_right_rows[right].append(r)

    for right_norm, lefts in by_right.items():
        if len(lefts) < bundled_threshold:
            continue
        rows = by_right_rows[right_norm]
        right_raw = rows[0].get("right_name", "")
        findings.append(
            {
                "id": f"F{next_id:04d}",
                "title": f"Bundled cross-links: {len(lefts)} distinct left entities ↔ '{right_raw}'",
                "severity": "HIGH",
                "confidence": "medium",
                "summary": (
                    f"{len(lefts)} distinct left-side entities link to "
                    f"'{right_raw}'. Pattern suggests coordinated relationship "
                    f"(e.g. bundled donations, multi-vendor employer)."
                ),
                "evidence": [
                    {
                        "source": "cross_links.csv",
                        "row": int(r.get("__row__", "0")),
                        "fields": {
                            "left_name": r.get("left_name", ""),
                            "match_type": r.get("match_type", ""),
                        },
                    }
                    for r in rows
                ],
                "sources": ["cross_links.csv"],
            }
        )
        next_id += 1

    # 3. Timing-based findings.
    if timing_path and Path(timing_path).exists():
        # JSON is UTF-8 by specification; do not depend on the locale default.
        timing = json.loads(Path(timing_path).read_text(encoding="utf-8"))
        for r in timing.get("results", []):
            if not r.get("significant"):
                continue
            findings.append(
                {
                    "id": f"F{next_id:04d}",
                    "title": (
                        f"Donation timing significantly clusters near awards: "
                        # Separator added: donor and recipient were run together.
                        f"{r['donor']} → {r['recipient']}"
                    ),
                    "severity": "HIGH" if r["p_value"] < 0.01 else "MEDIUM",
                    "confidence": "medium",
                    "summary": (
                        f"Mean nearest-award distance {r['observed_mean_days']} days "
                        f"(null {r['null_mean_days']} days). p={r['p_value']}, "
                        f"effect size {r['effect_size_sd']} SD. "
                        f"{r['n_donations']} donations, {r['n_award_dates']} awards."
                    ),
                    "evidence": [
                        {
                            "source": "timing.json",
                            "row": None,
                            "fields": r,
                        }
                    ],
                    "sources": ["timing.json"],
                }
            )
            next_id += 1

    # Sort: severity → confidence → id.
    findings.sort(
        key=lambda f: (
            SEVERITY_ORDER.get(f["severity"], 3),
            CONFIDENCE_ORDER.get(f["confidence"], 3),
            f["id"],
        )
    )

    payload = {
        "metadata": {
            "n_findings": len(findings),
            "cross_links_path": cross_links_path,
            "timing_path": timing_path,
            "bundled_threshold": bundled_threshold,
        },
        "findings": findings,
    }
    # Create the destination directory like the sibling fetch scripts do.
    out_file = Path(out_path)
    out_file.parent.mkdir(parents=True, exist_ok=True)
    out_file.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return payload
def main() -> int:
    """Command-line entry point: parse arguments, build findings, print the count.

    Returns 0 as the process exit code.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--cross-links", required=True)
    parser.add_argument("--timing", help="Optional timing.json from timing_analysis.py")
    parser.add_argument("--out", default="findings.json")
    parser.add_argument(
        "--bundled-threshold",
        type=int,
        default=3,
        help="Minimum distinct left entities to flag as bundled (default 3)",
    )
    args = parser.parse_args()

    result = build_findings(
        cross_links_path=args.cross_links,
        timing_path=args.timing,
        out_path=args.out,
        bundled_threshold=args.bundled_threshold,
    )
    n_findings = result["metadata"]["n_findings"]
    print(f"Wrote {n_findings} findings to {args.out}")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -0,0 +1,228 @@
#!/usr/bin/env python3
"""Cross-source entity resolution (stdlib-only).
Given two CSV files with name columns, find candidate matches using three
tiers of normalization:
1. exact normalized strings equal
2. fuzzy sorted-token (word-bag) match
3. token_overlap >=60% Jaccard overlap on >=4-char tokens, >=2 shared
Adapted from ShinMegamiBoson/OpenPlanter (MIT) but generalized: no Boston-
specific record types, no contribution-code filters, no fixed schemas.
Output CSV columns:
match_type, confidence, left_name, right_name,
left_normalized, right_normalized, left_row, right_row,
overlap_ratio, shared_tokens
"""
from __future__ import annotations
import argparse
import csv
import sys
from pathlib import Path
# Allow running directly or as a module.
sys.path.insert(0, str(Path(__file__).parent))
from _normalize import ( # noqa: E402
normalize_name,
normalize_aggressive,
token_overlap_ratio,
)
# Confidence label attached to each match tier.
CONFIDENCE = {"exact": "high", "fuzzy": "medium", "token_overlap": "low"}
def _read_csv(path: str, name_col: str) -> list[dict[str, str]]:
rows = []
with open(path, newline="", encoding="utf-8") as fh:
reader = csv.DictReader(fh)
if name_col not in (reader.fieldnames or []):
raise SystemExit(
f"Column {name_col!r} not in {path}. "
f"Available: {reader.fieldnames}"
)
for i, row in enumerate(reader):
row["__row__"] = str(i)
rows.append(row)
return rows
def _build_index(rows: list[dict[str, str]], name_col: str):
    """Build two lookup tables over ``rows`` keyed by normalized name.

    Returns ``(exact, aggressive)``: ``exact`` maps ``normalize_name`` output
    to the rows producing it, ``aggressive`` does the same for
    ``normalize_aggressive``. Rows whose key is empty are left out of the
    corresponding table.
    """
    exact_index: dict[str, list[dict[str, str]]] = {}
    bag_index: dict[str, list[dict[str, str]]] = {}
    for record in rows:
        raw_name = record.get(name_col, "")

        exact_key = normalize_name(raw_name)
        if exact_key:
            if exact_key not in exact_index:
                exact_index[exact_key] = []
            exact_index[exact_key].append(record)

        bag_key = normalize_aggressive(raw_name)
        if bag_key:
            if bag_key not in bag_index:
                bag_index[bag_key] = []
            bag_index[bag_key].append(record)
    return exact_index, bag_index
def _emit(
    out_rows: list[dict[str, str]],
    seen: set[tuple],
    match_type: str,
    left_row: dict[str, str],
    right_row: dict[str, str],
    left_col: str,
    right_col: str,
    ratio: float = 0.0,
    shared: int = 0,
):
    """Append one match record to ``out_rows`` unless it was already emitted.

    A record is identified by ``(left row index, right row index, match_type)``;
    ``seen`` holds the identities emitted so far and is updated in place.
    ``ratio`` and ``shared`` are written as empty strings when they are zero.
    """
    left_idx = left_row["__row__"]
    right_idx = right_row["__row__"]
    identity = (left_idx, right_idx, match_type)
    if identity in seen:
        return
    seen.add(identity)

    left_name = left_row.get(left_col, "")
    right_name = right_row.get(right_col, "")
    record = {
        "match_type": match_type,
        "confidence": CONFIDENCE[match_type],
        "left_name": left_name,
        "right_name": right_name,
        "left_normalized": normalize_name(left_name),
        "right_normalized": normalize_name(right_name),
        "left_row": left_idx,
        "right_row": right_idx,
        "overlap_ratio": "",
        "shared_tokens": "",
    }
    if ratio:
        record["overlap_ratio"] = f"{ratio:.3f}"
    if shared:
        record["shared_tokens"] = str(shared)
    out_rows.append(record)
def resolve(
    left_path: str,
    left_col: str,
    right_path: str,
    right_col: str,
    out_path: str,
    overlap_threshold: float = 0.60,
    min_shared: int = 2,
    skip_overlap: bool = False,
) -> int:
    """Match names between two CSV files and write the candidates to a CSV.

    Three tiers are applied. A pair may be emitted once per tier it satisfies,
    because deduplication in ``_emit`` is keyed on the tier as well:

    * ``exact``  - ``normalize_name`` results are equal,
    * ``fuzzy``  - ``normalize_aggressive`` results are equal,
    * ``token_overlap`` - ``token_overlap_ratio`` returns a ratio of at least
      ``overlap_threshold`` with at least ``min_shared`` shared tokens.
      This pass compares every left row with every right row and can be
      disabled with ``skip_overlap``.

    Args:
        left_path, right_path: Input CSV paths.
        left_col, right_col: Name column in the respective CSV.
        out_path: Output CSV path; missing parent directories are created.
        overlap_threshold: Minimum ratio for the ``token_overlap`` tier.
        min_shared: Minimum shared-token count for the ``token_overlap`` tier.
        skip_overlap: Skip the pairwise ``token_overlap`` pass.

    Returns:
        Number of match rows written.

    Raises:
        SystemExit: from ``_read_csv`` when a name column is missing.
    """
    left_rows = _read_csv(left_path, left_col)
    right_rows = _read_csv(right_path, right_col)
    right_exact, right_aggressive = _build_index(right_rows, right_col)

    out_rows: list[dict[str, str]] = []
    seen: set[tuple] = set()

    # Pass 1+2: exact / fuzzy via index lookup.
    for lrow in left_rows:
        raw = lrow.get(left_col, "")
        n = normalize_name(raw)
        if not n:
            continue
        for rrow in right_exact.get(n, []):
            _emit(out_rows, seen, "exact", lrow, rrow, left_col, right_col)
        a = normalize_aggressive(raw)
        if a:
            for rrow in right_aggressive.get(a, []):
                _emit(out_rows, seen, "fuzzy", lrow, rrow, left_col, right_col)

    if not skip_overlap:
        # Pass 3: token overlap (O(N*M) — expensive; allow opt-out).
        for lrow in left_rows:
            l_raw = lrow.get(left_col, "")
            if not normalize_name(l_raw):
                continue
            for rrow in right_rows:
                ratio, shared = token_overlap_ratio(
                    l_raw, rrow.get(right_col, "")
                )
                if ratio >= overlap_threshold and shared >= min_shared:
                    _emit(
                        out_rows,
                        seen,
                        "token_overlap",
                        lrow,
                        rrow,
                        left_col,
                        right_col,
                        ratio=ratio,
                        shared=shared,
                    )

    fieldnames = [
        "match_type",
        "confidence",
        "left_name",
        "right_name",
        "left_normalized",
        "right_normalized",
        "left_row",
        "right_row",
        "overlap_ratio",
        "shared_tokens",
    ]
    # Create the destination directory (as the sibling fetch scripts do) so a
    # missing folder does not discard the result of the matching passes.
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(out_rows)
    return len(out_rows)
def main() -> int:
    """Command-line entry point for entity resolution.

    Parses the arguments, runs ``resolve`` and prints how many match rows were
    written. Returns 0 as the process exit code.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--left", required=True, help="Left CSV path")
    parser.add_argument("--left-name-col", required=True, help="Name column in left CSV")
    parser.add_argument("--right", required=True, help="Right CSV path")
    parser.add_argument("--right-name-col", required=True, help="Name column in right CSV")
    parser.add_argument("--out", required=True, help="Output CSV path")
    parser.add_argument(
        "--overlap-threshold",
        type=float,
        default=0.60,
        help="Jaccard overlap threshold for token_overlap tier (default 0.60)",
    )
    parser.add_argument(
        "--min-shared",
        type=int,
        default=2,
        help="Minimum shared tokens for token_overlap tier (default 2)",
    )
    parser.add_argument(
        "--skip-overlap",
        action="store_true",
        help="Skip the O(N*M) token_overlap pass (much faster on large CSVs)",
    )
    opts = parser.parse_args()

    n_matches = resolve(
        left_path=opts.left,
        left_col=opts.left_name_col,
        right_path=opts.right,
        right_col=opts.right_name_col,
        out_path=opts.out,
        overlap_threshold=opts.overlap_threshold,
        min_shared=opts.min_shared,
        skip_overlap=opts.skip_overlap,
    )
    print(f"Wrote {n_matches} match rows to {opts.out}")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""Search court records via CourtListener (Free Law Project).
Covers ~10M federal and state court opinions, plus PACER docket data
where available. Public REST API v4 supports anonymous read access for
search; some endpoints require a token (free at courtlistener.com).
Set COURTLISTENER_TOKEN to authenticate (raises rate limits).
"""
from __future__ import annotations
import argparse
import csv
import os
import sys
import urllib.parse
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from _http import get_json # noqa: E402
# CourtListener REST v4 search endpoint.
BASE = "https://www.courtlistener.com/api/rest/v4/search/"

# Output CSV header, in column order.
COLUMNS = [
    "case_name", "court", "court_id", "date_filed", "docket_number",
    "judge", "citation", "result_type", "snippet", "absolute_url",
]

# CLI search-type name -> value sent as the API's `type` query parameter.
SEARCH_TYPES = {
    "opinions": "o",  # Court opinions
    "dockets": "r",   # PACER dockets (may require auth depending on coverage)
    "oral": "oa",     # Oral arguments
    "people": "p",    # Judges / people
    "recap": "r",     # Same as dockets in v4
}
def fetch(
    query: str,
    search_type: str,
    court: str | None,
    date_from: str | None,
    date_to: str | None,
    token: str | None,
    limit: int,
    out_path: str,
) -> int:
    """Run a CourtListener search and write up to ``limit`` results to a CSV.

    Pages are followed through the response's ``next`` URL until ``limit``
    rows are collected or no further page is offered. A request error is
    reported on stderr and stops paging; rows gathered so far are still
    written. The CSV (header ``COLUMNS``) is always written, even when empty.

    Args:
        query: Search text.
        search_type: Key of ``SEARCH_TYPES``; unknown values are passed to the
            API unchanged.
        court: Optional court id filter.
        date_from, date_to: Optional ``filed_after`` / ``filed_before`` values.
        token: Optional API token, sent as an ``Authorization: Token`` header.
        limit: Maximum number of rows to write.
        out_path: Output CSV path; missing parent directories are created.

    Returns:
        Number of rows written.
    """
    type_code = SEARCH_TYPES.get(search_type, search_type)
    params = {
        "q": query,
        "type": type_code,
    }
    if court:
        params["court"] = court
    if date_from:
        params["filed_after"] = date_from
    if date_to:
        params["filed_before"] = date_to
    headers = {"Authorization": f"Token {token}"} if token else None

    rows: list[dict[str, str]] = []
    next_url: str | None = f"{BASE}?{urllib.parse.urlencode(params)}"
    while next_url and len(rows) < limit:
        try:
            payload = get_json(next_url, headers=headers)
        except Exception as e:  # noqa: BLE001
            print(f"CourtListener error: {e}", file=sys.stderr)
            break
        if not isinstance(payload, dict):
            break
        # `results` may be present but null; treat that as an empty page.
        for r in payload.get("results") or []:
            if len(rows) >= limit:
                break
            if not isinstance(r, dict):
                continue

            citation = r.get("citation")
            if isinstance(citation, list):
                # Tolerate null / non-string entries in the list.
                citation_text = "; ".join(str(c) for c in citation if c)
            else:
                citation_text = citation or ""

            # The key can hold an explicit null, so `.get(key, "")` alone is
            # not enough to guarantee a string before calling .startswith().
            absolute_url = r.get("absolute_url") or ""
            if absolute_url.startswith("/"):
                absolute_url = f"https://www.courtlistener.com{absolute_url}"

            rows.append(
                {
                    "case_name": r.get("caseName", "") or r.get("case_name", "") or "",
                    "court": r.get("court", "") or "",
                    "court_id": r.get("court_id", "") or "",
                    "date_filed": (r.get("dateFiled", "") or r.get("date_filed", "") or "")[:10],
                    "docket_number": r.get("docketNumber", "") or r.get("docket_number", "") or "",
                    "judge": r.get("judge", "") or "",
                    "citation": citation_text,
                    "result_type": search_type,
                    "snippet": (r.get("snippet", "") or "").replace("\n", " ")[:500],
                    "absolute_url": absolute_url,
                }
            )
        next_url = payload.get("next")

    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", newline="", encoding="utf-8") as fh:
        w = csv.DictWriter(fh, fieldnames=COLUMNS)
        w.writeheader()
        w.writerows(rows)

    if not rows:
        print(
            f"CourtListener: 0 results for type={search_type!r} q={query!r}. "
            "Most private individuals don't appear in published court records "
            "unless they were party to a federal or state appellate case.",
            file=sys.stderr,
        )
    return len(rows)
def main() -> int:
    """Command-line entry point for the CourtListener search.

    ``--token`` defaults to the ``COURTLISTENER_TOKEN`` environment variable.
    Returns 0 as the process exit code.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--query", required=True, help="Search query (party name, case name, keyword)")
    parser.add_argument(
        "--type",
        default="opinions",
        choices=list(SEARCH_TYPES.keys()),
        help="Search type (default: opinions)",
    )
    parser.add_argument("--court", help="Court ID filter (e.g. 'nysd' = SDNY, 'scotus' = Supreme Court)")
    parser.add_argument("--date-from", help="Filed-after date YYYY-MM-DD")
    parser.add_argument("--date-to", help="Filed-before date YYYY-MM-DD")
    parser.add_argument("--token", default=os.environ.get("COURTLISTENER_TOKEN"))
    parser.add_argument("--limit", type=int, default=100)
    parser.add_argument("--out", required=True)
    opts = parser.parse_args()

    written = fetch(
        query=opts.query,
        search_type=opts.type,
        court=opts.court,
        date_from=opts.date_from,
        date_to=opts.date_to,
        token=opts.token,
        limit=opts.limit,
        out_path=opts.out,
    )
    print(f"Wrote {written} CourtListener rows to {opts.out}")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -0,0 +1,162 @@
#!/usr/bin/env python3
"""Search the GDELT 2.0 DOC API for news mentions.
GDELT monitors world news in 100+ languages and indexes the full text.
Free, anonymous, ~15-minute update frequency. Covers ~2015 to present.
Useful for surfacing news mentions of a person, company, or topic across
international media, a much wider net than Google News.
"""
from __future__ import annotations
import argparse
import csv
import json
import sys
import time
import urllib.parse
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from _http import get_json # noqa: E402
# GDELT 2.0 DOC API endpoint.
BASE = "https://api.gdeltproject.org/api/v2/doc/doc"

# Output CSV header, in column order.
COLUMNS = [
    "title", "url", "seen_date", "domain",
    "language", "source_country", "tone", "social_image",
]
def _gdelt_datetime(value: str, end_of_day: bool = False) -> str:
    """Reduce a date or datetime string to GDELT's ``YYYYMMDDHHMMSS`` digits.

    All non-digit characters (``-``, ``:``, ``T``, spaces, ...) are dropped.
    A date-only value is completed with ``000000``, or ``235959`` when
    ``end_of_day`` is true; a partial time is right-padded with zeros.
    """
    digits = "".join(ch for ch in value if ch.isdigit())
    if len(digits) == 8:
        digits += "235959" if end_of_day else "000000"
    elif 8 < len(digits) < 14:
        digits = digits.ljust(14, "0")
    return digits


def fetch(
    query: str,
    mode: str,
    timespan: str | None,
    start_datetime: str | None,
    end_datetime: str | None,
    source_country: str | None,
    source_lang: str | None,
    limit: int,
    out_path: str,
) -> int:
    """Query the GDELT DOC API and write the returned articles to a CSV.

    A single request is made (``maxrecords`` is capped at 250). A
    ``RuntimeError`` whose text contains ``"429"`` is retried after a
    6-second sleep, up to three attempts in total; any other error is printed
    to stderr and treated as an empty result. The CSV (header ``COLUMNS``) is
    always written, even when empty.

    Args:
        query: GDELT query string.
        mode: Value for the API's ``mode`` parameter.
        timespan: Optional relative window.
        start_datetime, end_datetime: Optional absolute bounds, given as
            ``YYYY-MM-DD`` or ``YYYY-MM-DDTHH:MM:SS`` (a space instead of
            ``T`` also works). They are converted to the 14-digit form the
            API expects; a date-only end bound covers the whole day.
        source_country, source_lang: Optional source filters.
        limit: Maximum number of rows to write.
        out_path: Output CSV path; missing parent directories are created.

    Returns:
        Number of rows written.
    """
    params: dict[str, str] = {
        "query": query,
        "mode": mode,
        "format": "json",
        "maxrecords": str(min(limit, 250)),
        "sort": "datedesc",
    }
    if timespan:
        params["timespan"] = timespan
    if start_datetime:
        params["startdatetime"] = _gdelt_datetime(start_datetime)
    if end_datetime:
        params["enddatetime"] = _gdelt_datetime(end_datetime, end_of_day=True)
    if source_country:
        params["sourcecountry"] = source_country
    if source_lang:
        params["sourcelang"] = source_lang

    url = f"{BASE}?{urllib.parse.urlencode(params)}"
    payload: dict | list = {}
    for attempt in range(3):
        try:
            payload = get_json(url)
            break
        except RuntimeError as e:
            # GDELT requires 1 request per 5 seconds; back off and retry.
            if "429" in str(e) and attempt < 2:
                print(
                    f"GDELT throttle hit; sleeping 6s before retry "
                    f"(attempt {attempt + 1}/3)",
                    file=sys.stderr,
                )
                time.sleep(6)
                continue
            print(f"GDELT error: {e}", file=sys.stderr)
            payload = {}
            break
        except Exception as e:  # noqa: BLE001
            print(f"GDELT error: {e}", file=sys.stderr)
            payload = {}
            break

    rows: list[dict[str, str]] = []
    if isinstance(payload, dict):
        articles = payload.get("articles", []) or []
        for a in articles[:limit]:
            seen = (a.get("seendate") or "")
            # GDELT format: 20260319T083000Z → 2026-03-19 08:30:00Z
            if len(seen) == 16 and "T" in seen:
                seen = f"{seen[0:4]}-{seen[4:6]}-{seen[6:8]} {seen[9:11]}:{seen[11:13]}:{seen[13:15]}Z"
            # Keep a tone of 0: only a missing value becomes an empty cell.
            tone = a.get("tone")
            rows.append(
                {
                    "title": (a.get("title") or "").replace("\n", " ").strip(),
                    "url": a.get("url") or "",
                    "seen_date": seen,
                    "domain": a.get("domain") or "",
                    "language": a.get("language") or "",
                    "source_country": a.get("sourcecountry") or "",
                    "tone": "" if tone is None else str(tone),
                    "social_image": a.get("socialimage") or "",
                }
            )

    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", newline="", encoding="utf-8") as fh:
        w = csv.DictWriter(fh, fieldnames=COLUMNS)
        w.writeheader()
        w.writerows(rows)

    if not rows:
        print(
            f"GDELT: 0 articles for query={query!r}. "
            "GDELT indexes ~2015→present. Try widening the timespan or "
            "checking the query syntax (https://blog.gdeltproject.org/gdelt-doc-2-0-api-debuts/).",
            file=sys.stderr,
        )
    return len(rows)
def main() -> int:
    """Command-line entry point for the GDELT article search.

    Returns 0 as the process exit code.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--query",
        required=True,
        help='Search query (supports GDELT operators: quoted phrases, AND/OR/NOT, sourcecountry:, theme:)',
    )
    parser.add_argument(
        "--mode",
        default="ArtList",
        choices=["ArtList", "ImageCollage", "TimelineVol", "TimelineTone", "ToneChart"],
        help="GDELT mode (default ArtList for article list)",
    )
    parser.add_argument(
        "--timespan",
        help="Relative window: e.g. '1d', '1w', '1m', '3m', '1y' (overrides start/end)",
    )
    parser.add_argument("--start", help="Absolute start YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS")
    parser.add_argument("--end", help="Absolute end YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS")
    parser.add_argument("--source-country", help="2-letter source country (e.g. US, UK)")
    parser.add_argument("--source-lang", help="Source language (e.g. English, Spanish)")
    parser.add_argument("--limit", type=int, default=100)
    parser.add_argument("--out", required=True)
    opts = parser.parse_args()

    written = fetch(
        query=opts.query,
        mode=opts.mode,
        timespan=opts.timespan,
        start_datetime=opts.start,
        end_datetime=opts.end,
        source_country=opts.source_country,
        source_lang=opts.source_lang,
        limit=opts.limit,
        out_path=opts.out,
    )
    print(f"Wrote {written} GDELT article rows to {opts.out}")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -0,0 +1,234 @@
#!/usr/bin/env python3
"""Search ICIJ Offshore Leaks via the bulk CSV database.
The old reconcile endpoint (https://offshoreleaks.icij.org/reconcile) returns
404; ICIJ has removed it. The remaining stable access path is the public
bulk download:
https://offshoreleaks-data.icij.org/offshoreleaks/csv/full-oldb.LATEST.zip
~70 MB, ~6 CSVs inside (nodes-entities, nodes-officers, nodes-intermediaries,
nodes-addresses, relationships, ...). We cache it under
$HERMES_OSINT_CACHE/icij/ (default: ~/.cache/hermes-osint/icij/) and search
locally so the agent doesn't re-download for every query.
Output CSV columns match the original `fetch_icij_offshore.py` contract.
"""
from __future__ import annotations
import argparse
import csv
import io
import os
import re
import sys
import time
import urllib.request
import zipfile
from pathlib import Path
# Public bulk download of the ICIJ Offshore Leaks database.
BULK_URL = "https://offshoreleaks-data.icij.org/offshoreleaks/csv/full-oldb.LATEST.zip"

# Output CSV header, in column order.
COLUMNS = [
    "node_id", "name", "node_type",
    "country_codes", "countries", "jurisdiction",
    "incorporation_date", "inactivation_date",
    "source", "entity_url", "connections",
]
def _cache_dir() -> Path:
base = os.environ.get("HERMES_OSINT_CACHE")
if base:
return Path(base) / "icij"
return Path.home() / ".cache" / "hermes-osint" / "icij"
def _download(dest: Path, force: bool = False) -> Path:
"""Download (or reuse cached) ICIJ bulk ZIP."""
dest.mkdir(parents=True, exist_ok=True)
zip_path = dest / "full-oldb.zip"
if zip_path.exists() and not force:
# Re-check age: refetch if older than 30 days.
age_days = (time.time() - zip_path.stat().st_mtime) / 86400
if age_days < 30:
return zip_path
print(f"Downloading ICIJ bulk database (~70 MB) to {zip_path}", file=sys.stderr)
req = urllib.request.Request(
BULK_URL,
headers={"User-Agent": "hermes-agent osint-investigation skill"},
)
with urllib.request.urlopen(req, timeout=120) as resp: # noqa: S310
tmp = zip_path.with_suffix(".zip.tmp")
with open(tmp, "wb") as fh:
while True:
chunk = resp.read(1 << 16)
if not chunk:
break
fh.write(chunk)
tmp.replace(zip_path)
return zip_path
def _open_csv(zf: zipfile.ZipFile, name_pattern: str):
"""Open the first CSV matching name_pattern (case-insensitive substring)."""
for info in zf.infolist():
if name_pattern.lower() in info.filename.lower() and info.filename.lower().endswith(".csv"):
return zf.open(info), info.filename
return None, None
def _match(needle_norm: str, hay: str) -> bool:
return needle_norm in (hay or "").upper()
def _normalize_query(s: str) -> str:
s = s.upper()
s = re.sub(r"[^\w\s]", " ", s)
s = re.sub(r"\s+", " ", s).strip()
return s
def fetch(
    entity: str | None,
    officer: str | None,
    jurisdiction: str | None,
    out_path: str,
    cache_dir: Path,
    force_refresh: bool = False,
    limit: int = 500,
) -> int:
    """Search the cached ICIJ bulk database and write the matches to a CSV.

    The entities, officers and intermediaries CSVs inside the ZIP are scanned
    in that order. A row is kept when its name contains one of the active
    needles (or unconditionally when no name needle was given) and, if a
    jurisdiction filter is set, when that filter occurs in the row's
    jurisdiction / country fields. Scanning stops once ``limit`` rows are
    collected. The CSV (header ``COLUMNS``) is always written, even when empty.

    Returns:
        Number of rows written.
    """
    archive_path = _download(cache_dir, force=force_refresh)
    rows: list[dict[str, str]] = []

    # (kind, normalized needle) pairs built from the name arguments.
    needles: list[tuple[str, str]] = []
    for kind, raw in (("Entity", entity), ("Officer", officer)):
        if raw:
            needles.append((kind, _normalize_query(raw)))
    jur_norm = _normalize_query(jurisdiction) if jurisdiction else None

    targets = [
        ("Entity", "nodes-entities"),
        ("Officer", "nodes-officers"),
        ("Intermediary", "nodes-intermediaries"),
    ]

    with zipfile.ZipFile(archive_path) as zf:
        for node_type, csv_substring in targets:
            # Needles of this node type; when there are none the scan falls
            # back to `fallback_needles` below.
            own_needles = [n for (k, n) in needles if k == node_type]
            fallback_needles = [
                n for (k, n) in needles if k in (node_type, "Entity", "Officer")
            ]
            # Skip this CSV when name needles exist, none targets it, and no
            # jurisdiction filter is active.
            # NOTE(review): with a jurisdiction filter set, a CSV is scanned
            # using needles of the other kind too - confirm this is intended.
            if needles and not own_needles and not jur_norm:
                continue
            stream, _member_name = _open_csv(zf, csv_substring)
            if not stream:
                continue
            active_needles = own_needles or fallback_needles
            with stream:
                text = io.TextIOWrapper(stream, encoding="utf-8", errors="replace")
                for record in csv.DictReader(text):
                    name = (record.get("name") or "").strip()
                    if not name:
                        continue
                    name_upper = name.upper()
                    # Without name needles this is a jurisdiction-only sweep.
                    matched = (not needles) or any(
                        _match(n, name_upper) for n in active_needles
                    )
                    if not matched:
                        continue
                    jur = (
                        record.get("jurisdiction_description")
                        or record.get("country_codes")
                        or ""
                    ).strip()
                    if jur_norm:
                        countries_upper = (record.get("countries") or "").upper()
                        if not (jur_norm in jur.upper() or jur_norm in countries_upper):
                            continue
                    node_id = (record.get("node_id") or "").strip()
                    entity_url = (
                        f"https://offshoreleaks.icij.org/nodes/{node_id}" if node_id else ""
                    )
                    rows.append(
                        {
                            "node_id": node_id,
                            "name": name,
                            "node_type": node_type,
                            "country_codes": record.get("country_codes", "") or "",
                            "countries": record.get("countries", "") or "",
                            "jurisdiction": jur,
                            "incorporation_date": record.get("incorporation_date", "") or "",
                            "inactivation_date": record.get("inactivation_date", "") or "",
                            "source": record.get("sourceID", "") or record.get("source", "") or "",
                            "entity_url": entity_url,
                            "connections": "",
                        }
                    )
                    if len(rows) >= limit:
                        break
            if len(rows) >= limit:
                break

    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=COLUMNS)
        writer.writeheader()
        writer.writerows(rows)

    if not rows:
        bits = []
        if entity:
            bits.append(f"entity={entity!r}")
        if officer:
            bits.append(f"officer={officer!r}")
        if jurisdiction:
            bits.append(f"jurisdiction={jurisdiction!r}")
        print(
            f"ICIJ: 0 matches for {', '.join(bits)}. "
            "The bulk database covers offshore leaks (Panama, Paradise, Pandora, "
            "Bahamas, Offshore Leaks). Most private US individuals are NOT in it.",
            file=sys.stderr,
        )
    return len(rows)
def main() -> int:
    """Command-line entry point for the ICIJ Offshore Leaks search.

    At least one of ``--entity``, ``--officer`` or ``--jurisdiction`` is
    required; otherwise argparse reports an error. Returns 0 as the process
    exit code.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--entity", help="Search by entity name (substring, case-insensitive)")
    parser.add_argument("--officer", help="Search by officer / individual name (substring, case-insensitive)")
    parser.add_argument("--jurisdiction", help="Filter results by jurisdiction substring")
    parser.add_argument("--limit", type=int, default=500)
    parser.add_argument("--out", required=True)
    parser.add_argument(
        "--cache-dir",
        type=Path,
        default=None,
        help="Override cache directory (default: $HERMES_OSINT_CACHE/icij or ~/.cache/hermes-osint/icij)",
    )
    parser.add_argument(
        "--force-refresh",
        action="store_true",
        help="Re-download the bulk ZIP even if a recent cached copy exists.",
    )
    opts = parser.parse_args()

    has_filter = opts.entity or opts.officer or opts.jurisdiction
    if not has_filter:
        parser.error("must supply at least one of --entity / --officer / --jurisdiction")

    written = fetch(
        entity=opts.entity,
        officer=opts.officer,
        jurisdiction=opts.jurisdiction,
        out_path=opts.out,
        cache_dir=opts.cache_dir or _cache_dir(),
        force_refresh=opts.force_refresh,
        limit=opts.limit,
    )
    print(f"Wrote {written} ICIJ Offshore Leaks rows to {opts.out}")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

View file

@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""Search NYC property records via ACRIS (Automated City Register Information System).
Uses the city's Socrata-backed open data API. No auth required for read access.
Datasets:
bnx9-e6tj Real Property Master (one row per recorded document)
636b-3b5g Real Property Parties (names: grantor, grantee, etc.)
8h5j-fqxa Real Property Legal (lot / property identifiers)
uqqa-hym2 Real Property References
The Parties dataset has the names. We search by name and optionally join to
Master to get the doc type and date.
"""
from __future__ import annotations
import argparse
import csv
import sys
import urllib.parse
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from _http import get_json # noqa: E402
# Socrata endpoints for the two ACRIS datasets queried here.
PARTIES_URL = "https://data.cityofnewyork.us/resource/636b-3b5g.json"
MASTER_URL = "https://data.cityofnewyork.us/resource/bnx9-e6tj.json"

# party_type code -> human-readable role.
PARTY_TYPE = {
    "1": "grantor (seller / mortgagor / debtor)",
    "2": "grantee (buyer / mortgagee / creditor)",
    "3": "other party",
}

# recorded_borough code -> borough name.
BOROUGH = {"1": "Manhattan", "2": "Bronx", "3": "Brooklyn", "4": "Queens", "5": "Staten Island"}

# Output CSV header, in column order.
COLUMNS = [
    "document_id", "name", "party_type", "party_role",
    "address_1", "address_2", "city", "state", "zip", "country",
    "doc_type", "doc_date", "recorded_date", "borough", "amount",
    "filing_url",
]
def _filing_url(document_id: str) -> str:
if not document_id:
return ""
return (
f"https://a836-acris.nyc.gov/DS/DocumentSearch/DocumentImageView?doc_id={document_id}"
)
def fetch(
    name: str | None,
    address: str | None,
    party_type: str | None,
    limit: int,
    out_path: str,
    enrich: bool = True,
) -> int:
    """Search the ACRIS Parties dataset and write the matching rows to a CSV.

    Parties are filtered by a case-insensitive substring match on the name
    and/or first address line, optionally restricted to one party type. When
    ``enrich`` is true, the Master dataset is queried in batches of up to 100
    document ids to add document type, dates, borough and amount; a failed
    batch is reported on stderr and skipped. The CSV (header ``COLUMNS``) is
    always written, even when empty.

    Returns:
        Number of rows written.

    Raises:
        SystemExit: if neither ``name`` nor ``address`` is given, or if the
            parties response is not a JSON list.
    """
    if not (name or address):
        raise SystemExit("must supply --name or --address")

    def _like(column: str, needle: str) -> str:
        # Double single quotes so the needle stays inside the SoQL literal.
        escaped = needle.upper().replace("'", "''")
        return f"upper({column}) like '%{escaped}%'"

    conditions: list[str] = []
    if name:
        conditions.append(_like("name", name))
    if address:
        conditions.append(_like("address_1", address))
    if party_type and party_type in {"1", "2", "3"}:
        conditions.append(f"party_type='{party_type}'")

    party_query = urllib.parse.urlencode(
        {"$where": " AND ".join(conditions), "$limit": str(limit)}
    )
    parties = get_json(f"{PARTIES_URL}?{party_query}")
    if not isinstance(parties, list):
        raise SystemExit(f"Unexpected ACRIS response: {parties!r}")

    # Enrich with master record (doc_type, dates, borough, amount).
    doc_ids: list[str] = sorted(
        {doc_id for doc_id in (party.get("document_id") for party in parties) if doc_id}
    )
    masters: dict[str, dict] = {}
    if enrich and doc_ids:
        batch_size = 100
        for start in range(0, len(doc_ids), batch_size):
            batch = doc_ids[start : start + batch_size]
            quoted_ids = ",".join(f"'{doc_id}'" for doc_id in batch)
            master_query = urllib.parse.urlencode(
                {"$where": f"document_id in ({quoted_ids})", "$limit": "100"}
            )
            try:
                master_rows = get_json(f"{MASTER_URL}?{master_query}")
            except Exception as e:  # noqa: BLE001
                print(f"ACRIS master lookup failed for chunk: {e}", file=sys.stderr)
                continue
            if not isinstance(master_rows, list):
                continue
            for master in master_rows:
                master_id = master.get("document_id", "")
                if master_id:
                    masters[master_id] = master

    out_rows: list[dict[str, str]] = []
    for party in parties:
        doc_id = party.get("document_id", "") or ""
        master = masters.get(doc_id, {})
        borough_code = master.get("recorded_borough", "")
        out_rows.append(
            {
                "document_id": doc_id,
                "name": party.get("name", "") or "",
                "party_type": party.get("party_type", "") or "",
                "party_role": PARTY_TYPE.get(party.get("party_type", ""), ""),
                "address_1": party.get("address_1", "") or "",
                "address_2": party.get("address_2", "") or "",
                "city": party.get("city", "") or "",
                "state": party.get("state", "") or "",
                "zip": party.get("zip", "") or "",
                "country": party.get("country", "") or "",
                "doc_type": master.get("doc_type", "") or "",
                "doc_date": (master.get("document_date", "") or "")[:10],
                "recorded_date": (master.get("recorded_datetime", "") or "")[:10],
                # Unknown borough codes are passed through unchanged.
                "borough": BOROUGH.get(borough_code, borough_code),
                "amount": master.get("document_amt", "") or "",
                "filing_url": _filing_url(doc_id),
            }
        )

    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=COLUMNS)
        writer.writeheader()
        writer.writerows(out_rows)

    if not out_rows:
        filters = []
        if name:
            filters.append(f"name={name!r}")
        if address:
            filters.append(f"address={address!r}")
        print(
            f"NYC ACRIS: 0 records for {', '.join(filters)}. "
            "ACRIS covers ONLY NYC (5 boroughs). For property records elsewhere, "
            "search the relevant county recorder directly.",
            file=sys.stderr,
        )
    return len(out_rows)
def main() -> int:
    """Command-line entry point for the NYC ACRIS party search.

    Parses the CLI flags, forwards them to ``fetch`` and prints how many rows
    were written to the ``--out`` file.

    Returns:
        Always 0; the module guard passes it to ``SystemExit``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--name", help="Party name substring (case-insensitive)")
    parser.add_argument("--address", help="Address line 1 substring")
    parser.add_argument(
        "--party-type",
        choices=["1", "2", "3"],
        help="Filter party type: 1=grantor (seller/mortgagor), 2=grantee (buyer/mortgagee), 3=other",
    )
    parser.add_argument("--limit", type=int, default=200)
    parser.add_argument(
        "--no-enrich",
        action="store_true",
        help="Skip the master-document lookup that adds doc_type/date/amount",
    )
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    # --no-enrich is an opt-out switch, so it is inverted for fetch's ``enrich``.
    written = fetch(
        name=args.name,
        address=args.address,
        party_type=args.party_type,
        limit=args.limit,
        out_path=args.out,
        enrich=not args.no_enrich,
    )
    print(f"Wrote {written} NYC ACRIS rows to {args.out}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -0,0 +1,175 @@
#!/usr/bin/env python3
"""Fetch OFAC SDN list (CSV format) and normalize.
Public endpoint: https://www.treasury.gov/ofac/downloads/sdn.csv
Format reference: https://ofac.treasury.gov/specially-designated-nationals-and-blocked-persons-list-sdn-human-readable-lists
The SDN CSV uses a specific 12-column format with no header row:
ent_num, sdn_name, sdn_type, program, title, call_sign, vess_type,
tonnage, grt, vess_flag, vess_owner, remarks
Address and AKA records live in separate files. We fetch all three and join.
"""
from __future__ import annotations
import argparse
import csv
import io
import sys
from collections import defaultdict
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from _http import get # noqa: E402
# Download locations of the three source files: the SDN list itself plus the
# separate address and alias (AKA) files.
SDN_URL = "https://www.treasury.gov/ofac/downloads/sdn.csv"
ADD_URL = "https://www.treasury.gov/ofac/downloads/add.csv"
ALT_URL = "https://www.treasury.gov/ofac/downloads/alt.csv"

# Positional field names for each header-less source file.
SDN_COLS = (
    "ent_num sdn_name sdn_type program title call_sign "
    "vess_type tonnage grt vess_flag vess_owner remarks"
).split()
ADD_COLS = "ent_num add_num address city_state_zip country add_remarks".split()
ALT_COLS = "ent_num alt_num alt_type alt_name alt_remarks".split()

# Header (and column order) of the normalized CSV this script writes.
COLUMNS = (
    "entity_id name entity_type program_list title nationalities "
    "aka_list addresses dob pob remarks last_updated"
).split()

# Lower-cased source type -> normalized entity_type (currently an identity map).
_TYPE_MAP = {kind: kind for kind in ("individual", "entity", "vessel", "aircraft")}
def _read_csv(url: str, columns: list[str]) -> list[dict[str, str]]:
    """Download a header-less CSV and return its rows as dicts keyed by ``columns``.

    Args:
        url: Location of the CSV file; fetched through ``get`` with ``timeout=60``.
        columns: Field names assigned positionally to the cells of every row.

    Returns:
        One dict per data row. Rows shorter than ``columns`` are padded with
        empty strings and surplus trailing cells are dropped. Rows that carry
        no data at all (blank, whitespace-only, or only a DOS end-of-file
        marker) are omitted.
    """
    # latin-1 maps every byte to a code point, so this decode cannot fail.
    body = get(url, timeout=60).decode("latin-1", errors="replace")
    width = len(columns)
    out: list[dict[str, str]] = []
    for row in csv.reader(io.StringIO(body)):
        # Drop rows without any payload. Besides truly empty lines this covers
        # a trailing line holding just 0x1A (Ctrl-Z): str.strip() does not treat
        # that byte as whitespace, so it would otherwise surface downstream as
        # a record whose first field is "\x1a".
        # NOTE(review): the legacy OFAC exports appear to end with such a
        # marker line — confirm against a live download.
        if not any(cell.replace("\x1a", "").strip() for cell in row):
            continue
        # Pad/truncate to the expected width.
        row = row[:width] + [""] * (width - len(row))
        out.append(dict(zip(columns, row)))
    return out
def _strip_quotes(s: str) -> str:
s = s.strip()
if s.startswith('"') and s.endswith('"'):
s = s[1:-1]
if s == "-0-":
return ""
return s
def _split_programs(raw: str) -> list[str]:
    """Break a raw SDN ``program`` field into clean program codes.

    Accepts ``;``-separated values as well as bracketed runs such as
    ``A] [B``; surrounding whitespace and empty tokens are discarded.
    """
    for bracket in "[]":
        raw = raw.replace(bracket, ";")
    return [tok.strip() for tok in raw.split(";") if tok.strip()]


def fetch(
    program: str | None,
    entity_type: str | None,
    out_path: str,
) -> int:
    """Download the SDN, address and alias files, join them, and write one CSV.

    Args:
        program: Optional sanctions program code; when given, only entries
            listing that program (case-insensitive, exact code match) are kept.
        entity_type: Optional normalized entity type; when given, only entries
            whose mapped ``sdn_type`` equals it are kept.
        out_path: Destination CSV path. Missing parent directories are created.

    Returns:
        Number of data rows written (the header row is not counted).
    """
    sdn = _read_csv(SDN_URL, SDN_COLS)
    addresses = _read_csv(ADD_URL, ADD_COLS)
    akas = _read_csv(ALT_URL, ALT_COLS)

    # Group formatted addresses by the entity number they belong to.
    addr_by_ent: dict[str, list[str]] = defaultdict(list)
    for a in addresses:
        ent = _strip_quotes(a["ent_num"])
        cleaned = (_strip_quotes(a[c]) for c in ("address", "city_state_zip", "country"))
        parts = [part for part in cleaned if part]
        if parts:
            addr_by_ent[ent].append(", ".join(parts))

    # Group alternate names by entity number.
    aka_by_ent: dict[str, list[str]] = defaultdict(list)
    for k in akas:
        ent = _strip_quotes(k["ent_num"])
        name = _strip_quotes(k["alt_name"])
        if name:
            aka_by_ent[ent].append(name)

    # Normalize the requested program once instead of once per row.
    wanted_program = program.strip().upper() if program else ""

    rows: list[dict[str, str]] = []
    for r in sdn:
        ent_num = _strip_quotes(r["ent_num"])
        if not ent_num:
            continue

        raw_type = _strip_quotes(r["sdn_type"])
        # NOTE(review): if the source leaves sdn_type as the -0- placeholder for
        # organisations, they end up with an empty entity_type here and can
        # never match entity_type="entity" — confirm against the OFAC data spec.
        sdn_type = _TYPE_MAP.get(raw_type.lower(), raw_type)
        if entity_type and sdn_type != entity_type:
            continue

        # Tokenize once and use the same tokens for the filter and the output,
        # so padded ("A; B") or bracketed ("A] [B") lists still match.
        progs = _split_programs(_strip_quotes(r["program"]))
        if wanted_program and wanted_program not in (p.upper() for p in progs):
            continue

        remarks = _strip_quotes(r["remarks"])
        # DOB / POB are commonly embedded in remarks for individuals.
        # A later DOB/POB chunk overwrites an earlier one.
        dob = ""
        pob = ""
        if sdn_type == "individual" and remarks:
            for chunk in remarks.split(";"):
                ch = chunk.strip()
                if ch.upper().startswith("DOB"):
                    dob = ch.split(maxsplit=1)[1] if " " in ch else ""
                elif ch.upper().startswith("POB"):
                    pob = ch.split(maxsplit=1)[1] if " " in ch else ""

        rows.append(
            {
                "entity_id": ent_num,
                "name": _strip_quotes(r["sdn_name"]),
                "entity_type": sdn_type,
                "program_list": "; ".join(progs),
                "title": _strip_quotes(r["title"]),
                "nationalities": "",  # not in this CSV; available in XML format
                "aka_list": "; ".join(aka_by_ent.get(ent_num, [])),
                "addresses": "; ".join(addr_by_ent.get(ent_num, [])),
                "dob": dob,
                "pob": pob,
                "remarks": remarks,
                "last_updated": "",
            }
        )

    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", newline="", encoding="utf-8") as fh:
        w = csv.DictWriter(fh, fieldnames=COLUMNS)
        w.writeheader()
        w.writerows(rows)
    return len(rows)
def main() -> int:
    """Command-line entry point for the OFAC SDN export.

    Parses the CLI flags, hands them to ``fetch`` and prints how many rows
    were written to the ``--out`` file.

    Returns:
        Always 0; the module guard passes it to ``SystemExit``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--program",
        help="Filter to specific sanctions program (e.g. SDGT, IRAN)",
    )
    parser.add_argument(
        "--entity-type",
        choices=["individual", "entity", "vessel", "aircraft"],
        help="Filter to a specific entity type",
    )
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    written = fetch(
        program=args.program,
        entity_type=args.entity_type,
        out_path=args.out,
    )
    print(f"Wrote {written} OFAC SDN rows to {args.out}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

Some files were not shown because too many files have changed in this diff Show more