diff --git a/.env.example b/.env.example index 0317296ba1..066e93f7c9 100644 --- a/.env.example +++ b/.env.example @@ -24,6 +24,15 @@ # Optional base URL override (default: Google's OpenAI-compatible endpoint) # GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai +# ============================================================================= +# LLM PROVIDER (Ollama Cloud) +# ============================================================================= +# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint. +# Get your key at: https://ollama.com/settings +# OLLAMA_API_KEY=your_ollama_key_here +# Optional base URL override (default: https://ollama.com/v1) +# OLLAMA_BASE_URL=https://ollama.com/v1 + # ============================================================================= # LLM PROVIDER (z.ai / GLM) # ============================================================================= @@ -145,6 +154,10 @@ # Only override here if you need to force a backend without touching config.yaml: # TERMINAL_ENV=local +# Override the container runtime binary (e.g. to use Podman instead of Docker). +# Useful on systems where Docker's storage driver is broken or unavailable. 
+# HERMES_DOCKER_BINARY=/usr/local/bin/podman + # Container images (for singularity/docker/modal backends) # TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20 # TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20 diff --git a/.envrc b/.envrc index 3550a30f2d..45c59523cb 100644 --- a/.envrc +++ b/.envrc @@ -1 +1,5 @@ +watch_file pyproject.toml uv.lock +watch_file ui-tui/package-lock.json ui-tui/package.json +watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix + use flake diff --git a/.github/workflows/deploy-site.yml b/.github/workflows/deploy-site.yml index 480b236f84..3e78bc61b1 100644 --- a/.github/workflows/deploy-site.yml +++ b/.github/workflows/deploy-site.yml @@ -1,11 +1,12 @@ name: Deploy Site on: + release: + types: [published] push: branches: [main] paths: - 'website/**' - - 'landingpage/**' - 'skills/**' - 'optional-skills/**' - '.github/workflows/deploy-site.yml' @@ -20,8 +21,14 @@ concurrency: cancel-in-progress: false jobs: - build-and-deploy: - # Only run on the upstream repository, not on forks + deploy-vercel: + if: github.event_name == 'release' + runs-on: ubuntu-latest + steps: + - name: Trigger Vercel Deploy + run: curl -X POST "${{ secrets.VERCEL_DEPLOY_HOOK }}" + + deploy-docs: if: github.repository == 'NousResearch/hermes-agent' runs-on: ubuntu-latest environment: @@ -65,12 +72,7 @@ jobs: - name: Stage deployment run: | mkdir -p _site/docs - # Landing page at root - cp -r landingpage/* _site/ - # Docusaurus at /docs/ cp -r website/build/* _site/docs/ - # CNAME so GitHub Pages keeps the custom domain between deploys - echo "hermes-agent.nousresearch.com" > _site/CNAME - name: Upload artifact uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3 diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index f9e846e68c..228ee33964 100644 --- a/.github/workflows/docker-publish.yml +++ 
b/.github/workflows/docker-publish.yml @@ -3,8 +3,13 @@ name: Docker Build and Publish on: push: branches: [main] - pull_request: - branches: [main] + paths: + - '**/*.py' + - 'pyproject.toml' + - 'uv.lock' + - 'Dockerfile' + - 'docker/**' + - '.github/workflows/docker-publish.yml' release: types: [published] @@ -49,6 +54,14 @@ jobs: - name: Test image starts run: | + # The image runs as the hermes user (UID 10000). GitHub Actions + # creates /tmp/hermes-test root-owned by default, which hermes + # can't write to — chown it to match the in-container UID before + # bind-mounting. Real users doing `docker run -v ~/.hermes:...` + # with their own UID hit the same issue and have their own + # remediations (HERMES_UID env var, or chown locally). + mkdir -p /tmp/hermes-test + sudo chown -R 10000:10000 /tmp/hermes-test docker run --rm \ -v /tmp/hermes-test:/opt/data \ --entrypoint /opt/hermes/docker/entrypoint.sh \ diff --git a/.github/workflows/supply-chain-audit.yml b/.github/workflows/supply-chain-audit.yml index 4aa0fd321a..417e7b21f8 100644 --- a/.github/workflows/supply-chain-audit.yml +++ b/.github/workflows/supply-chain-audit.yml @@ -3,14 +3,31 @@ name: Supply Chain Audit on: pull_request: types: [opened, synchronize, reopened] + paths: + - '**/*.py' + - '**/*.pth' + - '**/setup.py' + - '**/setup.cfg' + - '**/sitecustomize.py' + - '**/usercustomize.py' + - '**/__init__.pth' permissions: pull-requests: write contents: read +# Narrow, high-signal scanner. Only fires on critical indicators of supply +# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics +# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits, +# Actions version unpinning, outbound POST/PUT) were intentionally +# removed — they fired on nearly every PR and trained reviewers to ignore +# the scanner. Keep this file's checks ruthlessly narrow: if you find +# yourself adding WARNING-tier patterns here again, make a separate +# advisory-only workflow instead. 
+ jobs: scan: - name: Scan PR for supply chain risks + name: Scan PR for critical supply chain risks runs-on: ubuntu-latest steps: - name: Checkout @@ -18,7 +35,7 @@ jobs: with: fetch-depth: 0 - - name: Scan diff for suspicious patterns + - name: Scan diff for critical patterns id: scan env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -28,19 +45,19 @@ jobs: BASE="${{ github.event.pull_request.base.sha }}" HEAD="${{ github.event.pull_request.head.sha }}" - # Get the full diff (added lines only) + # Added lines only, excluding lockfiles. DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true) FINDINGS="" - CRITICAL=false # --- .pth files (auto-execute on Python startup) --- + # The exact mechanism used in the litellm supply chain attack: + # https://github.com/BerriAI/litellm/issues/24512 PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true) if [ -n "$PTH_FILES" ]; then - CRITICAL=true FINDINGS="${FINDINGS} ### 🚨 CRITICAL: .pth file added or modified - Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512). + Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. 
**Files:** \`\`\` @@ -49,13 +66,12 @@ jobs: " fi - # --- base64 + exec/eval combo (the litellm attack pattern) --- + # --- base64 decode + exec/eval on the same line (the litellm attack pattern) --- B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true) if [ -n "$B64_EXEC_HITS" ]; then - CRITICAL=true FINDINGS="${FINDINGS} ### 🚨 CRITICAL: base64 decode + exec/eval combo - This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads. + Base64-decoded strings passed directly to exec/eval — the signature of hidden credential-stealing payloads. **Matches:** \`\`\` @@ -64,41 +80,12 @@ jobs: " fi - # --- base64 decode/encode (alone — legitimate uses exist) --- - B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true) - if [ -n "$B64_HITS" ]; then - FINDINGS="${FINDINGS} - ### ⚠️ WARNING: base64 encoding/decoding detected - Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate. - - **Matches (first 20):** - \`\`\` - ${B64_HITS} - \`\`\` - " - fi - - # --- exec/eval with string arguments --- - EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true) - if [ -n "$EXEC_HITS" ]; then - FINDINGS="${FINDINGS} - ### ⚠️ WARNING: exec() or eval() usage - Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches. 
- - **Matches (first 20):** - \`\`\` - ${EXEC_HITS} - \`\`\` - " - fi - - # --- subprocess with encoded/obfuscated commands --- - PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true) + # --- subprocess with encoded/obfuscated command argument --- + PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true) if [ -n "$PROC_HITS" ]; then - CRITICAL=true FINDINGS="${FINDINGS} ### 🚨 CRITICAL: subprocess with encoded/obfuscated command - Subprocess calls with encoded arguments are a strong indicator of payload execution. + Subprocess calls whose command strings are base64- or hex-encoded are a strong indicator of payload execution. **Matches:** \`\`\` @@ -107,25 +94,12 @@ jobs: " fi - # --- Network calls to non-standard domains --- - EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true) - if [ -n "$EXFIL_HITS" ]; then - FINDINGS="${FINDINGS} - ### ⚠️ WARNING: Outbound network calls (POST/PUT) - Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate. - - **Matches (first 10):** - \`\`\` - ${EXFIL_HITS} - \`\`\` - " - fi - - # --- setup.py / setup.cfg install hooks --- - SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true) + # --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) --- + # These execute during pip install or interpreter startup. 
+ SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true) if [ -n "$SETUP_HITS" ]; then FINDINGS="${FINDINGS} - ### ⚠️ WARNING: Install hook files modified + ### 🚨 CRITICAL: Install-hook file added or modified These files can execute code during package installation or interpreter startup. **Files:** @@ -135,114 +109,31 @@ jobs: " fi - # --- Compile/marshal/pickle (code object injection) --- - MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true) - if [ -n "$MARSHAL_HITS" ]; then - FINDINGS="${FINDINGS} - ### ⚠️ WARNING: marshal/pickle/compile usage - These can deserialize or construct executable code objects. - - **Matches:** - \`\`\` - ${MARSHAL_HITS} - \`\`\` - " - fi - - # --- CI/CD workflow files modified --- - WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true) - if [ -n "$WORKFLOW_HITS" ]; then - FINDINGS="${FINDINGS} - ### ⚠️ WARNING: CI/CD workflow files modified - Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added. - - **Files:** - \`\`\` - ${WORKFLOW_HITS} - \`\`\` - " - fi - - # --- Dockerfile / container build files modified --- - DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true) - if [ -n "$DOCKER_HITS" ]; then - FINDINGS="${FINDINGS} - ### ⚠️ WARNING: Container build files modified - Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands. 
- - **Files:** - \`\`\` - ${DOCKER_HITS} - \`\`\` - " - fi - - # --- Dependency manifest files modified --- - DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true) - if [ -n "$DEP_HITS" ]; then - FINDINGS="${FINDINGS} - ### ⚠️ WARNING: Dependency manifest files modified - Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources. - - **Files:** - \`\`\` - ${DEP_HITS} - \`\`\` - " - fi - - # --- GitHub Actions version unpinning (mutable tags instead of SHAs) --- - ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep 'uses:' | grep -v '#' | grep -E '@v[0-9]' | head -10 || true) - if [ -n "$ACTIONS_UNPIN" ]; then - FINDINGS="${FINDINGS} - ### ⚠️ WARNING: GitHub Actions with mutable version tags - Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised. - - **Matches:** - \`\`\` - ${ACTIONS_UNPIN} - \`\`\` - " - fi - - # --- Output results --- if [ -n "$FINDINGS" ]; then echo "found=true" >> "$GITHUB_OUTPUT" - if [ "$CRITICAL" = true ]; then - echo "critical=true" >> "$GITHUB_OUTPUT" - else - echo "critical=false" >> "$GITHUB_OUTPUT" - fi - # Write findings to a file (multiline env vars are fragile) echo "$FINDINGS" > /tmp/findings.md else echo "found=false" >> "$GITHUB_OUTPUT" - echo "critical=false" >> "$GITHUB_OUTPUT" fi - - name: Post warning comment + - name: Post critical finding comment if: steps.scan.outputs.found == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - SEVERITY="⚠️ Supply Chain Risk Detected" - if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then - SEVERITY="🚨 CRITICAL Supply Chain Risk Detected" - fi + BODY="## 🚨 CRITICAL Supply Chain Risk Detected - BODY="## ${SEVERITY} - - This PR contains patterns commonly associated with supply chain attacks. 
This does **not** mean the PR is malicious — but these patterns require careful human review before merging. + This PR contains a pattern that has been used in real supply chain attacks. A maintainer must review the flagged code carefully before merging. $(cat /tmp/findings.md) --- - *Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*" + *Scanner only fires on high-signal indicators: .pth files, base64+exec/eval combos, subprocess with encoded commands, or install-hook files. Low-signal warnings were removed intentionally — if you're seeing this comment, the finding is worth inspecting.*" gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)" - name: Fail on critical findings - if: steps.scan.outputs.critical == 'true' + if: steps.scan.outputs.found == 'true' run: | echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details." 
exit 1 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7d0822690a..a92afdfa40 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,8 +3,14 @@ name: Tests on: push: branches: [main] + paths-ignore: + - '**/*.md' + - 'docs/**' pull_request: branches: [main] + paths-ignore: + - '**/*.md' + - 'docs/**' permissions: contents: read @@ -17,7 +23,7 @@ concurrency: jobs: test: runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 20 steps: - name: Checkout code uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 diff --git a/.gitignore b/.gitignore index 137793bb1d..8b455cf506 100644 --- a/.gitignore +++ b/.gitignore @@ -54,11 +54,17 @@ environments/benchmarks/evals/ # Web UI build output hermes_cli/web_dist/ +# Web UI assets — synced from @nous-research/ui at build time via +# `npm run sync-assets` (see web/package.json). +web/public/fonts/ +web/public/ds-assets/ + # Release script temp files .release_notes.md mini-swe-agent/ # Nix .direnv/ +.nix-stamps/ result website/static/api/skills-index.json diff --git a/.mailmap b/.mailmap index 0c385c5183..3f093fb5ab 100644 --- a/.mailmap +++ b/.mailmap @@ -105,3 +105,4 @@ tesseracttars-creator xinbenlv SaulJWu angelos +MestreY0d4-Uninter <241404605+MestreY0d4-Uninter@users.noreply.github.com> diff --git a/AGENTS.md b/AGENTS.md index e4b998f5ee..8bd979b058 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,7 +13,7 @@ source venv/bin/activate # ALWAYS activate before running Python ``` hermes-agent/ ├── run_agent.py # AIAgent class — core conversation loop -├── model_tools.py # Tool orchestration, _discover_tools(), handle_function_call() +├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call() ├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list ├── cli.py # HermesCLI class — interactive CLI orchestrator ├── hermes_state.py # SessionDB — SQLite session store (FTS5 search) @@ -56,6 +56,19 @@ hermes-agent/ │ ├── 
run.py # Main loop, slash commands, message dispatch │ ├── session.py # SessionStore — conversation persistence │ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot +├── ui-tui/ # Ink (React) terminal UI — `hermes --tui` +│ ├── src/entry.tsx # TTY gate + render() +│ ├── src/app.tsx # Main state machine and UI +│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge +│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks) +│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.) +│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory +│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages) +├── tui_gateway/ # Python JSON-RPC backend for the TUI +│ ├── entry.py # stdio entrypoint +│ ├── server.py # RPC handlers and session logic +│ ├── render.py # Optional rich/ANSI bridge +│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands ├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration) ├── cron/ # Scheduler (jobs.py, scheduler.py) ├── environments/ # RL training environments (Atropos) @@ -179,9 +192,62 @@ if canonical == "mycommand": --- +## TUI Architecture (ui-tui + tui_gateway) + +The TUI is a full replacement for the classic (prompt_toolkit) CLI, activated via `hermes --tui` or `HERMES_TUI=1`. + +### Process Model + +``` +hermes --tui + └─ Node (Ink) ──stdio JSON-RPC── Python (tui_gateway) + │ └─ AIAgent + tools + sessions + └─ renders transcript, composer, prompts, activity +``` + +TypeScript owns the screen. Python owns sessions, tools, model calls, and slash command logic. + +### Transport + +Newline-delimited JSON-RPC over stdio. Requests from Ink, events from Python. See `tui_gateway/server.py` for the full method/event catalog. 
+ +### Key Surfaces + +| Surface | Ink component | Gateway method | +|---------|---------------|----------------| +| Chat streaming | `app.tsx` + `messageLine.tsx` | `prompt.submit` → `message.delta/complete` | +| Tool activity | `thinking.tsx` | `tool.start/progress/complete` | +| Approvals | `prompts.tsx` | `approval.respond` ← `approval.request` | +| Clarify/sudo/secret | `prompts.tsx`, `maskedPrompt.tsx` | `clarify/sudo/secret.respond` | +| Session picker | `sessionPicker.tsx` | `session.list/resume` | +| Slash commands | Local handler + fallthrough | `slash.exec` → `_SlashWorker`, `command.dispatch` | +| Completions | `useCompletion` hook | `complete.slash`, `complete.path` | +| Theming | `theme.ts` + `branding.tsx` | `gateway.ready` with skin data | + +### Slash Command Flow + +1. Built-in client commands (`/help`, `/quit`, `/clear`, `/resume`, `/copy`, `/paste`, etc.) handled locally in `app.tsx` +2. Everything else → `slash.exec` (runs in persistent `_SlashWorker` subprocess) → `command.dispatch` fallback + +### Dev Commands + +```bash +cd ui-tui +npm install # first time +npm run dev # watch mode (rebuilds hermes-ink + tsx --watch) +npm start # production +npm run build # full build (hermes-ink + tsc) +npm run type-check # typecheck only (tsc --noEmit) +npm run lint # eslint +npm run fmt # prettier +npm test # vitest +``` + +--- + ## Adding New Tools -Requires changes in **3 files**: +Requires changes in **2 files**: **1. Create `tools/your_tool.py`:** ```python @@ -204,9 +270,9 @@ registry.register( ) ``` -**2. Add import** in `model_tools.py` `_discover_tools()` list. +**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. -**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. +Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain. 
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string. @@ -458,13 +524,45 @@ def profile_env(tmp_path, monkeypatch): ## Testing +**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces +hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8, +4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core +developer machine with API keys set diverges from CI in ways that have caused +multiple "works locally, fails in CI" incidents (and the reverse). + ```bash -source venv/bin/activate -python -m pytest tests/ -q # Full suite (~3000 tests, ~3 min) -python -m pytest tests/test_model_tools.py -q # Toolset resolution -python -m pytest tests/test_cli_init.py -q # CLI config loading -python -m pytest tests/gateway/ -q # Gateway tests -python -m pytest tests/tools/ -q # Tool-level tests +scripts/run_tests.sh # full suite, CI-parity +scripts/run_tests.sh tests/gateway/ # one directory +scripts/run_tests.sh tests/agent/test_foo.py::test_x # one test +scripts/run_tests.sh -v --tb=long # pass-through pytest flags ``` +### Why the wrapper (and why the old "just call pytest" doesn't work) + +Five real sources of local-vs-CI drift the script closes: + +| | Without wrapper | With wrapper | +|---|---|---| +| Provider API keys | Whatever is in your env (auto-detects pool) | All `*_API_KEY`/`*_TOKEN`/etc. unset | +| HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test | +| Timezone | Local TZ (PDT etc.) | UTC | +| Locale | Whatever is set | C.UTF-8 | +| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI | + +`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest +invocation (including IDE integrations) gets hermetic behavior — but the wrapper +is belt-and-suspenders. + +### Running without the wrapper (only if you must) + +If you can't use the wrapper (e.g. 
on Windows or inside an IDE that shells +pytest directly), at minimum activate the venv and pass `-n 4`: + +```bash +source venv/bin/activate +python -m pytest tests/ -q -n 4 +``` + +Worker count above 4 will surface test-ordering flakes that CI never sees. + Always run the full suite before pushing changes. diff --git a/Dockerfile b/Dockerfile index 3703823326..a684f9fb31 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,26 +21,34 @@ RUN useradd -u 10000 -m -d /opt/data hermes COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/ COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/ -COPY . /opt/hermes WORKDIR /opt/hermes -# Install Node dependencies and Playwright as root (--with-deps needs apt) +# ---------- Layer-cached dependency install ---------- +# Copy only package manifests first so npm install + Playwright are cached +# unless the lockfiles themselves change. +COPY package.json package-lock.json ./ +COPY web/package.json web/package-lock.json web/ + RUN npm install --prefer-offline --no-audit && \ npx playwright install --with-deps chromium --only-shell && \ - cd /opt/hermes/scripts/whatsapp-bridge && \ - npm install --prefer-offline --no-audit && \ + (cd web && npm install --prefer-offline --no-audit) && \ npm cache clean --force -# Hand ownership to hermes user, then install Python deps in a virtualenv -RUN chown -R hermes:hermes /opt/hermes -USER hermes +# ---------- Source code ---------- +# .dockerignore excludes node_modules, so the installs above survive. +COPY --chown=hermes:hermes . . 
+# Build web dashboard (Vite outputs to hermes_cli/web_dist/) +RUN cd web && npm run build + +# ---------- Python virtualenv ---------- +RUN chown hermes:hermes /opt/hermes +USER hermes RUN uv venv && \ uv pip install --no-cache-dir -e ".[all]" -USER root -RUN chmod +x /opt/hermes/docker/entrypoint.sh - +# ---------- Runtime ---------- +ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist ENV HERMES_HOME=/opt/data VOLUME [ "/opt/data" ] ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ] diff --git a/README.md b/README.md index 07a1404190..622910b3a9 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM. -Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in. +Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in. 
@@ -141,11 +141,18 @@ See `hermes claw migrate --help` for all options, or use the `openclaw-migration We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process. -Quick start for contributors: +Quick start for contributors — clone and go with `setup-hermes.sh`: ```bash git clone https://github.com/NousResearch/hermes-agent.git cd hermes-agent +./setup-hermes.sh # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes +./hermes # auto-detects the venv, no need to `source` first +``` + +Manual path (equivalent to the above): + +```bash curl -LsSf https://astral.sh/uv/install.sh | sh uv venv venv --python 3.11 source venv/bin/activate diff --git a/RELEASE_v0.10.0.md b/RELEASE_v0.10.0.md new file mode 100644 index 0000000000..1bfb101568 --- /dev/null +++ b/RELEASE_v0.10.0.md @@ -0,0 +1,27 @@ +# Hermes Agent v0.10.0 (v2026.4.16) + +**Release Date:** April 16, 2026 + +> The Tool Gateway release — paid Nous Portal subscribers can now use web search, image generation, text-to-speech, and browser automation through their existing subscription with zero additional API keys. + +--- + +## ✨ Highlights + +- **Nous Tool Gateway** — Paid [Nous Portal](https://portal.nousresearch.com) subscribers now get automatic access to **web search** (Firecrawl), **image generation** (FAL / FLUX 2 Pro), **text-to-speech** (OpenAI TTS), and **browser automation** (Browser Use) through their existing subscription. No separate API keys needed — just run `hermes model`, select Nous Portal, and pick which tools to enable. Per-tool opt-in via `use_gateway` config, full integration with `hermes tools` and `hermes status`, and the runtime correctly prefers the gateway even when direct API keys exist. Replaces the old hidden `HERMES_ENABLE_NOUS_MANAGED_TOOLS` env var with clean subscription-based detection. 
([#11206](https://github.com/NousResearch/hermes-agent/pull/11206), based on work by @jquesnelle; docs: [#11208](https://github.com/NousResearch/hermes-agent/pull/11208)) + +--- + +## 🐛 Bug Fixes & Improvements + +This release includes 180+ commits with numerous bug fixes, platform improvements, and reliability enhancements across the agent core, gateway, CLI, and tool system. Full details will be published in the v0.11.0 changelog. + +--- + +## 👥 Contributors + +- **@jquesnelle** (emozilla) — Original Tool Gateway implementation ([#10799](https://github.com/NousResearch/hermes-agent/pull/10799)), salvaged and shipped in this release + +--- + +**Full Changelog**: [v2026.4.13...v2026.4.16](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.16) diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..3cede2885e --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,84 @@ +# Hermes Agent Security Policy + +This document outlines the security protocols, trust model, and deployment hardening guidelines for the **Hermes Agent** project. + +## 1. Vulnerability Reporting + +Hermes Agent does **not** operate a bug bounty program. Security issues should be reported via [GitHub Security Advisories (GHSA)](https://github.com/NousResearch/hermes-agent/security/advisories/new) or by emailing **security@nousresearch.com**. Do not open public issues for security vulnerabilities. + +### Required Submission Details +- **Title & Severity:** Concise description and CVSS score/rating. +- **Affected Component:** Exact file path and line range (e.g., `tools/approval.py:120-145`). +- **Environment:** Output of `hermes version`, commit SHA, OS, and Python version. +- **Reproduction:** Step-by-step Proof-of-Concept (PoC) against `main` or the latest release. +- **Impact:** Explanation of what trust boundary was crossed. + +--- + +## 2. Trust Model + +The core assumption is that Hermes is a **personal agent** with one trusted operator. 
+ +### Operator & Session Trust +- **Single Tenant:** The system protects the operator from LLM actions, not from malicious co-tenants. Multi-user isolation must happen at the OS/host level. +- **Gateway Security:** Authorized callers (Telegram, Discord, Slack, etc.) receive equal trust. Session keys are used for routing, not as authorization boundaries. +- **Execution:** Defaults to `terminal.backend: local` (direct host execution). Container isolation (Docker, Modal, Daytona) is opt-in for sandboxing. + +### Dangerous Command Approval +The approval system (`tools/approval.py`) is a core security boundary. Terminal commands, file operations, and other potentially destructive actions are gated behind explicit user confirmation before execution. The approval mode is configurable via `approvals.mode` in `config.yaml`: +- `"on"` (default) — prompts the user to approve dangerous commands. +- `"auto"` — auto-approves after a configurable delay. +- `"off"` — disables the gate entirely (break-glass; see Section 3). + +### Output Redaction +`agent/redact.py` strips secret-like patterns (API keys, tokens, credentials) from all display output before it reaches the terminal or gateway platform. This prevents accidental credential leakage in chat logs, tool previews, and response text. Redaction operates on the display layer only — underlying values remain intact for internal agent operations. + +### Skills vs. MCP Servers +- **Installed Skills:** High trust. Equivalent to local host code; skills can read environment variables and run arbitrary commands. +- **MCP Servers:** Lower trust. MCP subprocesses receive a filtered environment (`_build_safe_env()` in `tools/mcp_tool.py`) — only safe baseline variables (`PATH`, `HOME`, `XDG_*`) plus variables explicitly declared in the server's `env` config block are passed through. Host credentials are stripped by default. Additionally, packages invoked via `npx`/`uvx` are checked against the OSV malware database before spawning. 
+ +### Code Execution Sandbox +The `execute_code` tool (`tools/code_execution_tool.py`) runs LLM-generated Python scripts in a child process with API keys and tokens stripped from the environment to prevent credential exfiltration. Only environment variables explicitly declared by loaded skills (via `env_passthrough`) or by the user in `config.yaml` (`terminal.env_passthrough`) are passed through. The child accesses Hermes tools via RPC, not direct API calls. + +### Subagents +- **No recursive delegation:** The `delegate_task` tool is disabled for child agents. +- **Depth limit:** `MAX_DEPTH = 2` — parent (depth 0) can spawn a child (depth 1); grandchildren are rejected. +- **Memory isolation:** Subagents run with `skip_memory=True` and do not have access to the parent's persistent memory provider. The parent receives only the task prompt and final response as an observation. + +--- + +## 3. Out of Scope (Non-Vulnerabilities) + +The following scenarios are **not** considered security breaches: +- **Prompt Injection:** Unless it results in a concrete bypass of the approval system, toolset restrictions, or container sandbox. +- **Public Exposure:** Deploying the gateway to the public internet without external authentication or network protection. +- **Trusted State Access:** Reports that require pre-existing write access to `~/.hermes/`, `.env`, or `config.yaml` (these are operator-owned files). +- **Default Behavior:** Host-level command execution when `terminal.backend` is set to `local` — this is the documented default, not a vulnerability. +- **Configuration Trade-offs:** Intentional break-glass settings such as `approvals.mode: "off"` or `terminal.backend: local` in production. +- **Tool-level read/access restrictions:** The agent has unrestricted shell access via the `terminal` tool by design. Reports that a specific tool (e.g., `read_file`) can access a resource are not vulnerabilities if the same access is available through `terminal`. 
Tool-level deny lists only constitute a meaningful security boundary when paired with equivalent restrictions on the terminal side (as with write operations, where `WRITE_DENIED_PATHS` is paired with the dangerous command approval system). + +--- + +## 4. Deployment Hardening & Best Practices + +### Filesystem & Network +- **Production sandboxing:** Use container backends (`docker`, `modal`, `daytona`) instead of `local` for untrusted workloads. +- **File permissions:** Run as non-root (the Docker image uses UID 10000); protect credentials with `chmod 600 ~/.hermes/.env` on local installs. +- **Network exposure:** Do not expose the gateway or API server to the public internet without VPN, Tailscale, or firewall protection. SSRF protection is enabled by default across all gateway platform adapters (Telegram, Discord, Slack, Matrix, Mattermost, etc.) with redirect validation. Note: the local terminal backend does not apply SSRF filtering, as it operates within the trusted operator's environment. + +### Skills & Supply Chain +- **Skill installation:** Review Skills Guard reports (`tools/skills_guard.py`) before installing third-party skills. The audit log at `~/.hermes/skills/.hub/audit.log` tracks every install and removal. +- **MCP safety:** OSV malware checking runs automatically for `npx`/`uvx` packages before MCP server processes are spawned. +- **CI/CD:** GitHub Actions are pinned to full commit SHAs. The `supply-chain-audit.yml` workflow blocks PRs containing `.pth` files or suspicious `base64`+`exec` patterns. + +### Credential Storage +- API keys and tokens belong exclusively in `~/.hermes/.env` — never in `config.yaml` or checked into version control. +- The credential pool system (`agent/credential_pool.py`) handles key rotation and fallback. Credentials are resolved from environment variables, not stored in plaintext databases. + +--- + +## 5. 
Disclosure Process + +- **Coordinated Disclosure:** 90-day window or until a fix is released, whichever comes first. +- **Communication:** All updates occur via the GHSA thread or email correspondence with security@nousresearch.com. +- **Credits:** Reporters are credited in release notes unless anonymity is requested. diff --git a/acp_adapter/entry.py b/acp_adapter/entry.py index 7db5747a4d..3089f78c27 100644 --- a/acp_adapter/entry.py +++ b/acp_adapter/entry.py @@ -20,6 +20,46 @@ from pathlib import Path from hermes_constants import get_hermes_home +# Methods clients send as periodic liveness probes. They are not part of the +# ACP schema, so the acp router correctly returns JSON-RPC -32601 to the +# caller — but the supervisor task that dispatches the request then surfaces +# the raised RequestError via ``logging.exception("Background task failed")``, +# which dumps a traceback to stderr every probe interval. Clients like +# acp-bridge already treat the -32601 response as "agent alive", so the +# traceback is pure noise. We keep the protocol response intact and only +# silence the stderr noise for this specific benign case. +_BENIGN_PROBE_METHODS = frozenset({"ping", "health", "healthcheck"}) + + +class _BenignProbeMethodFilter(logging.Filter): + """Suppress acp 'Background task failed' tracebacks caused by unknown + liveness-probe methods (e.g. ``ping``) while leaving every other + background-task error — including method_not_found for any non-probe + method — visible in stderr. + """ + + def filter(self, record: logging.LogRecord) -> bool: + if record.getMessage() != "Background task failed": + return True + exc_info = record.exc_info + if not exc_info: + return True + exc = exc_info[1] + # Imported lazily so this module stays importable when the optional + # ``agent-client-protocol`` dependency is not installed. 
+ try: + from acp.exceptions import RequestError + except ImportError: + return True + if not isinstance(exc, RequestError): + return True + if getattr(exc, "code", None) != -32601: + return True + data = getattr(exc, "data", None) + method = data.get("method") if isinstance(data, dict) else None + return method not in _BENIGN_PROBE_METHODS + + def _setup_logging() -> None: """Route all logging to stderr so stdout stays clean for ACP stdio.""" handler = logging.StreamHandler(sys.stderr) @@ -29,6 +69,7 @@ def _setup_logging() -> None: datefmt="%Y-%m-%d %H:%M:%S", ) ) + handler.addFilter(_BenignProbeMethodFilter()) root = logging.getLogger() root.handlers.clear() root.addHandler(handler) diff --git a/acp_adapter/events.py b/acp_adapter/events.py index 08da40a685..1257f902eb 100644 --- a/acp_adapter/events.py +++ b/acp_adapter/events.py @@ -49,6 +49,7 @@ def make_tool_progress_cb( session_id: str, loop: asyncio.AbstractEventLoop, tool_call_ids: Dict[str, Deque[str]], + tool_call_meta: Dict[str, Dict[str, Any]], ) -> Callable: """Create a ``tool_progress_callback`` for AIAgent. @@ -84,6 +85,16 @@ def make_tool_progress_cb( tool_call_ids[name] = queue queue.append(tc_id) + snapshot = None + if name in {"write_file", "patch", "skill_manage"}: + try: + from agent.display import capture_local_edit_snapshot + + snapshot = capture_local_edit_snapshot(name, args) + except Exception: + logger.debug("Failed to capture ACP edit snapshot for %s", name, exc_info=True) + tool_call_meta[tc_id] = {"args": args, "snapshot": snapshot} + update = build_tool_start(tc_id, name, args) _send_update(conn, session_id, loop, update) @@ -119,6 +130,7 @@ def make_step_cb( session_id: str, loop: asyncio.AbstractEventLoop, tool_call_ids: Dict[str, Deque[str]], + tool_call_meta: Dict[str, Dict[str, Any]], ) -> Callable: """Create a ``step_callback`` for AIAgent. 
@@ -132,10 +144,12 @@ def make_step_cb( for tool_info in prev_tools: tool_name = None result = None + function_args = None if isinstance(tool_info, dict): tool_name = tool_info.get("name") or tool_info.get("function_name") result = tool_info.get("result") or tool_info.get("output") + function_args = tool_info.get("arguments") or tool_info.get("args") elif isinstance(tool_info, str): tool_name = tool_info @@ -145,8 +159,13 @@ def make_step_cb( tool_call_ids[tool_name] = queue if tool_name and queue: tc_id = queue.popleft() + meta = tool_call_meta.pop(tc_id, {}) update = build_tool_complete( - tc_id, tool_name, result=str(result) if result is not None else None + tc_id, + tool_name, + result=str(result) if result is not None else None, + function_args=function_args or meta.get("args"), + snapshot=meta.get("snapshot"), ) _send_update(conn, session_id, loop, update) if not queue: diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 29f9a10e8b..4685a68a8c 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -26,6 +26,7 @@ from acp.schema import ( McpServerHttp, McpServerSse, McpServerStdio, + ModelInfo, NewSessionResponse, PromptResponse, ResumeSessionResponse, @@ -36,6 +37,7 @@ from acp.schema import ( SessionCapabilities, SessionForkCapabilities, SessionListCapabilities, + SessionModelState, SessionResumeCapabilities, SessionInfo, TextContentBlock, @@ -147,6 +149,98 @@ class HermesACPAgent(acp.Agent): self._conn = conn logger.info("ACP client connected") + @staticmethod + def _encode_model_choice(provider: str | None, model: str | None) -> str: + """Encode a model selection so ACP clients can keep provider context.""" + raw_model = str(model or "").strip() + if not raw_model: + return "" + raw_provider = str(provider or "").strip().lower() + if not raw_provider: + return raw_model + return f"{raw_provider}:{raw_model}" + + def _build_model_state(self, state: SessionState) -> SessionModelState | None: + """Return the ACP model selector payload 
for editors like Zed.""" + model = str(state.model or getattr(state.agent, "model", "") or "").strip() + provider = getattr(state.agent, "provider", None) or detect_provider() or "openrouter" + + try: + from hermes_cli.models import curated_models_for_provider, normalize_provider, provider_label + + normalized_provider = normalize_provider(provider) + provider_name = provider_label(normalized_provider) + available_models: list[ModelInfo] = [] + seen_ids: set[str] = set() + + for model_id, description in curated_models_for_provider(normalized_provider): + rendered_model = str(model_id or "").strip() + if not rendered_model: + continue + choice_id = self._encode_model_choice(normalized_provider, rendered_model) + if choice_id in seen_ids: + continue + desc_parts = [f"Provider: {provider_name}"] + if description: + desc_parts.append(str(description).strip()) + if rendered_model == model: + desc_parts.append("current") + available_models.append( + ModelInfo( + model_id=choice_id, + name=rendered_model, + description=" • ".join(part for part in desc_parts if part), + ) + ) + seen_ids.add(choice_id) + + current_model_id = self._encode_model_choice(normalized_provider, model) + if current_model_id and current_model_id not in seen_ids: + available_models.insert( + 0, + ModelInfo( + model_id=current_model_id, + name=model, + description=f"Provider: {provider_name} • current", + ), + ) + + if available_models: + return SessionModelState( + available_models=available_models, + current_model_id=current_model_id or available_models[0].model_id, + ) + except Exception: + logger.debug("Could not build ACP model state", exc_info=True) + + if not model: + return None + + fallback_choice = self._encode_model_choice(provider, model) + return SessionModelState( + available_models=[ModelInfo(model_id=fallback_choice, name=model)], + current_model_id=fallback_choice, + ) + + @staticmethod + def _resolve_model_selection(raw_model: str, current_provider: str) -> tuple[str, str]: + 
"""Resolve ``provider:model`` input into the provider and normalized model id.""" + target_provider = current_provider + new_model = raw_model.strip() + + try: + from hermes_cli.models import detect_provider_for_model, parse_model_input + + target_provider, new_model = parse_model_input(new_model, current_provider) + if target_provider == current_provider: + detected = detect_provider_for_model(new_model, current_provider) + if detected: + target_provider, new_model = detected + except Exception: + logger.debug("Provider detection failed, using model as-is", exc_info=True) + + return target_provider, new_model + async def _register_session_mcp_servers( self, state: SessionState, @@ -273,7 +367,10 @@ class HermesACPAgent(acp.Agent): await self._register_session_mcp_servers(state, mcp_servers) logger.info("New session %s (cwd=%s)", state.session_id, cwd) self._schedule_available_commands_update(state.session_id) - return NewSessionResponse(session_id=state.session_id) + return NewSessionResponse( + session_id=state.session_id, + models=self._build_model_state(state), + ) async def load_session( self, @@ -289,7 +386,7 @@ class HermesACPAgent(acp.Agent): await self._register_session_mcp_servers(state, mcp_servers) logger.info("Loaded session %s", session_id) self._schedule_available_commands_update(session_id) - return LoadSessionResponse() + return LoadSessionResponse(models=self._build_model_state(state)) async def resume_session( self, @@ -305,7 +402,7 @@ class HermesACPAgent(acp.Agent): await self._register_session_mcp_servers(state, mcp_servers) logger.info("Resumed session %s", state.session_id) self._schedule_available_commands_update(state.session_id) - return ResumeSessionResponse() + return ResumeSessionResponse(models=self._build_model_state(state)) async def cancel(self, session_id: str, **kwargs: Any) -> None: state = self.session_manager.get_session(session_id) @@ -340,11 +437,20 @@ class HermesACPAgent(acp.Agent): cwd: str | None = None, **kwargs: Any, ) 
-> ListSessionsResponse: - infos = self.session_manager.list_sessions() - sessions = [ - SessionInfo(session_id=s["session_id"], cwd=s["cwd"]) - for s in infos - ] + infos = self.session_manager.list_sessions(cwd=cwd) + sessions = [] + for s in infos: + updated_at = s.get("updated_at") + if updated_at is not None and not isinstance(updated_at, str): + updated_at = str(updated_at) + sessions.append( + SessionInfo( + session_id=s["session_id"], + cwd=s["cwd"], + title=s.get("title"), + updated_at=updated_at, + ) + ) return ListSessionsResponse(sessions=sessions) # ---- Prompt (core) ------------------------------------------------------ @@ -389,12 +495,13 @@ class HermesACPAgent(acp.Agent): state.cancel_event.clear() tool_call_ids: dict[str, Deque[str]] = defaultdict(deque) + tool_call_meta: dict[str, dict[str, Any]] = {} previous_approval_cb = None if conn: - tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids) + tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta) thinking_cb = make_thinking_cb(conn, session_id, loop) - step_cb = make_step_cb(conn, session_id, loop, tool_call_ids) + step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta) message_cb = make_message_cb(conn, session_id, loop) approval_cb = make_approval_callback(conn.request_permission, loop, session_id) else: @@ -449,6 +556,19 @@ class HermesACPAgent(acp.Agent): self.session_manager.save_session(session_id) final_response = result.get("final_response", "") + if final_response: + try: + from agent.title_generator import maybe_auto_title + + maybe_auto_title( + self.session_manager._get_db(), + session_id, + user_text, + final_response, + state.history, + ) + except Exception: + logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True) if final_response and conn: update = acp.update_agent_message_text(final_response) await conn.session_update(session_id, update) @@ -556,27 +676,15 @@ class 
HermesACPAgent(acp.Agent): provider = getattr(state.agent, "provider", None) or "auto" return f"Current model: {model}\nProvider: {provider}" - new_model = args.strip() - target_provider = None current_provider = getattr(state.agent, "provider", None) or "openrouter" - - # Auto-detect provider for the requested model - try: - from hermes_cli.models import parse_model_input, detect_provider_for_model - target_provider, new_model = parse_model_input(new_model, current_provider) - if target_provider == current_provider: - detected = detect_provider_for_model(new_model, current_provider) - if detected: - target_provider, new_model = detected - except Exception: - logger.debug("Provider detection failed, using model as-is", exc_info=True) + target_provider, new_model = self._resolve_model_selection(args, current_provider) state.model = new_model state.agent = self.session_manager._make_agent( session_id=state.session_id, cwd=state.cwd, model=new_model, - requested_provider=target_provider or current_provider, + requested_provider=target_provider, ) self.session_manager.save_session(state.session_id) provider_label = getattr(state.agent, "provider", None) or target_provider or current_provider @@ -678,20 +786,30 @@ class HermesACPAgent(acp.Agent): """Switch the model for a session (called by ACP protocol).""" state = self.session_manager.get_session(session_id) if state: - state.model = model_id current_provider = getattr(state.agent, "provider", None) - current_base_url = getattr(state.agent, "base_url", None) - current_api_mode = getattr(state.agent, "api_mode", None) + requested_provider, resolved_model = self._resolve_model_selection( + model_id, + current_provider or "openrouter", + ) + state.model = resolved_model + provider_changed = bool(current_provider and requested_provider != current_provider) + current_base_url = None if provider_changed else getattr(state.agent, "base_url", None) + current_api_mode = None if provider_changed else getattr(state.agent, 
"api_mode", None) state.agent = self.session_manager._make_agent( session_id=session_id, cwd=state.cwd, - model=model_id, - requested_provider=current_provider, + model=resolved_model, + requested_provider=requested_provider, base_url=current_base_url, api_mode=current_api_mode, ) self.session_manager.save_session(session_id) - logger.info("Session %s: model switched to %s", session_id, model_id) + logger.info( + "Session %s: model switched to %s via provider %s", + session_id, + resolved_model, + requested_provider, + ) return SetSessionModelResponse() logger.warning("Session %s: model switch requested for missing session", session_id) return None diff --git a/acp_adapter/session.py b/acp_adapter/session.py index 4bb823987e..3f5f78f9a1 100644 --- a/acp_adapter/session.py +++ b/acp_adapter/session.py @@ -13,8 +13,12 @@ from hermes_constants import get_hermes_home import copy import json import logging +import os +import re import sys +import time import uuid +from datetime import datetime, timezone from dataclasses import dataclass, field from threading import Lock from typing import Any, Dict, List, Optional @@ -22,6 +26,64 @@ from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) +def _normalize_cwd_for_compare(cwd: str | None) -> str: + raw = str(cwd or ".").strip() + if not raw: + raw = "." + expanded = os.path.expanduser(raw) + + # Normalize Windows drive paths into the equivalent WSL mount form so + # ACP history filters match the same workspace across Windows and WSL. 
+ match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded) + if match: + drive = match.group(1).lower() + tail = match.group(2).replace("\\", "/") + expanded = f"/mnt/{drive}/{tail}" + elif re.match(r"^/mnt/[A-Za-z]/", expanded): + expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}" + + return os.path.normpath(expanded) + + +def _build_session_title(title: Any, preview: Any, cwd: str | None) -> str: + explicit = str(title or "").strip() + if explicit: + return explicit + preview_text = str(preview or "").strip() + if preview_text: + return preview_text + leaf = os.path.basename(str(cwd or "").rstrip("/\\")) + return leaf or "New thread" + + +def _format_updated_at(value: Any) -> str | None: + if value is None: + return None + if isinstance(value, str) and value.strip(): + return value + try: + return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat() + except Exception: + return None + + +def _updated_at_sort_key(value: Any) -> float: + if value is None: + return float("-inf") + if isinstance(value, (int, float)): + return float(value) + raw = str(value).strip() + if not raw: + return float("-inf") + try: + return datetime.fromisoformat(raw.replace("Z", "+00:00")).timestamp() + except Exception: + try: + return float(raw) + except Exception: + return float("-inf") + + def _acp_stderr_print(*args, **kwargs) -> None: """Best-effort human-readable output sink for ACP stdio sessions. 
@@ -162,47 +224,78 @@ class SessionManager: logger.info("Forked ACP session %s -> %s", session_id, new_id) return state - def list_sessions(self) -> List[Dict[str, Any]]: + def list_sessions(self, cwd: str | None = None) -> List[Dict[str, Any]]: """Return lightweight info dicts for all sessions (memory + database).""" + normalized_cwd = _normalize_cwd_for_compare(cwd) if cwd else None + db = self._get_db() + persisted_rows: dict[str, dict[str, Any]] = {} + + if db is not None: + try: + for row in db.list_sessions_rich(source="acp", limit=1000): + persisted_rows[str(row["id"])] = dict(row) + except Exception: + logger.debug("Failed to load ACP sessions from DB", exc_info=True) + # Collect in-memory sessions first. with self._lock: seen_ids = set(self._sessions.keys()) - results = [ - { - "session_id": s.session_id, - "cwd": s.cwd, - "model": s.model, - "history_len": len(s.history), - } - for s in self._sessions.values() - ] + results = [] + for s in self._sessions.values(): + history_len = len(s.history) + if history_len <= 0: + continue + if normalized_cwd and _normalize_cwd_for_compare(s.cwd) != normalized_cwd: + continue + persisted = persisted_rows.get(s.session_id, {}) + preview = next( + ( + str(msg.get("content") or "").strip() + for msg in s.history + if msg.get("role") == "user" and str(msg.get("content") or "").strip() + ), + persisted.get("preview") or "", + ) + results.append( + { + "session_id": s.session_id, + "cwd": s.cwd, + "model": s.model, + "history_len": history_len, + "title": _build_session_title(persisted.get("title"), preview, s.cwd), + "updated_at": _format_updated_at( + persisted.get("last_active") or persisted.get("started_at") or time.time() + ), + } + ) # Merge any persisted sessions not currently in memory. - db = self._get_db() - if db is not None: - try: - rows = db.search_sessions(source="acp", limit=1000) - for row in rows: - sid = row["id"] - if sid in seen_ids: - continue - # Extract cwd from model_config JSON. - cwd = "." 
- mc = row.get("model_config") - if mc: - try: - cwd = json.loads(mc).get("cwd", ".") - except (json.JSONDecodeError, TypeError): - pass - results.append({ - "session_id": sid, - "cwd": cwd, - "model": row.get("model") or "", - "history_len": row.get("message_count") or 0, - }) - except Exception: - logger.debug("Failed to list ACP sessions from DB", exc_info=True) + for sid, row in persisted_rows.items(): + if sid in seen_ids: + continue + message_count = int(row.get("message_count") or 0) + if message_count <= 0: + continue + # Extract cwd from model_config JSON. + session_cwd = "." + mc = row.get("model_config") + if mc: + try: + session_cwd = json.loads(mc).get("cwd", ".") + except (json.JSONDecodeError, TypeError): + pass + if normalized_cwd and _normalize_cwd_for_compare(session_cwd) != normalized_cwd: + continue + results.append({ + "session_id": sid, + "cwd": session_cwd, + "model": row.get("model") or "", + "history_len": message_count, + "title": _build_session_title(row.get("title"), row.get("preview"), session_cwd), + "updated_at": _format_updated_at(row.get("last_active") or row.get("started_at")), + }) + results.sort(key=lambda item: _updated_at_sort_key(item.get("updated_at")), reverse=True) return results def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]: diff --git a/acp_adapter/tools.py b/acp_adapter/tools.py index 52313220b7..067652106e 100644 --- a/acp_adapter/tools.py +++ b/acp_adapter/tools.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import uuid from typing import Any, Dict, List, Optional @@ -96,6 +97,170 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str: return tool_name +def _build_patch_mode_content(patch_text: str) -> List[Any]: + """Parse V4A patch mode input into ACP diff blocks when possible.""" + if not patch_text: + return [acp.tool_content(acp.text_block(""))] + + try: + from tools.patch_parser import OperationType, parse_v4a_patch + + operations, error = 
parse_v4a_patch(patch_text) + if error or not operations: + return [acp.tool_content(acp.text_block(patch_text))] + + content: List[Any] = [] + for op in operations: + if op.operation == OperationType.UPDATE: + old_chunks: list[str] = [] + new_chunks: list[str] = [] + for hunk in op.hunks: + old_lines = [line.content for line in hunk.lines if line.prefix in (" ", "-")] + new_lines = [line.content for line in hunk.lines if line.prefix in (" ", "+")] + if old_lines or new_lines: + old_chunks.append("\n".join(old_lines)) + new_chunks.append("\n".join(new_lines)) + + old_text = "\n...\n".join(chunk for chunk in old_chunks if chunk) + new_text = "\n...\n".join(chunk for chunk in new_chunks if chunk) + if old_text or new_text: + content.append( + acp.tool_diff_content( + path=op.file_path, + old_text=old_text or None, + new_text=new_text or "", + ) + ) + continue + + if op.operation == OperationType.ADD: + added_lines = [line.content for hunk in op.hunks for line in hunk.lines if line.prefix == "+"] + content.append( + acp.tool_diff_content( + path=op.file_path, + new_text="\n".join(added_lines), + ) + ) + continue + + if op.operation == OperationType.DELETE: + content.append( + acp.tool_diff_content( + path=op.file_path, + old_text=f"Delete file: {op.file_path}", + new_text="", + ) + ) + continue + + if op.operation == OperationType.MOVE: + content.append( + acp.tool_content(acp.text_block(f"Move file: {op.file_path} -> {op.new_path}")) + ) + + return content or [acp.tool_content(acp.text_block(patch_text))] + except Exception: + return [acp.tool_content(acp.text_block(patch_text))] + + +def _strip_diff_prefix(path: str) -> str: + raw = str(path or "").strip() + if raw.startswith(("a/", "b/")): + return raw[2:] + return raw + + +def _parse_unified_diff_content(diff_text: str) -> List[Any]: + """Convert unified diff text into ACP diff content blocks.""" + if not diff_text: + return [] + + content: List[Any] = [] + current_old_path: Optional[str] = None + 
current_new_path: Optional[str] = None + old_lines: list[str] = [] + new_lines: list[str] = [] + + def _flush() -> None: + nonlocal current_old_path, current_new_path, old_lines, new_lines + if current_old_path is None and current_new_path is None: + return + path = current_new_path if current_new_path and current_new_path != "/dev/null" else current_old_path + if not path or path == "/dev/null": + current_old_path = None + current_new_path = None + old_lines = [] + new_lines = [] + return + content.append( + acp.tool_diff_content( + path=_strip_diff_prefix(path), + old_text="\n".join(old_lines) if old_lines else None, + new_text="\n".join(new_lines), + ) + ) + current_old_path = None + current_new_path = None + old_lines = [] + new_lines = [] + + for line in diff_text.splitlines(): + if line.startswith("--- "): + _flush() + current_old_path = line[4:].strip() + continue + if line.startswith("+++ "): + current_new_path = line[4:].strip() + continue + if line.startswith("@@"): + continue + if current_old_path is None and current_new_path is None: + continue + if line.startswith("+"): + new_lines.append(line[1:]) + elif line.startswith("-"): + old_lines.append(line[1:]) + elif line.startswith(" "): + shared = line[1:] + old_lines.append(shared) + new_lines.append(shared) + + _flush() + return content + + +def _build_tool_complete_content( + tool_name: str, + result: Optional[str], + *, + function_args: Optional[Dict[str, Any]] = None, + snapshot: Any = None, +) -> List[Any]: + """Build structured ACP completion content, falling back to plain text.""" + display_result = result or "" + if len(display_result) > 5000: + display_result = display_result[:4900] + f"\n... 
({len(result)} chars total, truncated)" + + if tool_name in {"write_file", "patch", "skill_manage"}: + try: + from agent.display import extract_edit_diff + + diff_text = extract_edit_diff( + tool_name, + result, + function_args=function_args, + snapshot=snapshot, + ) + if isinstance(diff_text, str) and diff_text.strip(): + diff_content = _parse_unified_diff_content(diff_text) + if diff_content: + return diff_content + except Exception: + pass + + return [acp.tool_content(acp.text_block(display_result))] + + # --------------------------------------------------------------------------- # Build ACP content objects for tool-call events # --------------------------------------------------------------------------- @@ -119,9 +284,8 @@ def build_tool_start( new = arguments.get("new_string", "") content = [acp.tool_diff_content(path=path, new_text=new, old_text=old)] else: - # Patch mode — show the patch content as text patch_text = arguments.get("patch", "") - content = [acp.tool_content(acp.text_block(patch_text))] + content = _build_patch_mode_content(patch_text) return acp.start_tool_call( tool_call_id, title, kind=kind, content=content, locations=locations, raw_input=arguments, @@ -178,16 +342,17 @@ def build_tool_complete( tool_call_id: str, tool_name: str, result: Optional[str] = None, + function_args: Optional[Dict[str, Any]] = None, + snapshot: Any = None, ) -> ToolCallProgress: """Create a ToolCallUpdate (progress) event for a completed tool call.""" kind = get_tool_kind(tool_name) - - # Truncate very large results for the UI - display_result = result or "" - if len(display_result) > 5000: - display_result = display_result[:4900] + f"\n... 
({len(result)} chars total, truncated)" - - content = [acp.tool_content(acp.text_block(display_result))] + content = _build_tool_complete_content( + tool_name, + result, + function_args=function_args, + snapshot=snapshot, + ) return acp.update_tool_call( tool_call_id, kind=kind, diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index b85f77a9d2..bf2b8a62c5 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -28,19 +28,45 @@ except ImportError: logger = logging.getLogger(__name__) THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000} +# Hermes effort → Anthropic adaptive-thinking effort (output_config.effort). +# Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max. +# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh. +# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/ +# agentic work) and downgrade it to max on pre-4.7 adaptive models (which +# is the strongest level they accept). "minimal" is a legacy alias that +# maps to low on every model. See: +# https://platform.claude.com/docs/en/about-claude/models/migration-guide ADAPTIVE_EFFORT_MAP = { - "xhigh": "max", - "high": "high", - "medium": "medium", - "low": "low", + "max": "max", + "xhigh": "xhigh", + "high": "high", + "medium": "medium", + "low": "low", "minimal": "low", } +# Models that accept the "xhigh" output_config.effort level. Opus 4.7 added +# xhigh as a distinct level between high and max; older adaptive-thinking +# models (4.6) reject it with a 400. Keep this substring list in sync with +# the Anthropic migration guide as new model families ship. +_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7") + +# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive +# is the only supported mode; 4.7 additionally forbids manual thinking entirely +# and drops temperature/top_p/top_k). 
+_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7") + +# Models where temperature/top_p/top_k return 400 if set to non-default values. +# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it. +_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7") + # ── Max output token limits per Anthropic model ─────────────────────── # Source: Anthropic docs + Cline model catalog. Anthropic's API requires # max_tokens as a mandatory field. Previously we hardcoded 16384, which # starves thinking-enabled models (thinking tokens count toward the limit). _ANTHROPIC_OUTPUT_LIMITS = { + # Claude 4.7 + "claude-opus-4-7": 128_000, # Claude 4.6 "claude-opus-4-6": 128_000, "claude-sonnet-4-6": 64_000, @@ -91,11 +117,37 @@ def _get_anthropic_max_output(model: str) -> int: def _supports_adaptive_thinking(model: str) -> bool: - """Return True for Claude 4.6 models that support adaptive thinking.""" - return any(v in model for v in ("4-6", "4.6")) + """Return True for Claude 4.6+ models that support adaptive thinking.""" + return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS) -# Beta headers for enhanced features (sent with ALL auth types) +def _supports_xhigh_effort(model: str) -> bool: + """Return True for models that accept the 'xhigh' adaptive effort level. + + Opus 4.7 introduced xhigh as a distinct level between high and max. + Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max + and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max + when this returns False. + """ + return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS) + + +def _forbids_sampling_params(model: str) -> bool: + """Return True for models that 400 on any non-default temperature/top_p/top_k. + + Opus 4.7 explicitly rejects sampling parameters; later Claude releases are + expected to follow suit. Callers should omit these fields entirely rather + than passing zero/default values (the API rejects anything non-null). 
+ """ + return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS) + + +# Beta headers for enhanced features (sent with ALL auth types). +# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the +# beta headers are still accepted (harmless no-op) but not required. Kept +# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints +# that still gate on the headers continue to get the enhanced features. +# Migration guide: remove these if you no longer support ≤4.5 models. _COMMON_BETAS = [ "interleaved-thinking-2025-05-14", "fine-grained-tool-streaming-2025-05-14", @@ -240,9 +292,15 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]: return _COMMON_BETAS -def build_anthropic_client(api_key: str, base_url: str = None): +def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None): """Create an Anthropic client, auto-detecting setup-tokens vs API keys. + If *timeout* is provided it overrides the default 900s read timeout. The + connect timeout stays at 10s. Callers pass this from the per-provider / + per-model ``request_timeout_seconds`` config so Anthropic-native and + Anthropic-compatible providers respect the same knob as OpenAI-wire + providers. + Returns an anthropic.Anthropic instance. 
""" if _anthropic_sdk is None: @@ -253,8 +311,9 @@ def build_anthropic_client(api_key: str, base_url: str = None): from httpx import Timeout normalized_base_url = _normalize_base_url_text(base_url) + _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0 kwargs = { - "timeout": Timeout(timeout=900.0, connect=10.0), + "timeout": Timeout(timeout=float(_read_timeout), connect=10.0), } if normalized_base_url: kwargs["base_url"] = normalized_base_url @@ -298,6 +357,33 @@ def build_anthropic_client(api_key: str, base_url: str = None): return _anthropic_sdk.Anthropic(**kwargs) +def build_anthropic_bedrock_client(region: str): + """Create an AnthropicBedrock client for Bedrock Claude models. + + Uses the Anthropic SDK's native Bedrock adapter, which provides full + Claude feature parity: prompt caching, thinking budgets, adaptive + thinking, fast mode — features not available via the Converse API. + + Auth uses the boto3 default credential chain (IAM roles, SSO, env vars). + """ + if _anthropic_sdk is None: + raise ImportError( + "The 'anthropic' package is required for the Bedrock provider. " + "Install it with: pip install 'anthropic>=0.39.0'" + ) + if not hasattr(_anthropic_sdk, "AnthropicBedrock"): + raise ImportError( + "anthropic.AnthropicBedrock not available. " + "Upgrade with: pip install 'anthropic>=0.39.0'" + ) + from httpx import Timeout + + return _anthropic_sdk.AnthropicBedrock( + aws_region=region, + timeout=Timeout(timeout=900.0, connect=10.0), + ) + + def read_claude_code_credentials() -> Optional[Dict[str, Any]]: """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json. @@ -1314,18 +1400,31 @@ def build_anthropic_kwargs( kwargs["tool_choice"] = {"type": "tool", "name": tool_choice} # Map reasoning_config to Anthropic's thinking parameter. - # Claude 4.6 models use adaptive thinking + output_config.effort. + # Claude 4.6+ models use adaptive thinking + output_config.effort. 
# Older models use manual thinking with budget_tokens. # MiniMax Anthropic-compat endpoints support thinking (manual mode only, # not adaptive). Haiku does NOT support extended thinking — skip entirely. + # + # On 4.7+ the `thinking.display` field defaults to "omitted", which + # silently hides reasoning text that Hermes surfaces in its CLI. We + # request "summarized" so the reasoning blocks stay populated — matching + # 4.6 behavior and preserving the activity-feed UX during long tool runs. if reasoning_config and isinstance(reasoning_config, dict): if reasoning_config.get("enabled") is not False and "haiku" not in model.lower(): effort = str(reasoning_config.get("effort", "medium")).lower() budget = THINKING_BUDGET.get(effort, 8000) if _supports_adaptive_thinking(model): - kwargs["thinking"] = {"type": "adaptive"} + kwargs["thinking"] = { + "type": "adaptive", + "display": "summarized", + } + adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium") + # Downgrade xhigh→max on models that don't list xhigh as a + # supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh. + if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model): + adaptive_effort = "max" kwargs["output_config"] = { - "effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium") + "effort": adaptive_effort, } else: kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget} @@ -1333,6 +1432,15 @@ def build_anthropic_kwargs( kwargs["temperature"] = 1 kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096) + # ── Strip sampling params on 4.7+ ───────────────────────────────── + # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400. + # Callers (auxiliary_client, flush_memories, etc.) may set these for + # older models; drop them here as a safety net so upstream 4.6 → 4.7 + # migrations don't require coordinated edits everywhere. 
+ if _forbids_sampling_params(model): + for _sampling_key in ("temperature", "top_p", "top_k"): + kwargs.pop(_sampling_key, None) + # ── Fast mode (Opus 4.6 only) ──────────────────────────────────── # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x # output speed. Only for native Anthropic endpoints — third-party @@ -1390,12 +1498,20 @@ def normalize_anthropic_response( ) ) - # Map Anthropic stop_reason to OpenAI finish_reason + # Map Anthropic stop_reason to OpenAI finish_reason. + # Newer stop reasons added in Claude 4.5+ / 4.7: + # - refusal: the model declined to answer (cyber safeguards, CSAM, etc.) + # - model_context_window_exceeded: hit context limit (not max_tokens) + # Both need distinct handling upstream — a refusal should surface to the + # user with a clear message, and a context-window overflow should trigger + # compression/truncation rather than be treated as normal end-of-turn. stop_reason_map = { "end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop", + "refusal": "content_filter", + "model_context_window_exceeded": "length", } finish_reason = stop_reason_map.get(response.stop_reason, "stop") diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 49dea65f9b..24260126f8 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -58,6 +58,9 @@ _PROVIDER_ALIASES = { "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini", + "x-ai": "xai", + "x.ai": "xai", + "grok": "xai", "glm": "zai", "z-ai": "zai", "z.ai": "zai", @@ -91,6 +94,87 @@ def _normalize_aux_provider(provider: Optional[str]) -> str: return "custom" return _PROVIDER_ALIASES.get(normalized, normalized) + +_FIXED_TEMPERATURE_MODELS: Dict[str, float] = { + "kimi-for-coding": 0.6, +} + +# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents: +# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed +# value 0.6. 
Any other value will result in an error." The same lock applies +# to the other k2.* models served on that endpoint. Enumerated explicitly so +# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on +# the standard chat API and third parties) are NOT clamped. +# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart +_KIMI_INSTANT_MODELS: frozenset = frozenset({ + "kimi-k2.5", + "kimi-k2-turbo-preview", + "kimi-k2-0905-preview", +}) +_KIMI_THINKING_MODELS: frozenset = frozenset({ + "kimi-k2-thinking", + "kimi-k2-thinking-turbo", +}) + +# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different +# temperature contract than the Coding Plan endpoint above. Empirically, +# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400 +# "invalid temperature: only 1 is allowed for this model" — the Coding Plan +# lock (0.6 for non-thinking) does not apply. `kimi-k2-turbo-preview` and the +# thinking variants already match the Coding Plan contract on the public +# endpoint, so we only override the models that diverge. +# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the +# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see +# hermes_cli/auth.py:_kimi_base_url_for_key). +_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = { + "kimi-k2.5": 1.0, +} + + +def _fixed_temperature_for_model( + model: Optional[str], + base_url: Optional[str] = None, +) -> Optional[float]: + """Return a required temperature override for models with strict contracts. + + Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on + the k2.5 family. Non-thinking variants require exactly 0.6; thinking + variants require 1.0. An optional ``vendor/`` prefix (e.g. + ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings. + + When ``base_url`` points to Moonshot's public chat endpoint + (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public + API only accepts ``temperature=1``, not 0.6. 
That override takes precedence + over the Coding Plan defaults above. + + Returns ``None`` for every other model, including ``kimi-k2-instruct*`` + which is the separate non-coding K2 family with variable temperature. + """ + normalized = (model or "").strip().lower() + bare = normalized.rsplit("/", 1)[-1] + + # Public Moonshot API has a stricter contract for some models than the + # Coding Plan endpoint — check it first so it wins on conflict. + if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()): + public = _KIMI_PUBLIC_API_OVERRIDES.get(bare) + if public is not None: + logger.debug( + "Forcing temperature=%s for %r on public Moonshot API", public, model + ) + return public + + fixed = _FIXED_TEMPERATURE_MODELS.get(normalized) + if fixed is not None: + logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model) + return fixed + if bare in _KIMI_THINKING_MODELS: + logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model) + return 1.0 + if bare in _KIMI_INSTANT_MODELS: + logger.debug("Forcing temperature=0.6 for kimi instant model %r", model) + return 0.6 + return None + # Default auxiliary models for direct API-key providers (cheap/fast for side tasks) _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "gemini": "gemini-3-flash-preview", @@ -104,6 +188,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "opencode-zen": "gemini-3-flash", "opencode-go": "glm-5", "kilocode": "google/gemini-3-flash-preview", + "ollama-cloud": "nemotron-3-nano:30b", } # Vision-specific model overrides for direct providers. @@ -112,6 +197,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { # "exotic provider" branch checks this before falling back to the main model. 
_PROVIDER_VISION_MODELS: Dict[str, str] = { "xiaomi": "mimo-v2-omni", + "zai": "glm-5v-turbo", } # OpenRouter app attribution headers @@ -147,6 +233,45 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex" _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" +def _codex_cloudflare_headers(access_token: str) -> Dict[str, str]: + """Headers required to avoid Cloudflare 403s on chatgpt.com/backend-api/codex. + + The Cloudflare layer in front of the Codex endpoint whitelists a small set of + first-party originators (``codex_cli_rs``, ``codex_vscode``, ``codex_sdk_ts``, + anything starting with ``Codex``). Requests from non-residential IPs (VPS, + server-hosted agents) that don't advertise an allowed originator are served + a 403 with ``cf-mitigated: challenge`` regardless of auth correctness. + + We pin ``originator: codex_cli_rs`` to match the upstream codex-rs CLI, set + ``User-Agent`` to a codex_cli_rs-shaped string (beats SDK fingerprinting), + and extract ``ChatGPT-Account-ID`` (canonical casing, from codex-rs + ``auth.rs``) out of the OAuth JWT's ``chatgpt_account_id`` claim. + + Malformed tokens are tolerated — we drop the account-ID header rather than + raise, so a bad token still surfaces as an auth error (401) instead of a + crash at client construction. 
+ """ + headers = { + "User-Agent": "codex_cli_rs/0.0.0 (Hermes Agent)", + "originator": "codex_cli_rs", + } + if not isinstance(access_token, str) or not access_token.strip(): + return headers + try: + import base64 + parts = access_token.split(".") + if len(parts) < 2: + return headers + payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4) + claims = json.loads(base64.urlsafe_b64decode(payload_b64)) + acct_id = claims.get("https://api.openai.com/auth", {}).get("chatgpt_account_id") + if isinstance(acct_id, str) and acct_id: + headers["ChatGPT-Account-ID"] = acct_id + except Exception: + pass + return headers + + def _to_openai_base_url(base_url: str) -> str: """Normalize an Anthropic-style base URL to OpenAI-compatible format. @@ -513,8 +638,13 @@ class _AnthropicCompletionsAdapter: tool_choice=normalized_tool_choice, is_oauth=self._is_oauth, ) + # Opus 4.7+ rejects any non-default temperature/top_p/top_k; only set + # temperature for models that still accept it. build_anthropic_kwargs + # additionally strips these keys as a safety net — keep both layers. 
if temperature is not None: - anthropic_kwargs["temperature"] = temperature + from agent.anthropic_adapter import _forbids_sampling_params + if not _forbids_sampling_params(model): + anthropic_kwargs["temperature"] = temperature response = self._client.messages.create(**anthropic_kwargs) assistant_message, finish_reason = normalize_anthropic_response(response) @@ -717,6 +847,11 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if model is None: continue # skip provider if we don't know a valid aux model logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model) + if provider_id == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + if is_native_gemini_base_url(base_url): + return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} if "api.kimi.com" in base_url.lower(): extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} @@ -738,6 +873,11 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if model is None: continue # skip provider if we don't know a valid aux model logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model) + if provider_id == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + if is_native_gemini_base_url(base_url): + return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} if "api.kimi.com" in base_url.lower(): extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} @@ -774,6 +914,21 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: + # Check cross-session rate limit guard before attempting Nous — + # if another session already recorded a 429, skip Nous entirely + # to avoid piling more requests onto the tapped RPH bucket. 
+ try: + from agent.nous_rate_guard import nous_rate_limit_remaining + _remaining = nous_rate_limit_remaining() + if _remaining is not None and _remaining > 0: + logger.debug( + "Auxiliary: skipping Nous Portal (rate-limited, resets in %.0fs)", + _remaining, + ) + return None, None + except Exception: + pass + nous = _read_nous_auth() if not nous: return None, None @@ -898,7 +1053,52 @@ def _current_custom_base_url() -> str: return custom_base or "" -def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: +def _validate_proxy_env_urls() -> None: + """Fail fast with a clear error when proxy env vars have malformed URLs. + + Common cause: shell config (e.g. .zshrc) with a typo like + ``export HTTP_PROXY=http://127.0.0.1:6153export NEXT_VAR=...`` + which concatenates 'export' into the port number. Without this + check the OpenAI/httpx client raises a cryptic ``Invalid port`` + error that doesn't name the offending env var. + """ + from urllib.parse import urlparse + + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy"): + value = str(os.environ.get(key) or "").strip() + if not value: + continue + try: + parsed = urlparse(value) + if parsed.scheme: + _ = parsed.port # raises ValueError for e.g. '6153export' + except ValueError as exc: + raise RuntimeError( + f"Malformed proxy environment variable {key}={value!r}. " + "Fix or unset your proxy settings and try again." + ) from exc + + +def _validate_base_url(base_url: str) -> None: + """Reject obviously broken custom endpoint URLs before they reach httpx.""" + from urllib.parse import urlparse + + candidate = str(base_url or "").strip() + if not candidate or candidate.startswith("acp://"): + return + try: + parsed = urlparse(candidate) + if parsed.scheme in {"http", "https"}: + _ = parsed.port # raises ValueError for malformed ports + except ValueError as exc: + raise RuntimeError( + f"Malformed custom endpoint URL: {candidate!r}. 
" + "Run `hermes setup` or `hermes model` and enter a valid http(s) base URL." + ) from exc + + +def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]: runtime = _resolve_custom_runtime() if len(runtime) == 2: custom_base, custom_key = runtime @@ -914,6 +1114,23 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: if custom_mode == "codex_responses": real_client = OpenAI(api_key=custom_key, base_url=custom_base) return CodexAuxiliaryClient(real_client, model), model + if custom_mode == "anthropic_messages": + # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM, + # LiteLLM proxies, etc.). Must NEVER be treated as OAuth — + # Anthropic OAuth claims only apply to api.anthropic.com. + try: + from agent.anthropic_adapter import build_anthropic_client + real_client = build_anthropic_client(custom_key, custom_base) + except ImportError: + logger.warning( + "Custom endpoint declares api_mode=anthropic_messages but the " + "anthropic SDK is not installed — falling back to OpenAI-wire." 
+ ) + return OpenAI(api_key=custom_key, base_url=custom_base), model + return ( + AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False), + model, + ) return OpenAI(api_key=custom_key, base_url=custom_base), model @@ -934,7 +1151,11 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]: return None, None base_url = _CODEX_AUX_BASE_URL logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) - real_client = OpenAI(api_key=codex_token, base_url=base_url) + real_client = OpenAI( + api_key=codex_token, + base_url=base_url, + default_headers=_codex_cloudflare_headers(codex_token), + ) return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL @@ -994,8 +1215,6 @@ _AUTO_PROVIDER_LABELS = { "_resolve_api_key_provider": "api-key", } -_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"}) - _MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode") @@ -1126,11 +1345,15 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option """Full auto-detection chain. Priority: - 1. If the user's main provider is NOT an aggregator (OpenRouter / Nous), - use their main provider + main model directly. This ensures users on - Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same - provider they already have credentials for — no OpenRouter key needed. - 2. OpenRouter → Nous → custom → Codex → API-key providers (original chain). + 1. User's main provider + main model, regardless of provider type. + This means auxiliary tasks (compression, vision, web extraction, + session search, etc.) use the same model the user configured for + chat. Users on OpenRouter/Nous get their chosen chat model; users + on DeepSeek/ZAI/Alibaba get theirs; etc. Running aux tasks on the + user's picked model keeps behavior predictable — no surprise + switches to a cheap fallback model for side tasks. + 2. 
OpenRouter → Nous → custom → Codex → API-key providers (fallback + chain, only used when the main provider has no working client). """ global auxiliary_is_nous, _stale_base_url_warned auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins @@ -1160,11 +1383,16 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option ) _stale_base_url_warned = True - # ── Step 1: non-aggregator main provider → use main model directly ── + # ── Step 1: main provider + main model → use them directly ── + # + # This is the primary aux backend for every user. "auto" means + # "use my main chat model for side tasks as well" — including users + # on aggregators (OpenRouter, Nous) who previously got routed to a + # cheap provider-side default. Explicit per-task overrides set via + # config.yaml (auxiliary..provider) still win over this. main_provider = runtime_provider or _read_main_provider() main_model = runtime_model or _read_main_model() if (main_provider and main_model - and main_provider not in _AGGREGATOR_PROVIDERS and main_provider not in ("auto", "")): resolved_provider = main_provider explicit_base_url = None @@ -1223,6 +1451,13 @@ def _to_async_client(sync_client, model: str): return AsyncCodexAuxiliaryClient(sync_client), model if isinstance(sync_client, AnthropicAuxiliaryClient): return AsyncAnthropicAuxiliaryClient(sync_client), model + try: + from agent.gemini_native_adapter import GeminiNativeClient, AsyncGeminiNativeClient + + if isinstance(sync_client, GeminiNativeClient): + return AsyncGeminiNativeClient(sync_client), model + except ImportError: + pass try: from agent.copilot_acp_client import CopilotACPClient if isinstance(sync_client, CopilotACPClient): @@ -1298,6 +1533,7 @@ def resolve_provider_client( Returns: (client, resolved_model) or (None, None) if auth is unavailable. 
""" + _validate_proxy_env_urls() # Normalise aliases provider = _normalize_aux_provider(provider) @@ -1386,7 +1622,11 @@ def resolve_provider_client( "but no Codex OAuth token found (run: hermes model)") return None, None final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider) - raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + raw_client = OpenAI( + api_key=codex_token, + base_url=_CODEX_AUX_BASE_URL, + default_headers=_codex_cloudflare_headers(codex_token), + ) return (raw_client, final_model) # Standard path: wrap in CodexAuxiliaryClient adapter client, default = _try_codex() @@ -1514,6 +1754,15 @@ def resolve_provider_client( default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "") final_model = _normalize_resolved_model(model or default_model, provider) + if provider == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + if is_native_gemini_base_url(base_url): + client = GeminiNativeClient(api_key=api_key, base_url=base_url) + logger.debug("resolve_provider_client: %s (%s)", provider, final_model) + return (_to_async_client(client, final_model) if async_mode + else (client, final_model)) + # Provider-specific headers headers = {} if "api.kimi.com" in base_url.lower(): @@ -1522,7 +1771,6 @@ def resolve_provider_client( from hermes_cli.models import copilot_default_headers headers.update(copilot_default_headers()) - client = OpenAI(api_key=api_key, base_url=base_url, **({"default_headers": headers} if headers else {})) @@ -1746,34 +1994,31 @@ def resolve_vision_provider_client( if requested == "auto": # Vision auto-detection order: - # 1. Active provider + model (user's main chat config) - # 2. OpenRouter (known vision-capable default model) - # 3. Nous Portal (known vision-capable default model) + # 1. User's main provider + main model (including aggregators). 
+ # _PROVIDER_VISION_MODELS provides per-provider vision model + # overrides when the provider has a dedicated multimodal model + # that differs from the chat model (e.g. xiaomi → mimo-v2-omni, + # zai → glm-5v-turbo). + # 2. OpenRouter (vision-capable aggregator fallback) + # 3. Nous Portal (vision-capable aggregator fallback) # 4. Stop main_provider = _read_main_provider() main_model = _read_main_model() if main_provider and main_provider not in ("auto", ""): - if main_provider in _VISION_AUTO_PROVIDER_ORDER: - # Known strict backend — use its defaults. - sync_client, default_model = _resolve_strict_vision_backend(main_provider) - if sync_client is not None: - return _finalize(main_provider, sync_client, default_model) - else: - # Exotic provider (DeepSeek, Alibaba, Xiaomi, named custom, etc.) - # Use provider-specific vision model if available, otherwise main model. - vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) - rpc_client, rpc_model = resolve_provider_client( - main_provider, vision_model, - api_mode=resolved_api_mode) - if rpc_client is not None: - logger.info( - "Vision auto-detect: using active provider %s (%s)", - main_provider, rpc_model or vision_model, - ) - return _finalize( - main_provider, rpc_client, rpc_model or vision_model) + vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) + rpc_client, rpc_model = resolve_provider_client( + main_provider, vision_model, + api_mode=resolved_api_mode) + if rpc_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, rpc_model or vision_model, + ) + return _finalize( + main_provider, rpc_client, rpc_model or vision_model) - # Fall back through aggregators. + # Fall back through aggregators (uses their dedicated vision model, + # not the user's main model) when main provider has no client. 
for candidate in _VISION_AUTO_PROVIDER_ORDER: if candidate == main_provider: continue # already tried above @@ -1834,9 +2079,15 @@ def auxiliary_max_tokens_param(value: int) -> dict: # Every auxiliary LLM consumer should use these instead of manually # constructing clients and calling .chat.completions.create(). -# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model) +# Client cache: (provider, async_mode, base_url, api_key, api_mode, runtime_key) -> (client, default_model, loop) +# NOTE: loop identity is NOT part of the key. On async cache hits we check +# whether the cached loop is the *current* loop; if not, the stale entry is +# replaced in-place. This bounds cache growth to one entry per unique +# provider config rather than one per (config × event-loop), which previously +# caused unbounded fd accumulation in long-running gateway processes (#10200). _client_cache: Dict[tuple, tuple] = {} _client_cache_lock = threading.Lock() +_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded def neuter_async_httpx_del() -> None: @@ -1969,39 +2220,49 @@ def _get_cached_client( Async clients (AsyncOpenAI) use httpx.AsyncClient internally, which binds to the event loop that was current when the client was created. Using such a client on a *different* loop causes deadlocks or - RuntimeError. To prevent cross-loop issues (especially in gateway - mode where _run_async() may spawn fresh loops in worker threads), the - cache key for async clients includes the current event loop's identity - so each loop gets its own client instance. + RuntimeError. To prevent cross-loop issues, the cache validates on + every async hit that the cached loop is the *current, open* loop. + If the loop changed (e.g. a new gateway worker-thread loop), the stale + entry is replaced in-place rather than creating an additional entry. 
+ + This keeps cache size bounded to one entry per unique provider config, + preventing the fd-exhaustion that previously occurred in long-running + gateways where recycled worker threads created unbounded entries (#10200). """ - # Include loop identity for async clients to prevent cross-loop reuse. - # httpx.AsyncClient (inside AsyncOpenAI) is bound to the loop where it - # was created — reusing it on a different loop causes deadlocks (#2681). - loop_id = 0 + # Resolve the current event loop for async clients so we can validate + # cached entries. Loop identity is NOT in the cache key — instead we + # check at hit time whether the cached loop is still current and open. + # This prevents unbounded cache growth from recycled worker-thread loops + # while still guaranteeing we never reuse a client on the wrong loop + # (which causes deadlocks, see #2681). current_loop = None if async_mode: try: import asyncio as _aio current_loop = _aio.get_event_loop() - loop_id = id(current_loop) except RuntimeError: pass runtime = _normalize_main_runtime(main_runtime) runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () - cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id, runtime_key) + cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) with _client_cache_lock: if cache_key in _client_cache: cached_client, cached_default, cached_loop = _client_cache[cache_key] if async_mode: - # A cached async client whose loop has been closed will raise - # "Event loop is closed" when httpx tries to clean up its - # transport. Discard the stale client and create a fresh one. - if cached_loop is not None and cached_loop.is_closed(): - _force_close_async_httpx(cached_client) - del _client_cache[cache_key] - else: + # Validate: the cached client must be bound to the CURRENT, + # OPEN loop. 
If the loop changed or was closed, the httpx + # transport inside is dead — force-close and replace. + loop_ok = ( + cached_loop is not None + and cached_loop is current_loop + and not cached_loop.is_closed() + ) + if loop_ok: effective = _compat_model(cached_client, model, cached_default) return cached_client, effective + # Stale — evict and fall through to create a new client. + _force_close_async_httpx(cached_client) + del _client_cache[cache_key] else: effective = _compat_model(cached_client, model, cached_default) return cached_client, effective @@ -2021,6 +2282,12 @@ def _get_cached_client( bound_loop = current_loop with _client_cache_lock: if cache_key not in _client_cache: + # Safety belt: if the cache has grown beyond the max, evict + # the oldest entries (FIFO — dict preserves insertion order). + while len(_client_cache) >= _CLIENT_CACHE_MAX_SIZE: + evict_key, evict_entry = next(iter(_client_cache.items())) + _force_close_async_httpx(evict_entry[0]) + del _client_cache[evict_key] _client_cache[cache_key] = (client, default_model, bound_loop) else: client, default_model, _ = _client_cache[cache_key] @@ -2046,7 +2313,6 @@ def _resolve_task_provider_model( to "custom" and the task uses that direct endpoint. api_mode is one of "chat_completions", "codex_responses", or None (auto-detect). 
""" - config = {} cfg_provider = None cfg_model = None cfg_base_url = None @@ -2054,16 +2320,7 @@ def _resolve_task_provider_model( cfg_api_mode = None if task: - try: - from hermes_cli.config import load_config - config = load_config() - except ImportError: - config = {} - - aux = config.get("auxiliary", {}) if isinstance(config, dict) else {} - task_config = aux.get(task, {}) if isinstance(aux, dict) else {} - if not isinstance(task_config, dict): - task_config = {} + task_config = _get_auxiliary_task_config(task) cfg_provider = str(task_config.get("provider", "")).strip() or None cfg_model = str(task_config.get("model", "")).strip() or None cfg_base_url = str(task_config.get("base_url", "")).strip() or None @@ -2093,17 +2350,25 @@ def _resolve_task_provider_model( _DEFAULT_AUX_TIMEOUT = 30.0 -def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float: - """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*.""" +def _get_auxiliary_task_config(task: str) -> Dict[str, Any]: + """Return the config dict for auxiliary., or {} when unavailable.""" if not task: - return default + return {} try: from hermes_cli.config import load_config config = load_config() except ImportError: - return default + return {} aux = config.get("auxiliary", {}) if isinstance(config, dict) else {} task_config = aux.get(task, {}) if isinstance(aux, dict) else {} + return task_config if isinstance(task_config, dict) else {} + + +def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float: + """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*.""" + if not task: + return default + task_config = _get_auxiliary_task_config(task) raw = task_config.get("timeout") if raw is not None: try: @@ -2113,6 +2378,15 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float return default +def _get_task_extra_body(task: str) -> Dict[str, Any]: + """Read auxiliary..extra_body and 
return a shallow copy when valid.""" + task_config = _get_auxiliary_task_config(task) + raw = task_config.get("extra_body") + if isinstance(raw, dict): + return dict(raw) + return {} + + # --------------------------------------------------------------------------- # Anthropic-compatible endpoint detection + image block conversion # --------------------------------------------------------------------------- @@ -2200,6 +2474,19 @@ def _build_call_kwargs( "timeout": timeout, } + fixed_temperature = _fixed_temperature_for_model(model, base_url) + if fixed_temperature is not None: + temperature = fixed_temperature + + # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently + # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on + # flush_memories, 0 on structured-JSON extraction) don't 400 the moment + # the aux model is flipped to 4.7. + if temperature is not None: + from agent.anthropic_adapter import _forbids_sampling_params + if _forbids_sampling_params(model): + temperature = None + if temperature is not None: kwargs["temperature"] = temperature @@ -2300,13 +2587,15 @@ def call_llm( """ resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model( task, provider, model, base_url, api_key) + effective_extra_body = _get_task_extra_body(task) + effective_extra_body.update(extra_body or {}) if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( - provider=provider, - model=model, - base_url=base_url, - api_key=api_key, + provider=resolved_provider if resolved_provider != "auto" else provider, + model=resolved_model or model, + base_url=resolved_base_url or base_url, + api_key=resolved_api_key or api_key, async_mode=False, ) if client is None and resolved_provider != "auto" and not resolved_base_url: @@ -2368,11 +2657,14 @@ def call_llm( task, resolved_provider or "auto", final_model or "default", f" at {_base_info}" if _base_info and 
"openrouter" not in _base_info else "") + # Pass the client's actual base_url (not just resolved_base_url) so + # endpoint-specific temperature overrides can distinguish + # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes. kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, - tools=tools, timeout=effective_timeout, extra_body=extra_body, - base_url=resolved_base_url) + tools=tools, timeout=effective_timeout, extra_body=effective_extra_body, + base_url=_base_info or resolved_base_url) # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax) _client_base = str(getattr(client, "base_url", "") or "") @@ -2426,7 +2718,8 @@ def call_llm( fb_label, fb_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, - extra_body=extra_body) + extra_body=effective_extra_body, + base_url=str(getattr(fb_client, "base_url", "") or "")) return _validate_llm_response( fb_client.chat.completions.create(**fb_kwargs), task) raise @@ -2508,13 +2801,15 @@ async def async_call_llm( """ resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model( task, provider, model, base_url, api_key) + effective_extra_body = _get_task_extra_body(task) + effective_extra_body.update(extra_body or {}) if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( - provider=provider, - model=model, - base_url=base_url, - api_key=api_key, + provider=resolved_provider if resolved_provider != "auto" else provider, + model=resolved_model or model, + base_url=resolved_base_url or base_url, + api_key=resolved_api_key or api_key, async_mode=True, ) if client is None and resolved_provider != "auto" and not resolved_base_url: @@ -2561,14 +2856,17 @@ async def async_call_llm( effective_timeout = timeout if timeout is not None else _get_task_timeout(task) + # Pass the client's 
actual base_url (not just resolved_base_url) so + # endpoint-specific temperature overrides can distinguish + # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes. + _client_base = str(getattr(client, "base_url", "") or "") kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, - tools=tools, timeout=effective_timeout, extra_body=extra_body, - base_url=resolved_base_url) + tools=tools, timeout=effective_timeout, extra_body=effective_extra_body, + base_url=_client_base or resolved_base_url) # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax) - _client_base = str(getattr(client, "base_url", "") or "") if _is_anthropic_compat_endpoint(resolved_provider, _client_base): kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"]) @@ -2604,7 +2902,8 @@ async def async_call_llm( fb_label, fb_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, - extra_body=extra_body) + extra_body=effective_extra_body, + base_url=str(getattr(fb_client, "base_url", "") or "")) # Convert sync fallback client to async async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "") if async_fb_model and async_fb_model != fb_kwargs.get("model"): diff --git a/agent/bedrock_adapter.py b/agent/bedrock_adapter.py new file mode 100644 index 0000000000..9e4297581d --- /dev/null +++ b/agent/bedrock_adapter.py @@ -0,0 +1,1098 @@ +"""AWS Bedrock Converse API adapter for Hermes Agent. + +Provides native integration with Amazon Bedrock using the Converse API, +bypassing the OpenAI-compatible endpoint in favor of direct AWS SDK calls. +This enables full access to the Bedrock ecosystem: + + - **Native Converse API**: Unified interface for all Bedrock models + (Claude, Nova, Llama, Mistral, etc.) with streaming support. 
+ - **AWS credential chain**: IAM roles, SSO profiles, environment variables, + instance metadata — zero API key management for AWS-native environments. + - **Dynamic model discovery**: Auto-discovers available foundation models + and cross-region inference profiles via the Bedrock control plane. + - **Guardrails support**: Optional Bedrock Guardrails configuration for + content filtering and safety policies. + - **Inference profiles**: Supports cross-region inference profiles + (us.anthropic.claude-*, global.anthropic.claude-*) for better capacity + and automatic failover. + +Architecture follows the same pattern as ``anthropic_adapter.py``: + - All Bedrock-specific logic is isolated in this module. + - Messages/tools are converted between OpenAI format and Converse format. + - Responses are normalized back to OpenAI-compatible objects for the agent loop. + +Reference: OpenClaw's ``extensions/amazon-bedrock/`` plugin, which implements +the same Converse API integration in TypeScript via ``@aws-sdk/client-bedrock``. + +Requires: ``boto3`` (optional dependency — only needed when using the Bedrock provider). +""" + +import json +import logging +import os +import re +from types import SimpleNamespace +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Lazy boto3 import — only loaded when the Bedrock provider is actually used. +# This keeps startup fast for users who don't use Bedrock. +# --------------------------------------------------------------------------- + +_bedrock_runtime_client_cache: Dict[str, Any] = {} +_bedrock_control_client_cache: Dict[str, Any] = {} + + +def _require_boto3(): + """Import boto3, raising a clear error if not installed.""" + try: + import boto3 + return boto3 + except ImportError: + raise ImportError( + "The 'boto3' package is required for the AWS Bedrock provider. 
" + "Install it with: pip install boto3\n" + "Or install Hermes with Bedrock support: pip install -e '.[bedrock]'" + ) + + +def _get_bedrock_runtime_client(region: str): + """Get or create a cached ``bedrock-runtime`` client for the given region. + + Uses the default AWS credential chain (env vars → profile → instance role). + """ + if region not in _bedrock_runtime_client_cache: + boto3 = _require_boto3() + _bedrock_runtime_client_cache[region] = boto3.client( + "bedrock-runtime", region_name=region, + ) + return _bedrock_runtime_client_cache[region] + + +def _get_bedrock_control_client(region: str): + """Get or create a cached ``bedrock`` control-plane client for model discovery.""" + if region not in _bedrock_control_client_cache: + boto3 = _require_boto3() + _bedrock_control_client_cache[region] = boto3.client( + "bedrock", region_name=region, + ) + return _bedrock_control_client_cache[region] + + +def reset_client_cache(): + """Clear cached boto3 clients. Used in tests and profile switches.""" + _bedrock_runtime_client_cache.clear() + _bedrock_control_client_cache.clear() + + +# --------------------------------------------------------------------------- +# AWS credential detection +# --------------------------------------------------------------------------- + +# Priority order matches OpenClaw's resolveAwsSdkEnvVarName(): +# 1. AWS_BEARER_TOKEN_BEDROCK (Bedrock-specific bearer token) +# 2. AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (explicit IAM credentials) +# 3. AWS_PROFILE (named profile → SSO, assume-role, etc.) +# 4. 
Implicit: instance role, ECS task role, Lambda execution role +_AWS_CREDENTIAL_ENV_VARS = [ + "AWS_BEARER_TOKEN_BEDROCK", + "AWS_ACCESS_KEY_ID", + "AWS_PROFILE", + # These are checked by boto3's default chain but we list them for + # has_aws_credentials() detection: + "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", + "AWS_WEB_IDENTITY_TOKEN_FILE", +] + + +def resolve_aws_auth_env_var(env: Optional[Dict[str, str]] = None) -> Optional[str]: + """Return the name of the AWS auth source that is active, or None. + + Checks environment variables first, then falls back to boto3's credential + chain for implicit sources (EC2 IMDS, ECS task role, etc.). + + This mirrors OpenClaw's ``resolveAwsSdkEnvVarName()`` — used to detect + whether the user has any AWS credentials configured without actually + attempting to authenticate. + """ + env = env if env is not None else os.environ + # Bearer token takes highest priority + if env.get("AWS_BEARER_TOKEN_BEDROCK", "").strip(): + return "AWS_BEARER_TOKEN_BEDROCK" + # Explicit access key pair + if (env.get("AWS_ACCESS_KEY_ID", "").strip() + and env.get("AWS_SECRET_ACCESS_KEY", "").strip()): + return "AWS_ACCESS_KEY_ID" + # Named profile (SSO, assume-role, etc.) + if env.get("AWS_PROFILE", "").strip(): + return "AWS_PROFILE" + # Container credentials (ECS, CodeBuild) + if env.get("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", "").strip(): + return "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI" + # Web identity (EKS IRSA) + if env.get("AWS_WEB_IDENTITY_TOKEN_FILE", "").strip(): + return "AWS_WEB_IDENTITY_TOKEN_FILE" + # No env vars — check if boto3 can resolve credentials via IMDS or other + # implicit sources (EC2 instance role, ECS task role, Lambda, etc.) 
+ try: + import botocore.session + session = botocore.session.get_session() + credentials = session.get_credentials() + if credentials is not None: + resolved = credentials.get_frozen_credentials() + if resolved and resolved.access_key: + return "iam-role" + except Exception: + pass + return None + + +def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool: + """Return True if any AWS credential source is detected. + + Checks environment variables first (fast, no I/O), then falls back to + boto3's credential chain which covers EC2 instance roles, ECS task roles, + Lambda execution roles, and other IMDS-based sources that don't set + environment variables. + + This two-tier approach mirrors the pattern from OpenClaw PR #62673: + cloud environments (EC2, ECS, Lambda) provide credentials via instance + metadata, not environment variables. The env-var check is a fast path + for local development; the boto3 fallback covers all cloud deployments. + """ + if resolve_aws_auth_env_var(env) is not None: + return True + # Fall back to boto3's credential resolver — this covers EC2 instance + # metadata (IMDS), ECS container credentials, and other implicit sources + # that don't set environment variables. + try: + import botocore.session + session = botocore.session.get_session() + credentials = session.get_credentials() + if credentials is not None: + resolved = credentials.get_frozen_credentials() + if resolved and resolved.access_key: + return True + except Exception: + pass + return False + + +def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str: + """Resolve the AWS region for Bedrock API calls. + + Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback). 
+ """ + env = env if env is not None else os.environ + return ( + env.get("AWS_REGION", "").strip() + or env.get("AWS_DEFAULT_REGION", "").strip() + or "us-east-1" + ) + + +# --------------------------------------------------------------------------- +# Tool-calling capability detection +# --------------------------------------------------------------------------- +# Some Bedrock models don't support tool/function calling. Sending toolConfig +# to these models causes ValidationException. We maintain a denylist of known +# non-tool-calling model patterns and strip tools for them. +# +# This is a conservative approach: unknown models are assumed to support tools. +# If a model fails with a tool-related ValidationException, add it here. + +_NON_TOOL_CALLING_PATTERNS = [ + "deepseek.r1", # DeepSeek R1 — reasoning only, no tool support + "deepseek-r1", # Alternate ID format + "stability.", # Image generation models + "cohere.embed", # Embedding models + "amazon.titan-embed", # Embedding models +] + + +def _model_supports_tool_use(model_id: str) -> bool: + """Return True if the model is expected to support tool/function calling. + + Models in the denylist are known to reject toolConfig in the Converse API. + Unknown models default to True (assume tool support). + """ + model_lower = model_id.lower() + return not any(pattern in model_lower for pattern in _NON_TOOL_CALLING_PATTERNS) + + +def is_anthropic_bedrock_model(model_id: str) -> bool: + """Return True if the model is an Anthropic Claude model on Bedrock. + + These models should use the AnthropicBedrock SDK path for full feature + parity (prompt caching, thinking budgets, adaptive thinking). + Non-Claude models use the Converse API path. 
+ + Matches: + - ``anthropic.claude-*`` (foundation model IDs) + - ``us.anthropic.claude-*`` (US inference profiles) + - ``global.anthropic.claude-*`` (global inference profiles) + - ``eu.anthropic.claude-*`` (EU inference profiles) + """ + model_lower = model_id.lower() + # Strip regional prefix if present + for prefix in ("us.", "global.", "eu.", "ap.", "jp."): + if model_lower.startswith(prefix): + model_lower = model_lower[len(prefix):] + break + return model_lower.startswith("anthropic.claude") + + +# --------------------------------------------------------------------------- +# Message format conversion: OpenAI → Bedrock Converse +# --------------------------------------------------------------------------- + +def convert_tools_to_converse(tools: List[Dict]) -> List[Dict]: + """Convert OpenAI-format tool definitions to Bedrock Converse ``toolConfig``. + + OpenAI format:: + + {"type": "function", "function": {"name": "...", "description": "...", + "parameters": {"type": "object", "properties": {...}}}} + + Converse format:: + + {"toolSpec": {"name": "...", "description": "...", + "inputSchema": {"json": {"type": "object", "properties": {...}}}}} + """ + if not tools: + return [] + result = [] + for t in tools: + fn = t.get("function", {}) + name = fn.get("name", "") + description = fn.get("description", "") + parameters = fn.get("parameters", {"type": "object", "properties": {}}) + result.append({ + "toolSpec": { + "name": name, + "description": description, + "inputSchema": {"json": parameters}, + } + }) + return result + + +def _convert_content_to_converse(content) -> List[Dict]: + """Convert OpenAI message content (string or list) to Converse content blocks. 
+ + Handles: + - Plain text strings → [{"text": "..."}] + - Content arrays with text/image_url parts → mixed text/image blocks + + Filters out empty text blocks — Bedrock's Converse API rejects messages + where a text content block has an empty ``text`` field (ValidationException: + "text content blocks must be non-empty"). Ref: issue #9486. + """ + if content is None: + return [{"text": " "}] + if isinstance(content, str): + return [{"text": content}] if content.strip() else [{"text": " "}] + if isinstance(content, list): + blocks = [] + for part in content: + if isinstance(part, str): + blocks.append({"text": part}) + continue + if not isinstance(part, dict): + continue + part_type = part.get("type", "") + if part_type == "text": + text = part.get("text", "") + blocks.append({"text": text if text else " "}) + elif part_type == "image_url": + image_url = part.get("image_url", {}) + url = image_url.get("url", "") if isinstance(image_url, dict) else "" + if url.startswith("data:"): + # data:image/jpeg;base64,/9j/4AAQ... + header, _, data = url.partition(",") + media_type = "image/jpeg" + if header.startswith("data:"): + mime_part = header[5:].split(";")[0] + if mime_part: + media_type = mime_part + blocks.append({ + "image": { + "format": media_type.split("/")[-1] if "/" in media_type else "jpeg", + "source": {"bytes": data}, + } + }) + else: + # Remote URL — Converse doesn't support URLs directly, + # include as text reference for the model. + blocks.append({"text": f"[Image: {url}]"}) + return blocks if blocks else [{"text": " "}] + return [{"text": str(content)}] + + +def convert_messages_to_converse( + messages: List[Dict], +) -> Tuple[Optional[List[Dict]], List[Dict]]: + """Convert OpenAI-format messages to Bedrock Converse format. 
+ + Returns ``(system_prompt, converse_messages)`` where: + - ``system_prompt`` is a list of system content blocks (or None) + - ``converse_messages`` is the conversation in Converse format + + Handles: + - System messages → extracted as system prompt + - User messages → ``{"role": "user", "content": [...]}`` + - Assistant messages → ``{"role": "assistant", "content": [...]}`` + - Tool calls → ``{"toolUse": {"toolUseId": ..., "name": ..., "input": ...}}`` + - Tool results → ``{"toolResult": {"toolUseId": ..., "content": [...]}}`` + + Converse requires strict user/assistant alternation. Consecutive messages + with the same role are merged into a single message. + """ + system_blocks: List[Dict] = [] + converse_msgs: List[Dict] = [] + + for msg in messages: + role = msg.get("role", "") + content = msg.get("content") + + if role == "system": + # System messages become the system prompt + if isinstance(content, str) and content.strip(): + system_blocks.append({"text": content}) + elif isinstance(content, list): + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + system_blocks.append({"text": part.get("text", "")}) + elif isinstance(part, str): + system_blocks.append({"text": part}) + continue + + if role == "tool": + # Tool result messages → merge into the preceding user turn + tool_call_id = msg.get("tool_call_id", "") + result_content = content if isinstance(content, str) else json.dumps(content) + tool_result_block = { + "toolResult": { + "toolUseId": tool_call_id, + "content": [{"text": result_content}], + } + } + # In Converse, tool results go in a "user" role message + if converse_msgs and converse_msgs[-1]["role"] == "user": + converse_msgs[-1]["content"].append(tool_result_block) + else: + converse_msgs.append({ + "role": "user", + "content": [tool_result_block], + }) + continue + + if role == "assistant": + content_blocks = [] + # Convert text content + if isinstance(content, str) and content.strip(): + 
content_blocks.append({"text": content}) + elif isinstance(content, list): + content_blocks.extend(_convert_content_to_converse(content)) + + # Convert tool calls + tool_calls = msg.get("tool_calls", []) + for tc in (tool_calls or []): + fn = tc.get("function", {}) + args_str = fn.get("arguments", "{}") + try: + args_dict = json.loads(args_str) if isinstance(args_str, str) else args_str + except (json.JSONDecodeError, TypeError): + args_dict = {} + content_blocks.append({ + "toolUse": { + "toolUseId": tc.get("id", ""), + "name": fn.get("name", ""), + "input": args_dict, + } + }) + + if not content_blocks: + content_blocks = [{"text": " "}] + + # Merge with previous assistant message if needed (strict alternation) + if converse_msgs and converse_msgs[-1]["role"] == "assistant": + converse_msgs[-1]["content"].extend(content_blocks) + else: + converse_msgs.append({ + "role": "assistant", + "content": content_blocks, + }) + continue + + if role == "user": + content_blocks = _convert_content_to_converse(content) + # Merge with previous user message if needed (strict alternation) + if converse_msgs and converse_msgs[-1]["role"] == "user": + converse_msgs[-1]["content"].extend(content_blocks) + else: + converse_msgs.append({ + "role": "user", + "content": content_blocks, + }) + continue + + # Converse requires the first message to be from the user + if converse_msgs and converse_msgs[0]["role"] != "user": + converse_msgs.insert(0, {"role": "user", "content": [{"text": " "}]}) + + # Converse requires the last message to be from the user + if converse_msgs and converse_msgs[-1]["role"] != "user": + converse_msgs.append({"role": "user", "content": [{"text": " "}]}) + + return (system_blocks if system_blocks else None, converse_msgs) + + +# --------------------------------------------------------------------------- +# Response format conversion: Bedrock Converse → OpenAI +# --------------------------------------------------------------------------- + +def 
_converse_stop_reason_to_openai(stop_reason: str) -> str: + """Map Bedrock Converse stop reasons to OpenAI finish_reason values.""" + mapping = { + "end_turn": "stop", + "stop_sequence": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "content_filtered": "content_filter", + "guardrail_intervened": "content_filter", + } + return mapping.get(stop_reason, "stop") + + +def normalize_converse_response(response: Dict) -> SimpleNamespace: + """Convert a Bedrock Converse API response to an OpenAI-compatible object. + + The agent loop in ``run_agent.py`` expects responses shaped like + ``openai.ChatCompletion`` — this function bridges the gap. + + Returns a SimpleNamespace with: + - ``.choices[0].message.content`` — text response + - ``.choices[0].message.tool_calls`` — tool call list (if any) + - ``.choices[0].finish_reason`` — stop/tool_calls/length + - ``.usage`` — token usage stats + """ + output = response.get("output", {}) + message = output.get("message", {}) + content_blocks = message.get("content", []) + stop_reason = response.get("stopReason", "end_turn") + + text_parts = [] + tool_calls = [] + + for block in content_blocks: + if "text" in block: + text_parts.append(block["text"]) + elif "toolUse" in block: + tu = block["toolUse"] + tool_calls.append(SimpleNamespace( + id=tu.get("toolUseId", ""), + type="function", + function=SimpleNamespace( + name=tu.get("name", ""), + arguments=json.dumps(tu.get("input", {})), + ), + )) + + # Build the message object + msg = SimpleNamespace( + role="assistant", + content="\n".join(text_parts) if text_parts else None, + tool_calls=tool_calls if tool_calls else None, + ) + + # Build usage stats + usage_data = response.get("usage", {}) + usage = SimpleNamespace( + prompt_tokens=usage_data.get("inputTokens", 0), + completion_tokens=usage_data.get("outputTokens", 0), + total_tokens=( + usage_data.get("inputTokens", 0) + usage_data.get("outputTokens", 0) + ), + ) + + finish_reason = 
_converse_stop_reason_to_openai(stop_reason) + if tool_calls and finish_reason == "stop": + finish_reason = "tool_calls" + + choice = SimpleNamespace( + index=0, + message=msg, + finish_reason=finish_reason, + ) + + return SimpleNamespace( + choices=[choice], + usage=usage, + model=response.get("modelId", ""), + ) + + +# --------------------------------------------------------------------------- +# Streaming response conversion +# --------------------------------------------------------------------------- + +def normalize_converse_stream_events(event_stream) -> SimpleNamespace: + """Consume a Bedrock ConverseStream event stream and build an OpenAI-compatible response. + + Processes the stream events in order: + - ``messageStart`` — role info + - ``contentBlockStart`` — new text or toolUse block + - ``contentBlockDelta`` — incremental text or toolUse input + - ``contentBlockStop`` — block complete + - ``messageStop`` — stop reason + - ``metadata`` — usage stats + + Returns the same shape as ``normalize_converse_response()``. + """ + return stream_converse_with_callbacks(event_stream) + + +def stream_converse_with_callbacks( + event_stream, + on_text_delta=None, + on_tool_start=None, + on_reasoning_delta=None, + on_interrupt_check=None, +) -> SimpleNamespace: + """Process a Bedrock ConverseStream event stream with real-time callbacks. + + This is the core streaming function that powers both the CLI's live token + display and the gateway's progressive message updates. + + Args: + event_stream: The boto3 ``converse_stream()`` response containing a + ``stream`` key with an iterable of events. + on_text_delta: Called with each text chunk as it arrives. Only fires + when no tool_use blocks have been seen (same semantics as the + Anthropic and chat_completions streaming paths). + on_tool_start: Called with the tool name when a toolUse block begins. + Lets the TUI show a spinner while tool arguments are generated. 
+ on_reasoning_delta: Called with reasoning/thinking text chunks. + Bedrock surfaces thinking via ``reasoning`` content block deltas + on supported models (Claude 4.6+). + on_interrupt_check: Called on each event. Should return True if the + agent has been interrupted and streaming should stop. + + Returns: + An OpenAI-compatible SimpleNamespace response, identical in shape to + ``normalize_converse_response()``. + """ + text_parts: List[str] = [] + tool_calls: List[SimpleNamespace] = [] + current_tool: Optional[Dict] = None + current_text_buffer: List[str] = [] + has_tool_use = False + stop_reason = "end_turn" + usage_data: Dict[str, int] = {} + + for event in event_stream.get("stream", []): + # Check for interrupt + if on_interrupt_check and on_interrupt_check(): + break + + if "contentBlockStart" in event: + start = event["contentBlockStart"].get("start", {}) + if "toolUse" in start: + has_tool_use = True + # Flush any accumulated text + if current_text_buffer: + text_parts.append("".join(current_text_buffer)) + current_text_buffer = [] + current_tool = { + "toolUseId": start["toolUse"].get("toolUseId", ""), + "name": start["toolUse"].get("name", ""), + "input_json": "", + } + if on_tool_start: + on_tool_start(current_tool["name"]) + + elif "contentBlockDelta" in event: + delta = event["contentBlockDelta"].get("delta", {}) + if "text" in delta: + text = delta["text"] + current_text_buffer.append(text) + # Fire text delta callback only when no tool calls are present + # (same semantics as Anthropic/chat_completions streaming) + if on_text_delta and not has_tool_use: + on_text_delta(text) + elif "toolUse" in delta: + if current_tool is not None: + current_tool["input_json"] += delta["toolUse"].get("input", "") + elif "reasoningContent" in delta: + # Claude 4.6+ on Bedrock surfaces thinking via reasoningContent + reasoning = delta["reasoningContent"] + if isinstance(reasoning, dict): + thinking_text = reasoning.get("text", "") + if thinking_text and 
on_reasoning_delta: + on_reasoning_delta(thinking_text) + + elif "contentBlockStop" in event: + if current_tool is not None: + try: + input_dict = json.loads(current_tool["input_json"]) if current_tool["input_json"] else {} + except (json.JSONDecodeError, TypeError): + input_dict = {} + tool_calls.append(SimpleNamespace( + id=current_tool["toolUseId"], + type="function", + function=SimpleNamespace( + name=current_tool["name"], + arguments=json.dumps(input_dict), + ), + )) + current_tool = None + elif current_text_buffer: + text_parts.append("".join(current_text_buffer)) + current_text_buffer = [] + + elif "messageStop" in event: + stop_reason = event["messageStop"].get("stopReason", "end_turn") + + elif "metadata" in event: + meta_usage = event["metadata"].get("usage", {}) + usage_data = { + "inputTokens": meta_usage.get("inputTokens", 0), + "outputTokens": meta_usage.get("outputTokens", 0), + } + + # Flush remaining text + if current_text_buffer: + text_parts.append("".join(current_text_buffer)) + + msg = SimpleNamespace( + role="assistant", + content="\n".join(text_parts) if text_parts else None, + tool_calls=tool_calls if tool_calls else None, + ) + + usage = SimpleNamespace( + prompt_tokens=usage_data.get("inputTokens", 0), + completion_tokens=usage_data.get("outputTokens", 0), + total_tokens=( + usage_data.get("inputTokens", 0) + usage_data.get("outputTokens", 0) + ), + ) + + finish_reason = _converse_stop_reason_to_openai(stop_reason) + if tool_calls and finish_reason == "stop": + finish_reason = "tool_calls" + + choice = SimpleNamespace( + index=0, + message=msg, + finish_reason=finish_reason, + ) + + return SimpleNamespace( + choices=[choice], + usage=usage, + model="", + ) + + +# --------------------------------------------------------------------------- +# High-level API: call Bedrock Converse +# --------------------------------------------------------------------------- + +def build_converse_kwargs( + model: str, + messages: List[Dict], + tools: 
Optional[List[Dict]] = None, + max_tokens: int = 4096, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + stop_sequences: Optional[List[str]] = None, + guardrail_config: Optional[Dict] = None, +) -> Dict[str, Any]: + """Build kwargs for ``bedrock-runtime.converse()`` or ``converse_stream()``. + + Converts OpenAI-format inputs to Converse API parameters. + """ + system_prompt, converse_messages = convert_messages_to_converse(messages) + + kwargs: Dict[str, Any] = { + "modelId": model, + "messages": converse_messages, + "inferenceConfig": { + "maxTokens": max_tokens, + }, + } + + if system_prompt: + kwargs["system"] = system_prompt + + if temperature is not None: + kwargs["inferenceConfig"]["temperature"] = temperature + + if top_p is not None: + kwargs["inferenceConfig"]["topP"] = top_p + + if stop_sequences: + kwargs["inferenceConfig"]["stopSequences"] = stop_sequences + + if tools: + converse_tools = convert_tools_to_converse(tools) + if converse_tools: + # Some Bedrock models don't support tool/function calling (e.g. + # DeepSeek R1, reasoning-only models). Sending toolConfig to + # these models causes a ValidationException → retry loop → failure. + # Strip tools for known non-tool-calling models and warn the user. + # Ref: PR #7920 feedback from @ptlally, pattern from PR #4346. + if _model_supports_tool_use(model): + kwargs["toolConfig"] = {"tools": converse_tools} + else: + logger.warning( + "Model %s does not support tool calling — tools stripped. 
" + "The agent will operate in text-only mode.", model + ) + + if guardrail_config: + kwargs["guardrailConfig"] = guardrail_config + + return kwargs + + +def call_converse( + region: str, + model: str, + messages: List[Dict], + tools: Optional[List[Dict]] = None, + max_tokens: int = 4096, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + stop_sequences: Optional[List[str]] = None, + guardrail_config: Optional[Dict] = None, +) -> SimpleNamespace: + """Call Bedrock Converse API (non-streaming) and return an OpenAI-compatible response. + + This is the primary entry point for the agent loop when using the Bedrock provider. + """ + client = _get_bedrock_runtime_client(region) + kwargs = build_converse_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + stop_sequences=stop_sequences, + guardrail_config=guardrail_config, + ) + + response = client.converse(**kwargs) + return normalize_converse_response(response) + + +def call_converse_stream( + region: str, + model: str, + messages: List[Dict], + tools: Optional[List[Dict]] = None, + max_tokens: int = 4096, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + stop_sequences: Optional[List[str]] = None, + guardrail_config: Optional[Dict] = None, +) -> SimpleNamespace: + """Call Bedrock ConverseStream API and return an OpenAI-compatible response. + + Consumes the full stream and returns the assembled response. For true + streaming with delta callbacks, use ``iter_converse_stream()`` instead. 
+ """ + client = _get_bedrock_runtime_client(region) + kwargs = build_converse_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + stop_sequences=stop_sequences, + guardrail_config=guardrail_config, + ) + + response = client.converse_stream(**kwargs) + return normalize_converse_stream_events(response) + + +# --------------------------------------------------------------------------- +# Model discovery +# --------------------------------------------------------------------------- + +_discovery_cache: Dict[str, Any] = {} +_DISCOVERY_CACHE_TTL_SECONDS = 3600 + + +def reset_discovery_cache(): + """Clear the model discovery cache. Used in tests.""" + _discovery_cache.clear() + + +def discover_bedrock_models( + region: str, + provider_filter: Optional[List[str]] = None, +) -> List[Dict[str, Any]]: + """Discover available Bedrock foundation models and inference profiles. + + Returns a list of model info dicts with keys: + - ``id``: Model ID (e.g. "anthropic.claude-sonnet-4-6-20250514-v1:0") + - ``name``: Human-readable name + - ``provider``: Model provider (e.g. "Anthropic", "Amazon", "Meta") + - ``input_modalities``: List of input types (e.g. ["TEXT", "IMAGE"]) + - ``output_modalities``: List of output types + - ``streaming``: Whether streaming is supported + + Caches results for 1 hour per region to avoid repeated API calls. + + Mirrors OpenClaw's ``discoverBedrockModels()`` in + ``extensions/amazon-bedrock/discovery.ts``. 
+ """ + import time + + cache_key = f"{region}:{','.join(sorted(provider_filter or []))}" + cached = _discovery_cache.get(cache_key) + if cached and (time.time() - cached["timestamp"]) < _DISCOVERY_CACHE_TTL_SECONDS: + return cached["models"] + + try: + client = _get_bedrock_control_client(region) + except Exception as e: + logger.warning("Failed to create Bedrock client for model discovery: %s", e) + return [] + + models = [] + seen_ids = set() + filter_set = {f.lower() for f in (provider_filter or [])} + + # 1. Discover foundation models + try: + response = client.list_foundation_models() + for summary in response.get("modelSummaries", []): + model_id = (summary.get("modelId") or "").strip() + if not model_id: + continue + + # Apply provider filter + if filter_set: + provider_name = (summary.get("providerName") or "").lower() + model_prefix = model_id.split(".")[0].lower() if "." in model_id else "" + if provider_name not in filter_set and model_prefix not in filter_set: + continue + + # Only include active, streaming-capable, text-output models + lifecycle = summary.get("modelLifecycle", {}) + if lifecycle.get("status", "").upper() != "ACTIVE": + continue + if not summary.get("responseStreamingSupported", False): + continue + output_mods = summary.get("outputModalities", []) + if "TEXT" not in output_mods: + continue + + models.append({ + "id": model_id, + "name": (summary.get("modelName") or model_id).strip(), + "provider": (summary.get("providerName") or "").strip(), + "input_modalities": summary.get("inputModalities", []), + "output_modalities": output_mods, + "streaming": True, + }) + seen_ids.add(model_id.lower()) + except Exception as e: + logger.warning("Failed to list Bedrock foundation models: %s", e) + + # 2. 
Discover inference profiles (cross-region, better capacity) + try: + profiles = [] + next_token = None + while True: + kwargs = {} + if next_token: + kwargs["nextToken"] = next_token + response = client.list_inference_profiles(**kwargs) + for profile in response.get("inferenceProfileSummaries", []): + profiles.append(profile) + next_token = response.get("nextToken") + if not next_token: + break + + for profile in profiles: + profile_id = (profile.get("inferenceProfileId") or "").strip() + if not profile_id: + continue + if profile.get("status") != "ACTIVE": + continue + if profile_id.lower() in seen_ids: + continue + + # Apply provider filter to underlying models + if filter_set: + profile_models = profile.get("models", []) + matches = any( + _extract_provider_from_arn(m.get("modelArn", "")).lower() in filter_set + for m in profile_models + ) + if not matches: + continue + + models.append({ + "id": profile_id, + "name": (profile.get("inferenceProfileName") or profile_id).strip(), + "provider": "inference-profile", + "input_modalities": ["TEXT"], + "output_modalities": ["TEXT"], + "streaming": True, + }) + seen_ids.add(profile_id.lower()) + except Exception as e: + logger.debug("Skipping inference profile discovery: %s", e) + + # Sort: global cross-region profiles first (recommended), then alphabetical + models.sort(key=lambda m: ( + 0 if m["id"].startswith("global.") else 1, + m["name"].lower(), + )) + + _discovery_cache[cache_key] = { + "timestamp": time.time(), + "models": models, + } + return models + + +def _extract_provider_from_arn(arn: str) -> str: + """Extract the model provider from a Bedrock model ARN. + + Example: "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-v2" + → "anthropic" + """ + match = re.search(r"foundation-model/([^.]+)", arn) + return match.group(1) if match else "" + + +def get_bedrock_model_ids(region: str) -> List[str]: + """Return a flat list of available Bedrock model IDs for the given region. 
+ + Convenience wrapper around ``discover_bedrock_models()`` for use in + the model selection UI. + """ + models = discover_bedrock_models(region) + return [m["id"] for m in models] + + +# --------------------------------------------------------------------------- +# Error classification — Bedrock-specific exceptions +# --------------------------------------------------------------------------- +# Mirrors OpenClaw's classifyFailoverReason() and matchesContextOverflowError() +# in extensions/amazon-bedrock/register.sync.runtime.ts. + +# Patterns that indicate the input context exceeded the model's token limit. +# Used by run_agent.py to trigger context compression instead of retrying. +CONTEXT_OVERFLOW_PATTERNS = [ + re.compile(r"ValidationException.*(?:input is too long|max input token|input token.*exceed)", re.IGNORECASE), + re.compile(r"ValidationException.*(?:exceeds? the (?:maximum|max) (?:number of )?(?:input )?tokens)", re.IGNORECASE), + re.compile(r"ModelStreamErrorException.*(?:Input is too long|too many input tokens)", re.IGNORECASE), +] + +# Patterns for throttling / rate limit errors — should trigger backoff + retry. +THROTTLE_PATTERNS = [ + re.compile(r"ThrottlingException", re.IGNORECASE), + re.compile(r"Too many concurrent requests", re.IGNORECASE), + re.compile(r"ServiceQuotaExceededException", re.IGNORECASE), +] + +# Patterns for transient overload — model is temporarily unavailable. +OVERLOAD_PATTERNS = [ + re.compile(r"ModelNotReadyException", re.IGNORECASE), + re.compile(r"ModelTimeoutException", re.IGNORECASE), + re.compile(r"InternalServerException", re.IGNORECASE), +] + + +def is_context_overflow_error(error_message: str) -> bool: + """Return True if the error indicates the input context was too large. + + When this returns True, the agent should compress context and retry + rather than treating it as a fatal error. 
+ """ + return any(p.search(error_message) for p in CONTEXT_OVERFLOW_PATTERNS) + + +def classify_bedrock_error(error_message: str) -> str: + """Classify a Bedrock error for retry/failover decisions. + + Returns: + - ``"context_overflow"`` — input too long, compress and retry + - ``"rate_limit"`` — throttled, backoff and retry + - ``"overloaded"`` — model temporarily unavailable, retry with delay + - ``"unknown"`` — unclassified error + """ + if is_context_overflow_error(error_message): + return "context_overflow" + if any(p.search(error_message) for p in THROTTLE_PATTERNS): + return "rate_limit" + if any(p.search(error_message) for p in OVERLOAD_PATTERNS): + return "overloaded" + return "unknown" + + +# --------------------------------------------------------------------------- +# Bedrock model context lengths +# --------------------------------------------------------------------------- +# Static fallback table for models where the Bedrock API doesn't expose +# context window sizes. Used by agent/model_metadata.py when dynamic +# detection is unavailable. 
+ +BEDROCK_CONTEXT_LENGTHS: Dict[str, int] = { + # Anthropic Claude models on Bedrock + "anthropic.claude-opus-4-6": 200_000, + "anthropic.claude-sonnet-4-6": 200_000, + "anthropic.claude-sonnet-4-5": 200_000, + "anthropic.claude-haiku-4-5": 200_000, + "anthropic.claude-opus-4": 200_000, + "anthropic.claude-sonnet-4": 200_000, + "anthropic.claude-3-5-sonnet": 200_000, + "anthropic.claude-3-5-haiku": 200_000, + "anthropic.claude-3-opus": 200_000, + "anthropic.claude-3-sonnet": 200_000, + "anthropic.claude-3-haiku": 200_000, + # Amazon Nova + "amazon.nova-pro": 300_000, + "amazon.nova-lite": 300_000, + "amazon.nova-micro": 128_000, + # Meta Llama + "meta.llama4-maverick": 128_000, + "meta.llama4-scout": 128_000, + "meta.llama3-3-70b-instruct": 128_000, + # Mistral + "mistral.mistral-large": 128_000, + # DeepSeek + "deepseek.v3": 128_000, +} + +# Default for unknown Bedrock models +BEDROCK_DEFAULT_CONTEXT_LENGTH = 128_000 + + +def get_bedrock_context_length(model_id: str) -> int: + """Look up the context window size for a Bedrock model. + + Uses substring matching so versioned IDs like + ``anthropic.claude-sonnet-4-6-20250514-v1:0`` resolve correctly. + """ + model_lower = model_id.lower() + best_key = "" + best_val = BEDROCK_DEFAULT_CONTEXT_LENGTH + for key, val in BEDROCK_CONTEXT_LENGTHS.items(): + if key in model_lower and len(key) > len(best_key): + best_key = key + best_val = val + return best_val diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 4163966aaa..a681b0c6bc 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -17,7 +17,10 @@ Improvements over v2: - Richer tool call/result detail in summarizer input """ +import hashlib +import json import logging +import re import time from typing import Any, Dict, List, Optional @@ -36,7 +39,10 @@ SUMMARY_PREFIX = ( "into the summary below. This is a handoff from a previous context " "window — treat it as background reference, NOT as active instructions. 
" "Do NOT answer questions or fulfill requests mentioned in this summary; " - "they were already addressed. Respond ONLY to the latest user message " + "they were already addressed. " + "Your current task is identified in the '## Active Task' section of the " + "summary — resume exactly from there. " + "Respond ONLY to the latest user message " "that appears AFTER this summary. The current session state (files, " "config, etc.) may reflect work described here — avoid repeating it:" ) @@ -57,6 +63,174 @@ _CHARS_PER_TOKEN = 4 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600 +def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str: + """Shrink long string values inside a tool-call arguments JSON blob while + preserving JSON validity. + + The ``function.arguments`` field on a tool call is a JSON-encoded string + passed through to the LLM provider; downstream providers strictly + validate it and return a non-retryable 400 when it is not well-formed. + An earlier implementation sliced the raw JSON at a fixed byte offset and + appended ``...[truncated]`` — which routinely produced strings like:: + + {"path": "/foo/bar", "content": "# long markdown + ...[truncated] + + i.e. an unterminated string and a missing closing brace. MiniMax, for + example, rejects this with ``invalid function arguments json string`` + and the session gets stuck re-sending the same broken history on every + turn. See issue #11762 for the observed loop. + + This helper parses the arguments, shrinks long string leaves inside the + parsed structure, and re-serialises. Non-string values (paths, ints, + booleans) are preserved intact. If the arguments are not valid JSON + to begin with — some model backends use non-JSON tool arguments — the + original string is returned unchanged rather than replaced with + something neither we nor the backend can parse. 
+ """ + try: + parsed = json.loads(args) + except (ValueError, TypeError): + return args + + def _shrink(obj: Any) -> Any: + if isinstance(obj, str): + if len(obj) > head_chars: + return obj[:head_chars] + "...[truncated]" + return obj + if isinstance(obj, dict): + return {k: _shrink(v) for k, v in obj.items()} + if isinstance(obj, list): + return [_shrink(v) for v in obj] + return obj + + shrunken = _shrink(parsed) + # ensure_ascii=False preserves CJK/emoji instead of bloating with \uXXXX + return json.dumps(shrunken, ensure_ascii=False) + + +def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str: + """Create an informative 1-line summary of a tool call + result. + + Used during the pre-compression pruning pass to replace large tool + outputs with a short but useful description of what the tool did, + rather than a generic placeholder that carries zero information. + + Returns strings like:: + + [terminal] ran `npm test` -> exit 0, 47 lines output + [read_file] read config.py from line 1 (1,200 chars) + [search_files] content search for 'compress' in agent/ -> 12 matches + """ + try: + args = json.loads(tool_args) if tool_args else {} + except (json.JSONDecodeError, TypeError): + args = {} + + content = tool_content or "" + content_len = len(content) + line_count = content.count("\n") + 1 if content.strip() else 0 + + if tool_name == "terminal": + cmd = args.get("command", "") + if len(cmd) > 80: + cmd = cmd[:77] + "..." + exit_match = re.search(r'"exit_code"\s*:\s*(-?\d+)', content) + exit_code = exit_match.group(1) if exit_match else "?" 
+ return f"[terminal] ran `{cmd}` -> exit {exit_code}, {line_count} lines output" + + if tool_name == "read_file": + path = args.get("path", "?") + offset = args.get("offset", 1) + return f"[read_file] read {path} from line {offset} ({content_len:,} chars)" + + if tool_name == "write_file": + path = args.get("path", "?") + written_lines = args.get("content", "").count("\n") + 1 if args.get("content") else "?" + return f"[write_file] wrote to {path} ({written_lines} lines)" + + if tool_name == "search_files": + pattern = args.get("pattern", "?") + path = args.get("path", ".") + target = args.get("target", "content") + match_count = re.search(r'"total_count"\s*:\s*(\d+)', content) + count = match_count.group(1) if match_count else "?" + return f"[search_files] {target} search for '{pattern}' in {path} -> {count} matches" + + if tool_name == "patch": + path = args.get("path", "?") + mode = args.get("mode", "replace") + return f"[patch] {mode} in {path} ({content_len:,} chars result)" + + if tool_name in ("browser_navigate", "browser_click", "browser_snapshot", + "browser_type", "browser_scroll", "browser_vision"): + url = args.get("url", "") + ref = args.get("ref", "") + detail = f" {url}" if url else (f" ref={ref}" if ref else "") + return f"[{tool_name}]{detail} ({content_len:,} chars)" + + if tool_name == "web_search": + query = args.get("query", "?") + return f"[web_search] query='{query}' ({content_len:,} chars result)" + + if tool_name == "web_extract": + urls = args.get("urls", []) + url_desc = urls[0] if isinstance(urls, list) and urls else "?" + if isinstance(urls, list) and len(urls) > 1: + url_desc += f" (+{len(urls) - 1} more)" + return f"[web_extract] {url_desc} ({content_len:,} chars)" + + if tool_name == "delegate_task": + goal = args.get("goal", "") + if len(goal) > 60: + goal = goal[:57] + "..." 
+ return f"[delegate_task] '{goal}' ({content_len:,} chars result)" + + if tool_name == "execute_code": + code_preview = (args.get("code") or "")[:60].replace("\n", " ") + if len(args.get("code", "")) > 60: + code_preview += "..." + return f"[execute_code] `{code_preview}` ({line_count} lines output)" + + if tool_name in ("skill_view", "skills_list", "skill_manage"): + name = args.get("name", "?") + return f"[{tool_name}] name={name} ({content_len:,} chars)" + + if tool_name == "vision_analyze": + question = args.get("question", "")[:50] + return f"[vision_analyze] '{question}' ({content_len:,} chars)" + + if tool_name == "memory": + action = args.get("action", "?") + target = args.get("target", "?") + return f"[memory] {action} on {target}" + + if tool_name == "todo": + return "[todo] updated task list" + + if tool_name == "clarify": + return "[clarify] asked user a question" + + if tool_name == "text_to_speech": + return f"[text_to_speech] generated audio ({content_len:,} chars)" + + if tool_name == "cronjob": + action = args.get("action", "?") + return f"[cronjob] {action}" + + if tool_name == "process": + action = args.get("action", "?") + sid = args.get("session_id", "?") + return f"[process] {action} session={sid}" + + # Generic fallback + first_arg = "" + for k, v in list(args.items())[:2]: + sv = str(v)[:40] + first_arg += f" {k}={sv}" + return f"[{tool_name}]{first_arg} ({content_len:,} chars result)" + + class ContextCompressor(ContextEngine): """Default context engine — compresses conversation context via lossy summarization. 
@@ -78,6 +252,8 @@ class ContextCompressor(ContextEngine): self._context_probed = False self._context_probe_persistable = False self._previous_summary = None + self._last_compression_savings_pct = 100.0 + self._ineffective_compression_count = 0 def update_model( self, @@ -167,6 +343,9 @@ class ContextCompressor(ContextEngine): # Stores the previous compaction summary for iterative updates self._previous_summary: Optional[str] = None + # Anti-thrashing: track whether last compression was effective + self._last_compression_savings_pct: float = 100.0 + self._ineffective_compression_count: int = 0 self._summary_failure_cooldown_until: float = 0.0 def update_from_response(self, usage: Dict[str, Any]): @@ -175,9 +354,26 @@ class ContextCompressor(ContextEngine): self.last_completion_tokens = usage.get("completion_tokens", 0) def should_compress(self, prompt_tokens: int = None) -> bool: - """Check if context exceeds the compression threshold.""" + """Check if context exceeds the compression threshold. + + Includes anti-thrashing protection: if the last two compressions + each saved less than 10%, skip compression to avoid infinite loops + where each pass removes only 1-2 messages. + """ tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens - return tokens >= self.threshold_tokens + if tokens < self.threshold_tokens: + return False + # Anti-thrashing: back off if recent compressions were ineffective + if self._ineffective_compression_count >= 2: + if not self.quiet_mode: + logger.warning( + "Compression skipped — last %d compressions saved <10%% each. 
" + "Consider /new to start a fresh session, or /compress " + "for focused compression.", + self._ineffective_compression_count, + ) + return False + return True # ------------------------------------------------------------------ # Tool output pruning (cheap pre-pass, no LLM call) @@ -187,7 +383,16 @@ class ContextCompressor(ContextEngine): self, messages: List[Dict[str, Any]], protect_tail_count: int, protect_tail_tokens: int | None = None, ) -> tuple[List[Dict[str, Any]], int]: - """Replace old tool result contents with a short placeholder. + """Replace old tool result contents with informative 1-line summaries. + + Instead of a generic placeholder, generates a summary like:: + + [terminal] ran `npm test` -> exit 0, 47 lines output + [read_file] read config.py from line 1 (3,400 chars) + + Also deduplicates identical tool results (e.g. reading the same file + 5x keeps only the newest full copy) and truncates large tool_call + arguments in assistant messages outside the protected tail. 
Walks backward from the end, protecting the most recent messages that fall within ``protect_tail_tokens`` (when provided) OR the last @@ -203,6 +408,22 @@ class ContextCompressor(ContextEngine): result = [m.copy() for m in messages] pruned = 0 + # Build index: tool_call_id -> (tool_name, arguments_json) + call_id_to_tool: Dict[str, tuple] = {} + for msg in result: + if msg.get("role") == "assistant": + for tc in msg.get("tool_calls") or []: + if isinstance(tc, dict): + cid = tc.get("id", "") + fn = tc.get("function", {}) + call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", "")) + else: + cid = getattr(tc, "id", "") or "" + fn = getattr(tc, "function", None) + name = getattr(fn, "name", "unknown") if fn else "unknown" + args_str = getattr(fn, "arguments", "") if fn else "" + call_id_to_tool[cid] = (name, args_str) + # Determine the prune boundary if protect_tail_tokens is not None and protect_tail_tokens > 0: # Token-budget approach: walk backward accumulating tokens @@ -211,7 +432,8 @@ class ContextCompressor(ContextEngine): min_protect = min(protect_tail_count, len(result) - 1) for i in range(len(result) - 1, -1, -1): msg = result[i] - content_len = len(msg.get("content") or "") + raw_content = msg.get("content") or "" + content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content) msg_tokens = content_len // _CHARS_PER_TOKEN + 10 for tc in msg.get("tool_calls") or []: if isinstance(tc, dict): @@ -226,18 +448,76 @@ class ContextCompressor(ContextEngine): else: prune_boundary = len(result) - protect_tail_count + # Pass 1: Deduplicate identical tool results. + # When the same file is read multiple times, keep only the most recent + # full copy and replace older duplicates with a back-reference. 
+ content_hashes: dict = {} # hash -> (index, tool_call_id) + for i in range(len(result) - 1, -1, -1): + msg = result[i] + if msg.get("role") != "tool": + continue + content = msg.get("content") or "" + # Skip multimodal content (list of content blocks) + if isinstance(content, list): + continue + if len(content) < 200: + continue + h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12] + if h in content_hashes: + # This is an older duplicate — replace with back-reference + result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"} + pruned += 1 + else: + content_hashes[h] = (i, msg.get("tool_call_id", "?")) + + # Pass 2: Replace old tool results with informative summaries for i in range(prune_boundary): msg = result[i] if msg.get("role") != "tool": continue content = msg.get("content", "") + # Skip multimodal content (list of content blocks) + if isinstance(content, list): + continue if not content or content == _PRUNED_TOOL_PLACEHOLDER: continue + # Skip already-deduplicated or previously-summarized results + if content.startswith("[Duplicate tool output"): + continue # Only prune if the content is substantial (>200 chars) if len(content) > 200: - result[i] = {**msg, "content": _PRUNED_TOOL_PLACEHOLDER} + call_id = msg.get("tool_call_id", "") + tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", "")) + summary = _summarize_tool_result(tool_name, tool_args, content) + result[i] = {**msg, "content": summary} pruned += 1 + # Pass 3: Truncate large tool_call arguments in assistant messages + # outside the protected tail. write_file with 50KB content, for + # example, survives pruning entirely without this. + # + # The shrinking is done inside the parsed JSON structure so the + # result remains valid JSON — otherwise downstream providers 400 + # on every subsequent turn until the broken call falls out of + # the window. See ``_truncate_tool_call_args_json`` docstring. 
+ for i in range(prune_boundary): + msg = result[i] + if msg.get("role") != "assistant" or not msg.get("tool_calls"): + continue + new_tcs = [] + modified = False + for tc in msg["tool_calls"]: + if isinstance(tc, dict): + args = tc.get("function", {}).get("arguments", "") + if len(args) > 500: + new_args = _truncate_tool_call_args_json(args) + if new_args != args: + tc = {**tc, "function": {**tc["function"], "arguments": new_args}} + modified = True + new_tcs.append(tc) + if modified: + result[i] = {**msg, "tool_calls": new_tcs} + return result, pruned # ------------------------------------------------------------------ @@ -353,33 +633,51 @@ class ContextCompressor(ContextEngine): "assistant that continues the conversation. " "Do NOT respond to any questions or requests in the conversation — " "only output the structured summary. " - "Do NOT include any preamble, greeting, or prefix." + "Do NOT include any preamble, greeting, or prefix. " + "Write the summary in the same language the user was using in the " + "conversation — do not translate or switch to English." ) # Shared structured template (used by both paths). - # Key changes vs v1: - # - "Pending User Asks" section (from Claude Code) explicitly tracks - # unanswered questions so the model knows what's resolved vs open - # - "Remaining Work" replaces "Next Steps" to avoid reading as active - # instructions - # - "Resolved Questions" makes it clear which questions were already - # answered (prevents model from re-answering them) - _template_sections = f"""## Goal -[What the user is trying to accomplish] + _template_sections = f"""## Active Task +[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or +task assignment verbatim — the exact words they used. If multiple tasks +were requested and only some are done, list only the ones NOT yet completed. +The next assistant must pick up exactly here. 
Example: +"User asked: 'Now refactor the auth module to use JWT instead of sessions'" +If no outstanding task exists, write "None."] + +## Goal +[What the user is trying to accomplish overall] ## Constraints & Preferences [User preferences, coding style, constraints, important decisions] -## Progress -### Done -[Completed work — include specific file paths, commands run, results obtained] -### In Progress -[Work currently underway] -### Blocked -[Any blockers or issues encountered] +## Completed Actions +[Numbered list of concrete actions taken — include tool used, target, and outcome. +Format each as: N. ACTION target — outcome [tool: name] +Example: +1. READ config.py:45 — found `==` should be `!=` [tool: read_file] +2. PATCH config.py:45 — changed `==` to `!=` [tool: patch] +3. TEST `pytest tests/` — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal] +Be specific with file paths, commands, line numbers, and results.] + +## Active State +[Current working state — include: +- Working directory and branch (if applicable) +- Modified/created files with brief note on each +- Test status (X/Y passing) +- Any running processes or servers +- Environment details that matter] + +## In Progress +[Work currently underway — what was being done when compaction fired] + +## Blocked +[Any blockers, errors, or issues not yet resolved. Include exact error messages.] 
## Key Decisions -[Important technical decisions and why they were made] +[Important technical decisions and WHY they were made] ## Resolved Questions [Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them] @@ -396,10 +694,7 @@ class ContextCompressor(ContextEngine): ## Critical Context [Any specific values, error messages, configuration details, or data that would be lost without explicit preservation] -## Tools & Patterns -[Which tools were used, how they were used effectively, and any tool-specific discoveries] - -Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions. +Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed. Write only the summary body. Do not include any preamble or prefix.""" @@ -415,7 +710,7 @@ PREVIOUS SUMMARY: NEW TURNS TO INCORPORATE: {content_to_summarize} -Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete. +Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity. 
{_template_sections}""" else: @@ -450,7 +745,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio "api_mode": self.api_mode, }, "messages": [{"role": "user", "content": prompt}], - "max_tokens": summary_budget * 2, + "max_tokens": int(summary_budget * 1.3), # timeout resolved from auxiliary.compression.timeout config by call_llm } if self.summary_model: @@ -464,8 +759,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Store for iterative updates on next compaction self._previous_summary = summary self._summary_failure_cooldown_until = 0.0 + self._summary_model_fallen_back = False return self._with_summary_prefix(summary) except RuntimeError: + # No provider configured — long cooldown, unlikely to self-resolve self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS logging.warning("Context compression: no provider available for " "summary. Middle turns will be dropped without summary " @@ -473,12 +770,42 @@ The user has requested that this compaction PRIORITISE preserving all informatio _SUMMARY_FAILURE_COOLDOWN_SECONDS) return None except Exception as e: - self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS + # If the summary model is different from the main model and the + # error looks permanent (model not found, 503, 404), fall back to + # using the main model instead of entering cooldown that leaves + # context growing unbounded. 
(#8620 sub-issue 4) + _status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None) + _err_str = str(e).lower() + _is_model_not_found = ( + _status in (404, 503) + or "model_not_found" in _err_str + or "does not exist" in _err_str + or "no available channel" in _err_str + ) + if ( + _is_model_not_found + and self.summary_model + and self.summary_model != self.model + and not getattr(self, "_summary_model_fallen_back", False) + ): + self._summary_model_fallen_back = True + logging.warning( + "Summary model '%s' not available (%s). " + "Falling back to main model '%s' for compression.", + self.summary_model, e, self.model, + ) + self.summary_model = "" # empty = use main model + self._summary_failure_cooldown_until = 0.0 # no cooldown + return self._generate_summary(messages, summary_budget) # retry immediately + + # Transient errors (timeout, rate limit, network) — shorter cooldown + _transient_cooldown = 60 + self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown logging.warning( "Failed to generate context summary: %s. " "Further summary attempts paused for %d seconds.", e, - _SUMMARY_FAILURE_COOLDOWN_SECONDS, + _transient_cooldown, ) return None @@ -601,6 +928,62 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Tail protection by token budget # ------------------------------------------------------------------ + def _find_last_user_message_idx( + self, messages: List[Dict[str, Any]], head_end: int + ) -> int: + """Return the index of the last user-role message at or after *head_end*, or -1.""" + for i in range(len(messages) - 1, head_end - 1, -1): + if messages[i].get("role") == "user": + return i + return -1 + + def _ensure_last_user_message_in_tail( + self, + messages: List[Dict[str, Any]], + cut_idx: int, + head_end: int, + ) -> int: + """Guarantee the most recent user message is in the protected tail. 
+ + Context compressor bug (#10896): ``_align_boundary_backward`` can pull + ``cut_idx`` past a user message when it tries to keep tool_call/result + groups together. If the last user message ends up in the *compressed* + middle region the LLM summariser writes it into "Pending User Asks", + but ``SUMMARY_PREFIX`` tells the next model to respond only to user + messages *after* the summary — so the task effectively disappears from + the active context, causing the agent to stall, repeat completed work, + or silently drop the user's latest request. + + Fix: if the last user-role message is not already in the tail + (``messages[cut_idx:]``), walk ``cut_idx`` back to include it. We + then re-align backward one more time to avoid splitting any + tool_call/result group that immediately precedes the user message. + """ + last_user_idx = self._find_last_user_message_idx(messages, head_end) + if last_user_idx < 0: + # No user message found beyond head — nothing to anchor. + return cut_idx + + if last_user_idx >= cut_idx: + # Already in the tail; nothing to do. + return cut_idx + + # The last user message is in the middle (compressed) region. + # Pull cut_idx back to it directly — a user message is already a + # clean boundary (no tool_call/result splitting risk), so there is no + # need to call _align_boundary_backward here; doing so would + # unnecessarily pull the cut further back into the preceding + # assistant + tool_calls group. + if not self.quiet_mode: + logger.debug( + "Anchoring tail cut to last user message at index %d " + "(was %d) to prevent active-task loss after compression", + last_user_idx, + cut_idx, + ) + # Safety: never go back into the head region. + return max(last_user_idx, head_end + 1) + def _find_tail_cut_by_tokens( self, messages: List[Dict[str, Any]], head_end: int, token_budget: int | None = None, @@ -618,7 +1001,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio read, etc.). 
If even the minimum 3 messages exceed 1.5x the budget the cut is placed right after the head so compression still runs. - Never cuts inside a tool_call/result group. + Never cuts inside a tool_call/result group. Always ensures the most + recent user message is in the tail (see ``_ensure_last_user_message_in_tail``). """ if token_budget is None: token_budget = self.tail_token_budget @@ -657,6 +1041,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Align to avoid splitting tool groups cut_idx = self._align_boundary_backward(messages, cut_idx) + # Ensure the most recent user message is always in the tail so the + # active task is never lost to compression (fixes #10896). + cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end) + return max(cut_idx, head_end + 1) # ------------------------------------------------------------------ @@ -744,11 +1132,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio compressed = [] for i in range(compress_start): msg = messages[i].copy() - if i == 0 and msg.get("role") == "system" and self.compression_count == 0: - msg["content"] = ( - (msg.get("content") or "") - + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]" - ) + if i == 0 and msg.get("role") == "system": + existing = msg.get("content") or "" + _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. 
The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]" + if _compression_note not in existing: + msg["content"] = existing + "\n\n" + _compression_note compressed.append(msg) # If LLM summary failed, insert a static fallback so the model @@ -806,14 +1194,24 @@ The user has requested that this compaction PRIORITISE preserving all informatio compressed = self._sanitize_tool_pairs(compressed) + new_estimate = estimate_messages_tokens_rough(compressed) + saved_estimate = display_tokens - new_estimate + + # Anti-thrashing: track compression effectiveness + savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0 + self._last_compression_savings_pct = savings_pct + if savings_pct < 10: + self._ineffective_compression_count += 1 + else: + self._ineffective_compression_count = 0 + if not self.quiet_mode: - new_estimate = estimate_messages_tokens_rough(compressed) - saved_estimate = display_tokens - new_estimate logger.info( - "Compressed: %d -> %d messages (~%d tokens saved)", + "Compressed: %d -> %d messages (~%d tokens saved, %.0f%%)", n_messages, len(compressed), saved_estimate, + savings_pct, ) logger.info("Compression #%d complete", self.compression_count) diff --git a/agent/context_references.py b/agent/context_references.py index 7ecb90c497..50a33a1d75 100644 --- a/agent/context_references.py +++ b/agent/context_references.py @@ -483,9 +483,7 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None: text=True, timeout=10, ) - except FileNotFoundError: - return None - except subprocess.TimeoutExpired: + except (FileNotFoundError, OSError, subprocess.TimeoutExpired): return None if result.returncode != 0: return None diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 235fd9a1a5..031c58d705 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -313,9 +313,25 @@ class CopilotACPClient: tools=tools, 
tool_choice=tool_choice, ) + # Normalise timeout: run_agent.py may pass an httpx.Timeout object + # (used natively by the OpenAI SDK) rather than a plain float. + if timeout is None: + _effective_timeout = _DEFAULT_TIMEOUT_SECONDS + elif isinstance(timeout, (int, float)): + _effective_timeout = float(timeout) + else: + # httpx.Timeout or similar — pick the largest component so the + # subprocess has enough wall-clock time for the full response. + _candidates = [ + getattr(timeout, attr, None) + for attr in ("read", "write", "connect", "pool", "timeout") + ] + _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))] + _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS + response_text, reasoning_text = self._run_prompt( prompt_text, - timeout_seconds=float(timeout or _DEFAULT_TIMEOUT_SECONDS), + timeout_seconds=_effective_timeout, ) tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 8a2fecf5d6..b02514e990 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -22,8 +22,6 @@ from hermes_cli.auth import ( _auth_store_lock, _codex_access_token_is_expiring, _decode_jwt_claims, - _import_codex_cli_tokens, - _write_codex_cli_tokens, _load_auth_store, _load_provider_state, _resolve_kimi_base_url, @@ -457,39 +455,6 @@ class CredentialPool: logger.debug("Failed to sync from credentials file: %s", exc) return entry - def _sync_codex_entry_from_cli(self, entry: PooledCredential) -> PooledCredential: - """Sync an openai-codex pool entry from ~/.codex/auth.json if tokens differ. - - OpenAI OAuth refresh tokens are single-use and rotate on every refresh. - When the Codex CLI (or another Hermes profile) refreshes its token, - the pool entry's refresh_token becomes stale. This method detects that - by comparing against ~/.codex/auth.json and syncing the fresh pair. 
- """ - if self.provider != "openai-codex": - return entry - try: - cli_tokens = _import_codex_cli_tokens() - if not cli_tokens: - return entry - cli_refresh = cli_tokens.get("refresh_token", "") - cli_access = cli_tokens.get("access_token", "") - if cli_refresh and cli_refresh != entry.refresh_token: - logger.debug("Pool entry %s: syncing tokens from ~/.codex/auth.json (refresh token changed)", entry.id) - updated = replace( - entry, - access_token=cli_access, - refresh_token=cli_refresh, - last_status=None, - last_status_at=None, - last_error_code=None, - ) - self._replace_entry(entry, updated) - self._persist() - return updated - except Exception as exc: - logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc) - return entry - def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None: """Write refreshed pool entry tokens back to auth.json providers. @@ -585,13 +550,6 @@ class CredentialPool: except Exception as wexc: logger.debug("Failed to write refreshed token to credentials file: %s", wexc) elif self.provider == "openai-codex": - # Proactively sync from ~/.codex/auth.json before refresh. - # The Codex CLI (or another Hermes profile) may have already - # consumed our refresh_token. Syncing first avoids a - # "refresh_token_reused" error when the CLI has a newer pair. - synced = self._sync_codex_entry_from_cli(entry) - if synced is not entry: - entry = synced refreshed = auth_mod.refresh_codex_oauth_pure( entry.access_token, entry.refresh_token, @@ -677,45 +635,6 @@ class CredentialPool: # Credentials file had a valid (non-expired) token — use it directly logger.debug("Credentials file has valid token, using without refresh") return synced - # For openai-codex: the refresh_token may have been consumed by - # the Codex CLI between our proactive sync and the refresh call. - # Re-sync and retry once. 
- if self.provider == "openai-codex": - synced = self._sync_codex_entry_from_cli(entry) - if synced.refresh_token != entry.refresh_token: - logger.debug("Retrying Codex refresh with synced token from ~/.codex/auth.json") - try: - refreshed = auth_mod.refresh_codex_oauth_pure( - synced.access_token, - synced.refresh_token, - ) - updated = replace( - synced, - access_token=refreshed["access_token"], - refresh_token=refreshed["refresh_token"], - last_refresh=refreshed.get("last_refresh"), - last_status=STATUS_OK, - last_status_at=None, - last_error_code=None, - ) - self._replace_entry(synced, updated) - self._persist() - self._sync_device_code_entry_to_auth_store(updated) - try: - _write_codex_cli_tokens( - updated.access_token, - updated.refresh_token, - last_refresh=updated.last_refresh, - ) - except Exception as wexc: - logger.debug("Failed to write refreshed Codex tokens to CLI file (retry): %s", wexc) - return updated - except Exception as retry_exc: - logger.debug("Codex retry refresh also failed: %s", retry_exc) - elif not self._entry_needs_refresh(synced): - logger.debug("Codex CLI has valid token, using without refresh") - self._sync_device_code_entry_to_auth_store(synced) - return synced self._mark_exhausted(entry, None) return None @@ -734,17 +653,6 @@ class CredentialPool: # _seed_from_singletons() on the next load_pool() sees fresh state # instead of re-seeding stale/consumed tokens. self._sync_device_code_entry_to_auth_store(updated) - # Write refreshed tokens back to ~/.codex/auth.json so Codex CLI - # and VS Code don't hit "refresh_token_reused" on their next refresh. 
- if self.provider == "openai-codex": - try: - _write_codex_cli_tokens( - updated.access_token, - updated.refresh_token, - last_refresh=updated.last_refresh, - ) - except Exception as wexc: - logger.debug("Failed to write refreshed Codex tokens to CLI file: %s", wexc) return updated def _entry_needs_refresh(self, entry: PooledCredential) -> bool: @@ -790,16 +698,6 @@ class CredentialPool: if synced is not entry: entry = synced cleared_any = True - # For openai-codex entries, sync from ~/.codex/auth.json before - # any status/refresh checks. This picks up tokens refreshed by - # the Codex CLI or another Hermes profile. - if (self.provider == "openai-codex" - and entry.last_status == STATUS_EXHAUSTED - and entry.refresh_token): - synced = self._sync_codex_entry_from_cli(entry) - if synced is not entry: - entry = synced - cleared_any = True if entry.last_status == STATUS_EXHAUSTED: exhausted_until = _exhausted_until(entry) if exhausted_until is not None and now < exhausted_until: @@ -1130,6 +1028,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup state = _load_provider_state(auth_store, "nous") if state: active_sources.add("device_code") + # Prefer a user-supplied label embedded in the singleton state + # (set by persist_nous_credentials(label=...) when the user ran + # `hermes auth add nous --label `). Fall back to the + # auto-derived token fingerprint for logins that didn't supply one. 
+ custom_label = str(state.get("label") or "").strip() + seeded_label = custom_label or label_from_token( + state.get("access_token", ""), "device_code" + ) changed |= _upsert_entry( entries, provider, @@ -1148,7 +1054,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup "agent_key": state.get("agent_key"), "agent_key_expires_at": state.get("agent_key_expires_at"), "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None, - "label": label_from_token(state.get("access_token", ""), "device_code"), + "label": seeded_label, }, ) @@ -1162,6 +1068,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup if token: source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}" active_sources.add(source_name) + pconfig = PROVIDER_REGISTRY.get(provider) changed |= _upsert_entry( entries, provider, @@ -1170,6 +1077,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup "source": source_name, "auth_type": AUTH_TYPE_API_KEY, "access_token": token, + "base_url": pconfig.inference_base_url if pconfig else "", "label": source, }, ) @@ -1206,25 +1114,27 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup logger.debug("Qwen OAuth token seed failed: %s", exc) elif provider == "openai-codex": + # Respect user suppression — `hermes auth remove openai-codex` marks + # the device_code source as suppressed so it won't be re-seeded from + # the Hermes auth store. Without this gate the removal is instantly + # undone on the next load_pool() call. 
+ codex_suppressed = False + try: + from hermes_cli.auth import is_source_suppressed + codex_suppressed = is_source_suppressed(provider, "device_code") + except ImportError: + pass + if codex_suppressed: + return changed, active_sources + state = _load_provider_state(auth_store, "openai-codex") tokens = state.get("tokens") if isinstance(state, dict) else None - # Fallback: import from Codex CLI (~/.codex/auth.json) if Hermes auth - # store has no tokens. This mirrors resolve_codex_runtime_credentials() - # so that load_pool() and list_authenticated_providers() detect tokens - # that only exist in the Codex CLI shared file. - if not (isinstance(tokens, dict) and tokens.get("access_token")): - try: - from hermes_cli.auth import _import_codex_cli_tokens, _save_codex_tokens - cli_tokens = _import_codex_cli_tokens() - if cli_tokens: - logger.info("Importing Codex CLI tokens into Hermes auth store.") - _save_codex_tokens(cli_tokens) - # Re-read state after import - auth_store = _load_auth_store() - state = _load_provider_state(auth_store, "openai-codex") - tokens = state.get("tokens") if isinstance(state, dict) else None - except Exception as exc: - logger.debug("Codex CLI token import failed: %s", exc) + # Hermes owns its own Codex auth state — we do NOT auto-import from + # ~/.codex/auth.json at pool-load time. OAuth refresh tokens are + # single-use, so sharing them with Codex CLI / VS Code causes + # refresh_token_reused race failures. Users who want to adopt + # existing Codex CLI credentials get a one-time, explicit prompt + # via `hermes auth openai-codex`. 
if isinstance(tokens, dict) and tokens.get("access_token"): active_sources.add("device_code") changed |= _upsert_entry( diff --git a/agent/display.py b/agent/display.py index 063b7bb1c7..474595d76c 100644 --- a/agent/display.py +++ b/agent/display.py @@ -225,9 +225,11 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) - content = _oneline(args.get("content", "")) return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\"" elif action == "replace": - return f"~{target}: \"{_oneline(args.get('old_text', '')[:20])}\"" + old = _oneline(args.get("old_text") or "") or "" + return f"~{target}: \"{old[:20]}\"" elif action == "remove": - return f"-{target}: \"{_oneline(args.get('old_text', '')[:20])}\"" + old = _oneline(args.get("old_text") or "") or "" + return f"-{target}: \"{old[:20]}\"" return action if tool_name == "send_message": @@ -600,6 +602,45 @@ class KawaiiSpinner: "analyzing", "computing", "synthesizing", "formulating", "brainstorming", ] + @classmethod + def get_waiting_faces(cls) -> list: + """Return waiting faces from the active skin, falling back to KAWAII_WAITING.""" + try: + skin = _get_skin() + if skin: + faces = skin.spinner.get("waiting_faces", []) + if faces: + return faces + except Exception: + pass + return cls.KAWAII_WAITING + + @classmethod + def get_thinking_faces(cls) -> list: + """Return thinking faces from the active skin, falling back to KAWAII_THINKING.""" + try: + skin = _get_skin() + if skin: + faces = skin.spinner.get("thinking_faces", []) + if faces: + return faces + except Exception: + pass + return cls.KAWAII_THINKING + + @classmethod + def get_thinking_verbs(cls) -> list: + """Return thinking verbs from the active skin, falling back to THINKING_VERBS.""" + try: + skin = _get_skin() + if skin: + verbs = skin.spinner.get("thinking_verbs", []) + if verbs: + return verbs + except Exception: + pass + return cls.THINKING_VERBS + def __init__(self, message: str = "", spinner_type: str = 'dots', 
print_fn=None): self.message = message self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots']) @@ -900,9 +941,13 @@ def get_cute_tool_message( if action == "add": return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}") elif action == "replace": - return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}") + old = args.get("old_text") or "" + old = old if old else "" + return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(old, 20)}\" {dur}") elif action == "remove": - return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}") + old = args.get("old_text") or "" + old = old if old else "" + return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(old, 20)}\" {dur}") return _wrap(f"┊ 🧠 memory {action} {dur}") if tool_name == "skills_list": return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}") @@ -954,84 +999,4 @@ def get_cute_tool_message( # Honcho session line (one-liner with clickable OSC 8 hyperlink) # ========================================================================= -_DIM = "\033[2m" -_SKY_BLUE = "\033[38;5;117m" -_ANSI_RESET = "\033[0m" - -# ========================================================================= -# Context pressure display (CLI user-facing warnings) -# ========================================================================= - -# ANSI color codes for context pressure tiers -_CYAN = "\033[36m" -_YELLOW = "\033[33m" -_BOLD = "\033[1m" -_DIM_ANSI = "\033[2m" - -# Bar characters -_BAR_FILLED = "▰" -_BAR_EMPTY = "▱" -_BAR_WIDTH = 20 - - -def format_context_pressure( - compaction_progress: float, - threshold_tokens: int, - threshold_percent: float, - compression_enabled: bool = True, -) -> str: - """Build a formatted context pressure line for CLI display. - - The bar and percentage show progress toward the compaction threshold, - NOT the raw context window. 100% = compaction fires. 
- - Args: - compaction_progress: How close to compaction (0.0–1.0, 1.0 = fires). - threshold_tokens: Compaction threshold in tokens. - threshold_percent: Compaction threshold as a fraction of context window. - compression_enabled: Whether auto-compression is active. - """ - pct_int = min(int(compaction_progress * 100), 100) - filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH) - bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled) - - threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens) - threshold_pct_int = int(threshold_percent * 100) - - color = f"{_BOLD}{_YELLOW}" - icon = "⚠" - if compression_enabled: - hint = "compaction approaching" - else: - hint = "no auto-compaction" - - return ( - f" {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}" - f" {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}" - ) - - -def format_context_pressure_gateway( - compaction_progress: float, - threshold_percent: float, - compression_enabled: bool = True, -) -> str: - """Build a plain-text context pressure notification for messaging platforms. - - No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc. - The percentage shows progress toward the compaction threshold. - """ - pct_int = min(int(compaction_progress * 100), 100) - filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH) - bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled) - - threshold_pct_int = int(threshold_percent * 100) - - icon = "⚠️" - if compression_enabled: - hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)." - else: - hint = "Auto-compaction is disabled — context may be truncated." 
- - return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}" diff --git a/agent/error_classifier.py b/agent/error_classifier.py index e436e55710..fcdb8ba676 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -112,6 +112,10 @@ _RATE_LIMIT_PATTERNS = [ "please retry after", "resource_exhausted", "rate increased too quickly", # Alibaba/DashScope throttling + # AWS Bedrock throttling + "throttlingexception", + "too many concurrent requests", + "servicequotaexceededexception", ] # Usage-limit patterns that need disambiguation (could be billing OR rate_limit) @@ -171,6 +175,11 @@ _CONTEXT_OVERFLOW_PATTERNS = [ # Chinese error messages (some providers return these) "超过最大长度", "上下文长度", + # AWS Bedrock Converse API error patterns + "input is too long", + "max input token", + "input token", + "exceeds the maximum number of input tokens", ] # Model not found patterns @@ -281,7 +290,7 @@ def classify_api_error( if isinstance(body, dict): _err_obj = body.get("error", {}) if isinstance(_err_obj, dict): - _body_msg = (_err_obj.get("message") or "").lower() + _body_msg = str(_err_obj.get("message") or "").lower() # Parse metadata.raw for wrapped provider errors _metadata = _err_obj.get("metadata", {}) if isinstance(_metadata, dict): @@ -293,11 +302,11 @@ def classify_api_error( if isinstance(_inner, dict): _inner_err = _inner.get("error", {}) if isinstance(_inner_err, dict): - _metadata_msg = (_inner_err.get("message") or "").lower() + _metadata_msg = str(_inner_err.get("message") or "").lower() except (json.JSONDecodeError, TypeError): pass if not _body_msg: - _body_msg = (body.get("message") or "").lower() + _body_msg = str(body.get("message") or "").lower() # Combine all message sources for pattern matching parts = [_raw_msg] if _body_msg and _body_msg not in _raw_msg: @@ -597,10 +606,10 @@ def _classify_400( if isinstance(body, dict): err_obj = body.get("error", {}) if isinstance(err_obj, dict): - err_body_msg = (err_obj.get("message") or 
"").strip().lower() + err_body_msg = str(err_obj.get("message") or "").strip().lower() # Responses API (and some providers) use flat body: {"message": "..."} if not err_body_msg: - err_body_msg = (body.get("message") or "").strip().lower() + err_body_msg = str(body.get("message") or "").strip().lower() is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "") is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80 diff --git a/agent/gemini_cloudcode_adapter.py b/agent/gemini_cloudcode_adapter.py new file mode 100644 index 0000000000..b5a8fb9272 --- /dev/null +++ b/agent/gemini_cloudcode_adapter.py @@ -0,0 +1,904 @@ +"""OpenAI-compatible facade that talks to Google's Cloud Code Assist backend. + +This adapter lets Hermes use the ``google-gemini-cli`` provider as if it were +a standard OpenAI-shaped chat completion endpoint, while the underlying HTTP +traffic goes to ``cloudcode-pa.googleapis.com/v1internal:{generateContent, +streamGenerateContent}`` with a Bearer access token obtained via OAuth PKCE. + +Architecture +------------ +- ``GeminiCloudCodeClient`` exposes ``.chat.completions.create(**kwargs)`` + mirroring the subset of the OpenAI SDK that ``run_agent.py`` uses. +- Incoming OpenAI ``messages[]`` / ``tools[]`` / ``tool_choice`` are translated + to Gemini's native ``contents[]`` / ``tools[].functionDeclarations`` / + ``toolConfig`` / ``systemInstruction`` shape. +- The request body is wrapped ``{project, model, user_prompt_id, request}`` + per Code Assist API expectations. +- Responses (``candidates[].content.parts[]``) are converted back to + OpenAI ``choices[0].message`` shape with ``content`` + ``tool_calls``. +- Streaming uses SSE (``?alt=sse``) and yields OpenAI-shaped delta chunks. + +Attribution +----------- +Translation semantics follow jenslys/opencode-gemini-auth (MIT) and the public +Gemini API docs. 
Request envelope shape +(``{project, model, user_prompt_id, request}``) is documented nowhere; it is +reverse-engineered from the opencode-gemini-auth and clawdbot implementations. +""" + +from __future__ import annotations + +import json +import logging +import os +import time +import uuid +from types import SimpleNamespace +from typing import Any, Dict, Iterator, List, Optional + +import httpx + +from agent import google_oauth +from agent.gemini_schema import sanitize_gemini_tool_parameters +from agent.google_code_assist import ( + CODE_ASSIST_ENDPOINT, + FREE_TIER_ID, + CodeAssistError, + ProjectContext, + resolve_project_context, +) + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# Request translation: OpenAI → Gemini +# ============================================================================= + +_ROLE_MAP_OPENAI_TO_GEMINI = { + "user": "user", + "assistant": "model", + "system": "user", # handled separately via systemInstruction + "tool": "user", # functionResponse is wrapped in a user-role turn + "function": "user", +} + + +def _coerce_content_to_text(content: Any) -> str: + """OpenAI content may be str or a list of parts; reduce to plain text.""" + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + pieces: List[str] = [] + for p in content: + if isinstance(p, str): + pieces.append(p) + elif isinstance(p, dict): + if p.get("type") == "text" and isinstance(p.get("text"), str): + pieces.append(p["text"]) + # Multimodal (image_url, etc.) 
— stub for now; log and skip + elif p.get("type") in ("image_url", "input_audio"): + logger.debug("Dropping multimodal part (not yet supported): %s", p.get("type")) + return "\n".join(pieces) + return str(content) + + +def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]: + """OpenAI tool_call -> Gemini functionCall part.""" + fn = tool_call.get("function") or {} + args_raw = fn.get("arguments", "") + try: + args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {} + except json.JSONDecodeError: + args = {"_raw": args_raw} + if not isinstance(args, dict): + args = {"_value": args} + return { + "functionCall": { + "name": fn.get("name") or "", + "args": args, + }, + # Sentinel signature — matches opencode-gemini-auth's approach. + # Without this, Code Assist rejects function calls that originated + # outside its own chain. + "thoughtSignature": "skip_thought_signature_validator", + } + + +def _translate_tool_result_to_gemini(message: Dict[str, Any]) -> Dict[str, Any]: + """OpenAI tool-role message -> Gemini functionResponse part. + + The function name isn't in the OpenAI tool message directly; it must be + passed via the assistant message that issued the call. For simplicity we + look up ``name`` on the message (OpenAI SDK copies it there) or on the + ``tool_call_id`` cross-reference. + """ + name = str(message.get("name") or message.get("tool_call_id") or "tool") + content = _coerce_content_to_text(message.get("content")) + # Gemini expects the response as a dict under `response`. We wrap plain + # text in {"output": "..."}. 
+ try: + parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None + except json.JSONDecodeError: + parsed = None + response = parsed if isinstance(parsed, dict) else {"output": content} + return { + "functionResponse": { + "name": name, + "response": response, + }, + } + + +def _build_gemini_contents( + messages: List[Dict[str, Any]], +) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]: + """Convert OpenAI messages[] to Gemini contents[] + systemInstruction.""" + system_text_parts: List[str] = [] + contents: List[Dict[str, Any]] = [] + + for msg in messages: + if not isinstance(msg, dict): + continue + role = str(msg.get("role") or "user") + + if role == "system": + system_text_parts.append(_coerce_content_to_text(msg.get("content"))) + continue + + # Tool result message — emit a user-role turn with functionResponse + if role == "tool" or role == "function": + contents.append({ + "role": "user", + "parts": [_translate_tool_result_to_gemini(msg)], + }) + continue + + gemini_role = _ROLE_MAP_OPENAI_TO_GEMINI.get(role, "user") + parts: List[Dict[str, Any]] = [] + + text = _coerce_content_to_text(msg.get("content")) + if text: + parts.append({"text": text}) + + # Assistant messages can carry tool_calls + tool_calls = msg.get("tool_calls") or [] + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict): + parts.append(_translate_tool_call_to_gemini(tc)) + + if not parts: + # Gemini rejects empty parts; skip the turn entirely + continue + + contents.append({"role": gemini_role, "parts": parts}) + + system_instruction: Optional[Dict[str, Any]] = None + joined_system = "\n".join(p for p in system_text_parts if p).strip() + if joined_system: + system_instruction = { + "role": "system", + "parts": [{"text": joined_system}], + } + + return contents, system_instruction + + +def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]: + """OpenAI tools[] -> Gemini tools[].functionDeclarations[].""" + if not 
isinstance(tools, list) or not tools: + return [] + declarations: List[Dict[str, Any]] = [] + for t in tools: + if not isinstance(t, dict): + continue + fn = t.get("function") or {} + if not isinstance(fn, dict): + continue + name = fn.get("name") + if not name: + continue + decl = {"name": str(name)} + if fn.get("description"): + decl["description"] = str(fn["description"]) + params = fn.get("parameters") + if isinstance(params, dict): + decl["parameters"] = sanitize_gemini_tool_parameters(params) + declarations.append(decl) + if not declarations: + return [] + return [{"functionDeclarations": declarations}] + + +def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]: + """OpenAI tool_choice -> Gemini toolConfig.functionCallingConfig.""" + if tool_choice is None: + return None + if isinstance(tool_choice, str): + if tool_choice == "auto": + return {"functionCallingConfig": {"mode": "AUTO"}} + if tool_choice == "required": + return {"functionCallingConfig": {"mode": "ANY"}} + if tool_choice == "none": + return {"functionCallingConfig": {"mode": "NONE"}} + if isinstance(tool_choice, dict): + fn = tool_choice.get("function") or {} + name = fn.get("name") + if name: + return { + "functionCallingConfig": { + "mode": "ANY", + "allowedFunctionNames": [str(name)], + }, + } + return None + + +def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]: + """Accept thinkingBudget / thinkingLevel / includeThoughts (+ snake_case).""" + if not isinstance(config, dict) or not config: + return None + budget = config.get("thinkingBudget", config.get("thinking_budget")) + level = config.get("thinkingLevel", config.get("thinking_level")) + include = config.get("includeThoughts", config.get("include_thoughts")) + normalized: Dict[str, Any] = {} + if isinstance(budget, (int, float)): + normalized["thinkingBudget"] = int(budget) + if isinstance(level, str) and level.strip(): + normalized["thinkingLevel"] = level.strip().lower() + if 
isinstance(include, bool): + normalized["includeThoughts"] = include + return normalized or None + + +def build_gemini_request( + *, + messages: List[Dict[str, Any]], + tools: Any = None, + tool_choice: Any = None, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_p: Optional[float] = None, + stop: Any = None, + thinking_config: Any = None, +) -> Dict[str, Any]: + """Build the inner Gemini request body (goes inside ``request`` wrapper).""" + contents, system_instruction = _build_gemini_contents(messages) + + body: Dict[str, Any] = {"contents": contents} + if system_instruction is not None: + body["systemInstruction"] = system_instruction + + gemini_tools = _translate_tools_to_gemini(tools) + if gemini_tools: + body["tools"] = gemini_tools + tool_cfg = _translate_tool_choice_to_gemini(tool_choice) + if tool_cfg is not None: + body["toolConfig"] = tool_cfg + + generation_config: Dict[str, Any] = {} + if isinstance(temperature, (int, float)): + generation_config["temperature"] = float(temperature) + if isinstance(max_tokens, int) and max_tokens > 0: + generation_config["maxOutputTokens"] = max_tokens + if isinstance(top_p, (int, float)): + generation_config["topP"] = float(top_p) + if isinstance(stop, str) and stop: + generation_config["stopSequences"] = [stop] + elif isinstance(stop, list) and stop: + generation_config["stopSequences"] = [str(s) for s in stop if s] + normalized_thinking = _normalize_thinking_config(thinking_config) + if normalized_thinking: + generation_config["thinkingConfig"] = normalized_thinking + if generation_config: + body["generationConfig"] = generation_config + + return body + + +def wrap_code_assist_request( + *, + project_id: str, + model: str, + inner_request: Dict[str, Any], + user_prompt_id: Optional[str] = None, +) -> Dict[str, Any]: + """Wrap the inner Gemini request in the Code Assist envelope.""" + return { + "project": project_id, + "model": model, + "user_prompt_id": user_prompt_id or 
str(uuid.uuid4()), + "request": inner_request, + } + + +# ============================================================================= +# Response translation: Gemini → OpenAI +# ============================================================================= + +def _translate_gemini_response( + resp: Dict[str, Any], + model: str, +) -> SimpleNamespace: + """Non-streaming Gemini response -> OpenAI-shaped SimpleNamespace. + + Code Assist wraps the actual Gemini response inside ``response``, so we + unwrap it first if present. + """ + inner = resp.get("response") if isinstance(resp.get("response"), dict) else resp + + candidates = inner.get("candidates") or [] + if not isinstance(candidates, list) or not candidates: + return _empty_response(model) + + cand = candidates[0] + content_obj = cand.get("content") if isinstance(cand, dict) else {} + parts = content_obj.get("parts") if isinstance(content_obj, dict) else [] + + text_pieces: List[str] = [] + reasoning_pieces: List[str] = [] + tool_calls: List[SimpleNamespace] = [] + + for i, part in enumerate(parts or []): + if not isinstance(part, dict): + continue + # Thought parts are model's internal reasoning — surface as reasoning, + # don't mix into content. 
+ if part.get("thought") is True: + if isinstance(part.get("text"), str): + reasoning_pieces.append(part["text"]) + continue + if isinstance(part.get("text"), str): + text_pieces.append(part["text"]) + continue + fc = part.get("functionCall") + if isinstance(fc, dict) and fc.get("name"): + try: + args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False) + except (TypeError, ValueError): + args_str = "{}" + tool_calls.append(SimpleNamespace( + id=f"call_{uuid.uuid4().hex[:12]}", + type="function", + index=i, + function=SimpleNamespace(name=str(fc["name"]), arguments=args_str), + )) + + finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason( + str(cand.get("finishReason") or "") + ) + + usage_meta = inner.get("usageMetadata") or {} + usage = SimpleNamespace( + prompt_tokens=int(usage_meta.get("promptTokenCount") or 0), + completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0), + total_tokens=int(usage_meta.get("totalTokenCount") or 0), + prompt_tokens_details=SimpleNamespace( + cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0), + ), + ) + + message = SimpleNamespace( + role="assistant", + content="".join(text_pieces) if text_pieces else None, + tool_calls=tool_calls or None, + reasoning="".join(reasoning_pieces) or None, + reasoning_content="".join(reasoning_pieces) or None, + reasoning_details=None, + ) + choice = SimpleNamespace( + index=0, + message=message, + finish_reason=finish_reason, + ) + return SimpleNamespace( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion", + created=int(time.time()), + model=model, + choices=[choice], + usage=usage, + ) + + +def _empty_response(model: str) -> SimpleNamespace: + message = SimpleNamespace( + role="assistant", content="", tool_calls=None, + reasoning=None, reasoning_content=None, reasoning_details=None, + ) + choice = SimpleNamespace(index=0, message=message, finish_reason="stop") + usage = SimpleNamespace( + prompt_tokens=0, completion_tokens=0, 
total_tokens=0, + prompt_tokens_details=SimpleNamespace(cached_tokens=0), + ) + return SimpleNamespace( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion", + created=int(time.time()), + model=model, + choices=[choice], + usage=usage, + ) + + +def _map_gemini_finish_reason(reason: str) -> str: + mapping = { + "STOP": "stop", + "MAX_TOKENS": "length", + "SAFETY": "content_filter", + "RECITATION": "content_filter", + "OTHER": "stop", + } + return mapping.get(reason.upper(), "stop") + + +# ============================================================================= +# Streaming SSE iterator +# ============================================================================= + +class _GeminiStreamChunk(SimpleNamespace): + """Mimics an OpenAI ChatCompletionChunk with .choices[0].delta.""" + pass + + +def _make_stream_chunk( + *, + model: str, + content: str = "", + tool_call_delta: Optional[Dict[str, Any]] = None, + finish_reason: Optional[str] = None, + reasoning: str = "", +) -> _GeminiStreamChunk: + delta_kwargs: Dict[str, Any] = {"role": "assistant"} + if content: + delta_kwargs["content"] = content + if tool_call_delta is not None: + delta_kwargs["tool_calls"] = [SimpleNamespace( + index=tool_call_delta.get("index", 0), + id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}", + type="function", + function=SimpleNamespace( + name=tool_call_delta.get("name") or "", + arguments=tool_call_delta.get("arguments") or "", + ), + )] + if reasoning: + delta_kwargs["reasoning"] = reasoning + delta_kwargs["reasoning_content"] = reasoning + delta = SimpleNamespace(**delta_kwargs) + choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason) + return _GeminiStreamChunk( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion.chunk", + created=int(time.time()), + model=model, + choices=[choice], + usage=None, + ) + + +def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]: + """Parse Server-Sent Events from 
an httpx streaming response.""" + buffer = "" + for chunk in response.iter_text(): + if not chunk: + continue + buffer += chunk + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.rstrip("\r") + if not line: + continue + if line.startswith("data: "): + data = line[6:] + if data == "[DONE]": + return + try: + yield json.loads(data) + except json.JSONDecodeError: + logger.debug("Non-JSON SSE line: %s", data[:200]) + + +def _translate_stream_event( + event: Dict[str, Any], + model: str, + tool_call_counter: List[int], +) -> List[_GeminiStreamChunk]: + """Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s). + + ``tool_call_counter`` is a single-element list used as a mutable counter + across events in the same stream. Each ``functionCall`` part gets a + fresh, unique OpenAI ``index`` — keying by function name would collide + whenever the model issues parallel calls to the same tool (e.g. reading + three files in one turn). + """ + inner = event.get("response") if isinstance(event.get("response"), dict) else event + candidates = inner.get("candidates") or [] + if not candidates: + return [] + cand = candidates[0] + if not isinstance(cand, dict): + return [] + + chunks: List[_GeminiStreamChunk] = [] + + content = cand.get("content") or {} + parts = content.get("parts") if isinstance(content, dict) else [] + for part in parts or []: + if not isinstance(part, dict): + continue + if part.get("thought") is True and isinstance(part.get("text"), str): + chunks.append(_make_stream_chunk( + model=model, reasoning=part["text"], + )) + continue + if isinstance(part.get("text"), str) and part["text"]: + chunks.append(_make_stream_chunk(model=model, content=part["text"])) + fc = part.get("functionCall") + if isinstance(fc, dict) and fc.get("name"): + name = str(fc["name"]) + idx = tool_call_counter[0] + tool_call_counter[0] += 1 + try: + args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False) + except (TypeError, ValueError): + args_str = 
"{}" + chunks.append(_make_stream_chunk( + model=model, + tool_call_delta={ + "index": idx, + "name": name, + "arguments": args_str, + }, + )) + + finish_reason_raw = str(cand.get("finishReason") or "") + if finish_reason_raw: + mapped = _map_gemini_finish_reason(finish_reason_raw) + if tool_call_counter[0] > 0: + mapped = "tool_calls" + chunks.append(_make_stream_chunk(model=model, finish_reason=mapped)) + return chunks + + +# ============================================================================= +# GeminiCloudCodeClient — OpenAI-compatible facade +# ============================================================================= + +MARKER_BASE_URL = "cloudcode-pa://google" + + +class _GeminiChatCompletions: + def __init__(self, client: "GeminiCloudCodeClient"): + self._client = client + + def create(self, **kwargs: Any) -> Any: + return self._client._create_chat_completion(**kwargs) + + +class _GeminiChatNamespace: + def __init__(self, client: "GeminiCloudCodeClient"): + self.completions = _GeminiChatCompletions(client) + + +class GeminiCloudCodeClient: + """Minimal OpenAI-SDK-compatible facade over Code Assist v1internal.""" + + def __init__( + self, + *, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + default_headers: Optional[Dict[str, str]] = None, + project_id: str = "", + **_: Any, + ): + # `api_key` here is a dummy — real auth is the OAuth access token + # fetched on every call via agent.google_oauth.get_valid_access_token(). + # We accept the kwarg for openai.OpenAI interface parity. 
+ self.api_key = api_key or "google-oauth" + self.base_url = base_url or MARKER_BASE_URL + self._default_headers = dict(default_headers or {}) + self._configured_project_id = project_id + self._project_context: Optional[ProjectContext] = None + self._project_context_lock = False # simple single-thread guard + self.chat = _GeminiChatNamespace(self) + self.is_closed = False + self._http = httpx.Client(timeout=httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0)) + + def close(self) -> None: + self.is_closed = True + try: + self._http.close() + except Exception: + pass + + # Implement the OpenAI SDK's context-manager-ish closure check + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def _ensure_project_context(self, access_token: str, model: str) -> ProjectContext: + """Lazily resolve and cache the project context for this client.""" + if self._project_context is not None: + return self._project_context + + env_project = google_oauth.resolve_project_id_from_env() + creds = google_oauth.load_credentials() + stored_project = creds.project_id if creds else "" + + # Prefer what's already baked into the creds + if stored_project: + self._project_context = ProjectContext( + project_id=stored_project, + managed_project_id=creds.managed_project_id if creds else "", + tier_id="", + source="stored", + ) + return self._project_context + + ctx = resolve_project_context( + access_token, + configured_project_id=self._configured_project_id, + env_project_id=env_project, + user_agent_model=model, + ) + # Persist discovered project back to the creds file so the next + # session doesn't re-run the discovery. 
+ if ctx.project_id or ctx.managed_project_id: + google_oauth.update_project_ids( + project_id=ctx.project_id, + managed_project_id=ctx.managed_project_id, + ) + self._project_context = ctx + return ctx + + def _create_chat_completion( + self, + *, + model: str = "gemini-2.5-flash", + messages: Optional[List[Dict[str, Any]]] = None, + stream: bool = False, + tools: Any = None, + tool_choice: Any = None, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_p: Optional[float] = None, + stop: Any = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Any = None, + **_: Any, + ) -> Any: + access_token = google_oauth.get_valid_access_token() + ctx = self._ensure_project_context(access_token, model) + + thinking_config = None + if isinstance(extra_body, dict): + thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig") + + inner = build_gemini_request( + messages=messages or [], + tools=tools, + tool_choice=tool_choice, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + stop=stop, + thinking_config=thinking_config, + ) + wrapped = wrap_code_assist_request( + project_id=ctx.project_id, + model=model, + inner_request=inner, + ) + + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + "Authorization": f"Bearer {access_token}", + "User-Agent": "hermes-agent (gemini-cli-compat)", + "X-Goog-Api-Client": "gl-python/hermes", + "x-activity-request-id": str(uuid.uuid4()), + } + headers.update(self._default_headers) + + if stream: + return self._stream_completion(model=model, wrapped=wrapped, headers=headers) + + url = f"{CODE_ASSIST_ENDPOINT}/v1internal:generateContent" + response = self._http.post(url, json=wrapped, headers=headers) + if response.status_code != 200: + raise _gemini_http_error(response) + try: + payload = response.json() + except ValueError as exc: + raise CodeAssistError( + f"Invalid JSON from Code Assist: {exc}", + 
code="code_assist_invalid_json", + ) from exc + return _translate_gemini_response(payload, model=model) + + def _stream_completion( + self, + *, + model: str, + wrapped: Dict[str, Any], + headers: Dict[str, str], + ) -> Iterator[_GeminiStreamChunk]: + """Generator that yields OpenAI-shaped streaming chunks.""" + url = f"{CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse" + stream_headers = dict(headers) + stream_headers["Accept"] = "text/event-stream" + + def _generator() -> Iterator[_GeminiStreamChunk]: + try: + with self._http.stream("POST", url, json=wrapped, headers=stream_headers) as response: + if response.status_code != 200: + # Materialize error body for better diagnostics + response.read() + raise _gemini_http_error(response) + tool_call_counter: List[int] = [0] + for event in _iter_sse_events(response): + for chunk in _translate_stream_event(event, model, tool_call_counter): + yield chunk + except httpx.HTTPError as exc: + raise CodeAssistError( + f"Streaming request failed: {exc}", + code="code_assist_stream_error", + ) from exc + + return _generator() + + +def _gemini_http_error(response: httpx.Response) -> CodeAssistError: + """Translate an httpx response into a CodeAssistError with rich metadata. + + Parses Google's error envelope (``{"error": {"code", "message", "status", + "details": [...]}}``) so the agent's error classifier can reason about + the failure — ``status_code`` enables the rate_limit / auth classification + paths, and ``response`` lets the main loop honor ``Retry-After`` just + like it does for OpenAI SDK exceptions. + + Also lifts a few recognizable Google conditions into human-readable + messages so the user sees something better than a 500-char JSON dump: + + MODEL_CAPACITY_EXHAUSTED → "Gemini model capacity exhausted for + . This is a Google-side throttle..." + RESOURCE_EXHAUSTED w/o reason → quota-style message + 404 → "Model not found at cloudcode-pa..." 
+ """ + status = response.status_code + + # Parse the body once, surviving any weird encodings. + body_text = "" + body_json: Dict[str, Any] = {} + try: + body_text = response.text + except Exception: + body_text = "" + if body_text: + try: + parsed = json.loads(body_text) + if isinstance(parsed, dict): + body_json = parsed + except (ValueError, TypeError): + body_json = {} + + # Dig into Google's error envelope. Shape is: + # {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED", + # "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED", + # "metadata": {...}}, + # {"@type": ".../RetryInfo", "retryDelay": "30s"}]}} + err_obj = body_json.get("error") if isinstance(body_json, dict) else None + if not isinstance(err_obj, dict): + err_obj = {} + err_status = str(err_obj.get("status") or "").strip() + err_message = str(err_obj.get("message") or "").strip() + err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else [] + + # Extract google.rpc.ErrorInfo reason + metadata. There may be more + # than one ErrorInfo (rare), so we pick the first one with a reason. + error_reason = "" + error_metadata: Dict[str, Any] = {} + retry_delay_seconds: Optional[float] = None + for detail in err_details_list: + if not isinstance(detail, dict): + continue + type_url = str(detail.get("@type") or "") + if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"): + reason = detail.get("reason") + if isinstance(reason, str) and reason: + error_reason = reason + md = detail.get("metadata") + if isinstance(md, dict): + error_metadata = md + elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"): + # retryDelay is a google.protobuf.Duration string like "30s" or "1.5s". 
+ delay_raw = detail.get("retryDelay") + if isinstance(delay_raw, str) and delay_raw.endswith("s"): + try: + retry_delay_seconds = float(delay_raw[:-1]) + except ValueError: + pass + elif isinstance(delay_raw, (int, float)): + retry_delay_seconds = float(delay_raw) + + # Fall back to the Retry-After header if the body didn't include RetryInfo. + if retry_delay_seconds is None: + try: + header_val = response.headers.get("Retry-After") or response.headers.get("retry-after") + except Exception: + header_val = None + if header_val: + try: + retry_delay_seconds = float(header_val) + except (TypeError, ValueError): + retry_delay_seconds = None + + # Classify the error code. ``code_assist_rate_limited`` stays the default + # for 429s; a more specific reason tag helps downstream callers (e.g. tests, + # logs) without changing the rate_limit classification path. + code = f"code_assist_http_{status}" + if status == 401: + code = "code_assist_unauthorized" + elif status == 429: + code = "code_assist_rate_limited" + if error_reason == "MODEL_CAPACITY_EXHAUSTED": + code = "code_assist_capacity_exhausted" + + # Build a human-readable message. Keep the status + a raw-body tail for + # debugging, but lead with a friendlier summary when we recognize the + # Google signal. + model_hint = "" + if isinstance(error_metadata, dict): + model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip() + + if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED": + target = model_hint or "this Gemini model" + message = ( + f"Gemini capacity exhausted for {target} (Google-side throttle, " + f"not a Hermes issue). Try a different Gemini model or set a " + f"fallback_providers entry to a non-Gemini provider." + ) + if retry_delay_seconds is not None: + message += f" Google suggests retrying in {retry_delay_seconds:g}s." 
+ elif status == 429 and err_status == "RESOURCE_EXHAUSTED": + message = ( + f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). " + f"Check /gquota for remaining daily requests." + ) + if retry_delay_seconds is not None: + message += f" Retry suggested in {retry_delay_seconds:g}s." + elif status == 404: + # Google returns 404 when a model has been retired or renamed. + target = model_hint or (err_message or "model") + message = ( + f"Code Assist 404: {target} is not available at " + f"cloudcode-pa.googleapis.com. It may have been renamed or " + f"retired. Check hermes_cli/models.py for the current list." + ) + elif err_message: + # Generic fallback with the parsed message. + message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}" + else: + # Last-ditch fallback — raw body snippet. + message = f"Code Assist returned HTTP {status}: {body_text[:500]}" + + return CodeAssistError( + message, + code=code, + status_code=status, + response=response, + retry_after=retry_delay_seconds, + details={ + "status": err_status, + "reason": error_reason, + "metadata": error_metadata, + "message": err_message, + }, + ) diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py new file mode 100644 index 0000000000..8418cec987 --- /dev/null +++ b/agent/gemini_native_adapter.py @@ -0,0 +1,846 @@ +"""OpenAI-compatible facade over Google AI Studio's native Gemini API. + +Hermes keeps ``api_mode='chat_completions'`` for the ``gemini`` provider so the +main agent loop can keep using its existing OpenAI-shaped message flow. +This adapter is the transport shim that converts those OpenAI-style +``messages[]`` / ``tools[]`` requests into Gemini's native +``models/{model}:generateContent`` schema and converts the responses back. + +Why this exists +--------------- +Google's OpenAI-compatible endpoint has been brittle for Hermes's multi-turn +agent/tool loop (auth churn, tool-call replay quirks, thought-signature +requirements). 
The native Gemini API is the canonical path and avoids the +OpenAI-compat layer entirely. +""" + +from __future__ import annotations + +import asyncio +import base64 +import json +import logging +import time +import uuid +from types import SimpleNamespace +from typing import Any, Dict, Iterator, List, Optional + +import httpx + +from agent.gemini_schema import sanitize_gemini_tool_parameters + +logger = logging.getLogger(__name__) + +DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta" + + +def is_native_gemini_base_url(base_url: str) -> bool: + """Return True when the endpoint speaks Gemini's native REST API.""" + normalized = str(base_url or "").strip().rstrip("/").lower() + if not normalized: + return False + if "generativelanguage.googleapis.com" not in normalized: + return False + return not normalized.endswith("/openai") + + +class GeminiAPIError(Exception): + """Error shape compatible with Hermes retry/error classification.""" + + def __init__( + self, + message: str, + *, + code: str = "gemini_api_error", + status_code: Optional[int] = None, + response: Optional[httpx.Response] = None, + retry_after: Optional[float] = None, + details: Optional[Dict[str, Any]] = None, + ) -> None: + super().__init__(message) + self.code = code + self.status_code = status_code + self.response = response + self.retry_after = retry_after + self.details = details or {} + + +def _coerce_content_to_text(content: Any) -> str: + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + pieces: List[str] = [] + for part in content: + if isinstance(part, str): + pieces.append(part) + elif isinstance(part, dict) and part.get("type") == "text": + text = part.get("text") + if isinstance(text, str): + pieces.append(text) + return "\n".join(pieces) + return str(content) + + +def _extract_multimodal_parts(content: Any) -> List[Dict[str, Any]]: + if not isinstance(content, list): + text = 
_coerce_content_to_text(content) + return [{"text": text}] if text else [] + + parts: List[Dict[str, Any]] = [] + for item in content: + if isinstance(item, str): + parts.append({"text": item}) + continue + if not isinstance(item, dict): + continue + ptype = item.get("type") + if ptype == "text": + text = item.get("text") + if isinstance(text, str) and text: + parts.append({"text": text}) + elif ptype == "image_url": + url = ((item.get("image_url") or {}).get("url") or "") + if not isinstance(url, str) or not url.startswith("data:"): + continue + try: + header, encoded = url.split(",", 1) + mime = header.split(":", 1)[1].split(";", 1)[0] + raw = base64.b64decode(encoded) + except Exception: + continue + parts.append( + { + "inlineData": { + "mimeType": mime, + "data": base64.b64encode(raw).decode("ascii"), + } + } + ) + return parts + + +def _tool_call_extra_signature(tool_call: Dict[str, Any]) -> Optional[str]: + extra = tool_call.get("extra_content") or {} + if not isinstance(extra, dict): + return None + google = extra.get("google") or extra.get("thought_signature") + if isinstance(google, dict): + sig = google.get("thought_signature") or google.get("thoughtSignature") + return str(sig) if isinstance(sig, str) and sig else None + if isinstance(google, str) and google: + return google + return None + + +def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]: + fn = tool_call.get("function") or {} + args_raw = fn.get("arguments", "") + try: + args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {} + except json.JSONDecodeError: + args = {"_raw": args_raw} + if not isinstance(args, dict): + args = {"_value": args} + + part: Dict[str, Any] = { + "functionCall": { + "name": str(fn.get("name") or ""), + "args": args, + } + } + thought_signature = _tool_call_extra_signature(tool_call) + if thought_signature: + part["thoughtSignature"] = thought_signature + return part + + +def _translate_tool_result_to_gemini( + message: 
Dict[str, Any], + tool_name_by_call_id: Optional[Dict[str, str]] = None, +) -> Dict[str, Any]: + tool_name_by_call_id = tool_name_by_call_id or {} + tool_call_id = str(message.get("tool_call_id") or "") + name = str( + message.get("name") + or tool_name_by_call_id.get(tool_call_id) + or tool_call_id + or "tool" + ) + content = _coerce_content_to_text(message.get("content")) + try: + parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None + except json.JSONDecodeError: + parsed = None + response = parsed if isinstance(parsed, dict) else {"output": content} + return { + "functionResponse": { + "name": name, + "response": response, + } + } + + +def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]: + system_text_parts: List[str] = [] + contents: List[Dict[str, Any]] = [] + tool_name_by_call_id: Dict[str, str] = {} + + for msg in messages: + if not isinstance(msg, dict): + continue + role = str(msg.get("role") or "user") + + if role == "system": + system_text_parts.append(_coerce_content_to_text(msg.get("content"))) + continue + + if role in {"tool", "function"}: + contents.append( + { + "role": "user", + "parts": [ + _translate_tool_result_to_gemini( + msg, + tool_name_by_call_id=tool_name_by_call_id, + ) + ], + } + ) + continue + + gemini_role = "model" if role == "assistant" else "user" + parts: List[Dict[str, Any]] = [] + + content_parts = _extract_multimodal_parts(msg.get("content")) + parts.extend(content_parts) + + tool_calls = msg.get("tool_calls") or [] + if isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict): + tool_call_id = str(tool_call.get("id") or tool_call.get("call_id") or "") + tool_name = str(((tool_call.get("function") or {}).get("name") or "")) + if tool_call_id and tool_name: + tool_name_by_call_id[tool_call_id] = tool_name + parts.append(_translate_tool_call_to_gemini(tool_call)) + + if parts: + contents.append({"role": 
gemini_role, "parts": parts}) + + system_instruction = None + joined_system = "\n".join(part for part in system_text_parts if part).strip() + if joined_system: + system_instruction = {"parts": [{"text": joined_system}]} + return contents, system_instruction + + +def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]: + if not isinstance(tools, list): + return [] + declarations: List[Dict[str, Any]] = [] + for tool in tools: + if not isinstance(tool, dict): + continue + fn = tool.get("function") or {} + if not isinstance(fn, dict): + continue + name = fn.get("name") + if not isinstance(name, str) or not name: + continue + decl: Dict[str, Any] = {"name": name} + description = fn.get("description") + if isinstance(description, str) and description: + decl["description"] = description + parameters = fn.get("parameters") + if isinstance(parameters, dict): + decl["parameters"] = sanitize_gemini_tool_parameters(parameters) + declarations.append(decl) + return [{"functionDeclarations": declarations}] if declarations else [] + + +def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]: + if tool_choice is None: + return None + if isinstance(tool_choice, str): + if tool_choice == "auto": + return {"functionCallingConfig": {"mode": "AUTO"}} + if tool_choice == "required": + return {"functionCallingConfig": {"mode": "ANY"}} + if tool_choice == "none": + return {"functionCallingConfig": {"mode": "NONE"}} + if isinstance(tool_choice, dict): + fn = tool_choice.get("function") or {} + name = fn.get("name") + if isinstance(name, str) and name: + return {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": [name]}} + return None + + +def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]: + if not isinstance(config, dict) or not config: + return None + budget = config.get("thinkingBudget", config.get("thinking_budget")) + include = config.get("includeThoughts", config.get("include_thoughts")) + level = 
config.get("thinkingLevel", config.get("thinking_level")) + normalized: Dict[str, Any] = {} + if isinstance(budget, (int, float)): + normalized["thinkingBudget"] = int(budget) + if isinstance(include, bool): + normalized["includeThoughts"] = include + if isinstance(level, str) and level.strip(): + normalized["thinkingLevel"] = level.strip().lower() + return normalized or None + + +def build_gemini_request( + *, + messages: List[Dict[str, Any]], + tools: Any = None, + tool_choice: Any = None, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_p: Optional[float] = None, + stop: Any = None, + thinking_config: Any = None, +) -> Dict[str, Any]: + contents, system_instruction = _build_gemini_contents(messages) + request: Dict[str, Any] = {"contents": contents} + if system_instruction: + request["systemInstruction"] = system_instruction + + gemini_tools = _translate_tools_to_gemini(tools) + if gemini_tools: + request["tools"] = gemini_tools + + tool_config = _translate_tool_choice_to_gemini(tool_choice) + if tool_config: + request["toolConfig"] = tool_config + + generation_config: Dict[str, Any] = {} + if temperature is not None: + generation_config["temperature"] = temperature + if max_tokens is not None: + generation_config["maxOutputTokens"] = max_tokens + if top_p is not None: + generation_config["topP"] = top_p + if stop: + generation_config["stopSequences"] = stop if isinstance(stop, list) else [str(stop)] + normalized_thinking = _normalize_thinking_config(thinking_config) + if normalized_thinking: + generation_config["thinkingConfig"] = normalized_thinking + if generation_config: + request["generationConfig"] = generation_config + + return request + + +def _map_gemini_finish_reason(reason: str) -> str: + mapping = { + "STOP": "stop", + "MAX_TOKENS": "length", + "SAFETY": "content_filter", + "RECITATION": "content_filter", + "OTHER": "stop", + } + return mapping.get(str(reason or "").upper(), "stop") + + +def 
_tool_call_extra_from_part(part: Dict[str, Any]) -> Optional[Dict[str, Any]]: + sig = part.get("thoughtSignature") + if isinstance(sig, str) and sig: + return {"google": {"thought_signature": sig}} + return None + + +def _empty_response(model: str) -> SimpleNamespace: + message = SimpleNamespace( + role="assistant", + content="", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=None, + ) + choice = SimpleNamespace(index=0, message=message, finish_reason="stop") + usage = SimpleNamespace( + prompt_tokens=0, + completion_tokens=0, + total_tokens=0, + prompt_tokens_details=SimpleNamespace(cached_tokens=0), + ) + return SimpleNamespace( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion", + created=int(time.time()), + model=model, + choices=[choice], + usage=usage, + ) + + +def translate_gemini_response(resp: Dict[str, Any], model: str) -> SimpleNamespace: + candidates = resp.get("candidates") or [] + if not isinstance(candidates, list) or not candidates: + return _empty_response(model) + + cand = candidates[0] if isinstance(candidates[0], dict) else {} + content_obj = cand.get("content") if isinstance(cand, dict) else {} + parts = content_obj.get("parts") if isinstance(content_obj, dict) else [] + + text_pieces: List[str] = [] + reasoning_pieces: List[str] = [] + tool_calls: List[SimpleNamespace] = [] + + for index, part in enumerate(parts or []): + if not isinstance(part, dict): + continue + if part.get("thought") is True and isinstance(part.get("text"), str): + reasoning_pieces.append(part["text"]) + continue + if isinstance(part.get("text"), str): + text_pieces.append(part["text"]) + continue + fc = part.get("functionCall") + if isinstance(fc, dict) and fc.get("name"): + try: + args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False) + except (TypeError, ValueError): + args_str = "{}" + tool_call = SimpleNamespace( + id=f"call_{uuid.uuid4().hex[:12]}", + type="function", + index=index, + 
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str), + ) + extra_content = _tool_call_extra_from_part(part) + if extra_content: + tool_call.extra_content = extra_content + tool_calls.append(tool_call) + + finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(str(cand.get("finishReason") or "")) + usage_meta = resp.get("usageMetadata") or {} + usage = SimpleNamespace( + prompt_tokens=int(usage_meta.get("promptTokenCount") or 0), + completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0), + total_tokens=int(usage_meta.get("totalTokenCount") or 0), + prompt_tokens_details=SimpleNamespace( + cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0), + ), + ) + reasoning = "".join(reasoning_pieces) or None + message = SimpleNamespace( + role="assistant", + content="".join(text_pieces) if text_pieces else None, + tool_calls=tool_calls or None, + reasoning=reasoning, + reasoning_content=reasoning, + reasoning_details=None, + ) + choice = SimpleNamespace(index=0, message=message, finish_reason=finish_reason) + return SimpleNamespace( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion", + created=int(time.time()), + model=model, + choices=[choice], + usage=usage, + ) + + +class _GeminiStreamChunk(SimpleNamespace): + pass + + +def _make_stream_chunk( + *, + model: str, + content: str = "", + tool_call_delta: Optional[Dict[str, Any]] = None, + finish_reason: Optional[str] = None, + reasoning: str = "", +) -> _GeminiStreamChunk: + delta_kwargs: Dict[str, Any] = { + "role": "assistant", + "content": None, + "tool_calls": None, + "reasoning": None, + "reasoning_content": None, + } + if content: + delta_kwargs["content"] = content + if tool_call_delta is not None: + tool_delta = SimpleNamespace( + index=tool_call_delta.get("index", 0), + id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}", + type="function", + function=SimpleNamespace( + name=tool_call_delta.get("name") or "", + 
arguments=tool_call_delta.get("arguments") or "", + ), + ) + extra_content = tool_call_delta.get("extra_content") + if isinstance(extra_content, dict): + tool_delta.extra_content = extra_content + delta_kwargs["tool_calls"] = [tool_delta] + if reasoning: + delta_kwargs["reasoning"] = reasoning + delta_kwargs["reasoning_content"] = reasoning + delta = SimpleNamespace(**delta_kwargs) + choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason) + return _GeminiStreamChunk( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion.chunk", + created=int(time.time()), + model=model, + choices=[choice], + usage=None, + ) + + +def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]: + buffer = "" + for chunk in response.iter_text(): + if not chunk: + continue + buffer += chunk + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.rstrip("\r") + if not line: + continue + if not line.startswith("data: "): + continue + data = line[6:] + if data == "[DONE]": + return + try: + payload = json.loads(data) + except json.JSONDecodeError: + logger.debug("Non-JSON Gemini SSE line: %s", data[:200]) + continue + if isinstance(payload, dict): + yield payload + + +def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: Dict[str, Dict[str, Any]]) -> List[_GeminiStreamChunk]: + candidates = event.get("candidates") or [] + if not candidates: + return [] + cand = candidates[0] if isinstance(candidates[0], dict) else {} + parts = ((cand.get("content") or {}).get("parts") or []) if isinstance(cand, dict) else [] + chunks: List[_GeminiStreamChunk] = [] + + for part_index, part in enumerate(parts): + if not isinstance(part, dict): + continue + if part.get("thought") is True and isinstance(part.get("text"), str): + chunks.append(_make_stream_chunk(model=model, reasoning=part["text"])) + continue + if isinstance(part.get("text"), str) and part["text"]: + chunks.append(_make_stream_chunk(model=model, 
content=part["text"])) + fc = part.get("functionCall") + if isinstance(fc, dict) and fc.get("name"): + name = str(fc["name"]) + try: + args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False, sort_keys=True) + except (TypeError, ValueError): + args_str = "{}" + thought_signature = part.get("thoughtSignature") if isinstance(part.get("thoughtSignature"), str) else "" + call_key = json.dumps( + { + "part_index": part_index, + "name": name, + "thought_signature": thought_signature, + }, + sort_keys=True, + ) + slot = tool_call_indices.get(call_key) + if slot is None: + slot = { + "index": len(tool_call_indices), + "id": f"call_{uuid.uuid4().hex[:12]}", + "last_arguments": "", + } + tool_call_indices[call_key] = slot + emitted_arguments = args_str + last_arguments = str(slot.get("last_arguments") or "") + if last_arguments: + if args_str == last_arguments: + emitted_arguments = "" + elif args_str.startswith(last_arguments): + emitted_arguments = args_str[len(last_arguments):] + slot["last_arguments"] = args_str + chunks.append( + _make_stream_chunk( + model=model, + tool_call_delta={ + "index": slot["index"], + "id": slot["id"], + "name": name, + "arguments": emitted_arguments, + "extra_content": _tool_call_extra_from_part(part), + }, + ) + ) + + finish_reason_raw = str(cand.get("finishReason") or "") + if finish_reason_raw: + mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw) + chunks.append(_make_stream_chunk(model=model, finish_reason=mapped)) + return chunks + + +def gemini_http_error(response: httpx.Response) -> GeminiAPIError: + status = response.status_code + body_text = "" + body_json: Dict[str, Any] = {} + try: + body_text = response.text + except Exception: + body_text = "" + if body_text: + try: + parsed = json.loads(body_text) + if isinstance(parsed, dict): + body_json = parsed + except (ValueError, TypeError): + body_json = {} + + err_obj = body_json.get("error") if isinstance(body_json, dict) else None + if 
not isinstance(err_obj, dict): + err_obj = {} + err_status = str(err_obj.get("status") or "").strip() + err_message = str(err_obj.get("message") or "").strip() + details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else [] + + reason = "" + retry_after: Optional[float] = None + metadata: Dict[str, Any] = {} + for detail in details_list: + if not isinstance(detail, dict): + continue + type_url = str(detail.get("@type") or "") + if not reason and type_url.endswith("/google.rpc.ErrorInfo"): + reason_value = detail.get("reason") + if isinstance(reason_value, str): + reason = reason_value + md = detail.get("metadata") + if isinstance(md, dict): + metadata = md + header_retry = response.headers.get("Retry-After") or response.headers.get("retry-after") + if header_retry: + try: + retry_after = float(header_retry) + except (TypeError, ValueError): + retry_after = None + + code = f"gemini_http_{status}" + if status == 401: + code = "gemini_unauthorized" + elif status == 429: + code = "gemini_rate_limited" + elif status == 404: + code = "gemini_model_not_found" + + if err_message: + message = f"Gemini HTTP {status} ({err_status or 'error'}): {err_message}" + else: + message = f"Gemini returned HTTP {status}: {body_text[:500]}" + + return GeminiAPIError( + message, + code=code, + status_code=status, + response=response, + retry_after=retry_after, + details={ + "status": err_status, + "reason": reason, + "metadata": metadata, + "message": err_message, + }, + ) + + +class _GeminiChatCompletions: + def __init__(self, client: "GeminiNativeClient"): + self._client = client + + def create(self, **kwargs: Any) -> Any: + return self._client._create_chat_completion(**kwargs) + + +class _AsyncGeminiChatCompletions: + def __init__(self, client: "AsyncGeminiNativeClient"): + self._client = client + + async def create(self, **kwargs: Any) -> Any: + return await self._client._create_chat_completion(**kwargs) + + +class _GeminiChatNamespace: + def __init__(self, 
client: "GeminiNativeClient"): + self.completions = _GeminiChatCompletions(client) + + +class _AsyncGeminiChatNamespace: + def __init__(self, client: "AsyncGeminiNativeClient"): + self.completions = _AsyncGeminiChatCompletions(client) + + +class GeminiNativeClient: + """Minimal OpenAI-SDK-compatible facade over Gemini's native REST API.""" + + def __init__( + self, + *, + api_key: str, + base_url: Optional[str] = None, + default_headers: Optional[Dict[str, str]] = None, + timeout: Any = None, + http_client: Optional[httpx.Client] = None, + **_: Any, + ) -> None: + self.api_key = api_key + normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/") + if normalized_base.endswith("/openai"): + normalized_base = normalized_base[: -len("/openai")] + self.base_url = normalized_base + self._default_headers = dict(default_headers or {}) + self.chat = _GeminiChatNamespace(self) + self.is_closed = False + self._http = http_client or httpx.Client( + timeout=timeout or httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0) + ) + + def close(self) -> None: + self.is_closed = True + try: + self._http.close() + except Exception: + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def _headers(self) -> Dict[str, str]: + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + "x-goog-api-key": self.api_key, + "User-Agent": "hermes-agent (gemini-native)", + } + headers.update(self._default_headers) + return headers + + @staticmethod + def _advance_stream_iterator(iterator: Iterator[_GeminiStreamChunk]) -> tuple[bool, Optional[_GeminiStreamChunk]]: + try: + return False, next(iterator) + except StopIteration: + return True, None + + def _create_chat_completion( + self, + *, + model: str = "gemini-2.5-flash", + messages: Optional[List[Dict[str, Any]]] = None, + stream: bool = False, + tools: Any = None, + tool_choice: Any = None, + temperature: Optional[float] = None, + 
max_tokens: Optional[int] = None, + top_p: Optional[float] = None, + stop: Any = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Any = None, + **_: Any, + ) -> Any: + thinking_config = None + if isinstance(extra_body, dict): + thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig") + + request = build_gemini_request( + messages=messages or [], + tools=tools, + tool_choice=tool_choice, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + stop=stop, + thinking_config=thinking_config, + ) + + if stream: + return self._stream_completion(model=model, request=request, timeout=timeout) + + url = f"{self.base_url}/models/{model}:generateContent" + response = self._http.post(url, json=request, headers=self._headers(), timeout=timeout) + if response.status_code != 200: + raise gemini_http_error(response) + try: + payload = response.json() + except ValueError as exc: + raise GeminiAPIError( + f"Invalid JSON from Gemini native API: {exc}", + code="gemini_invalid_json", + status_code=response.status_code, + response=response, + ) from exc + return translate_gemini_response(payload, model=model) + + def _stream_completion(self, *, model: str, request: Dict[str, Any], timeout: Any = None) -> Iterator[_GeminiStreamChunk]: + url = f"{self.base_url}/models/{model}:streamGenerateContent?alt=sse" + stream_headers = dict(self._headers()) + stream_headers["Accept"] = "text/event-stream" + + def _generator() -> Iterator[_GeminiStreamChunk]: + try: + with self._http.stream("POST", url, json=request, headers=stream_headers, timeout=timeout) as response: + if response.status_code != 200: + response.read() + raise gemini_http_error(response) + tool_call_indices: Dict[str, Dict[str, Any]] = {} + for event in _iter_sse_events(response): + for chunk in translate_stream_event(event, model, tool_call_indices): + yield chunk + except httpx.HTTPError as exc: + raise GeminiAPIError( + f"Gemini streaming request failed: {exc}", + 
code="gemini_stream_error", + ) from exc + + return _generator() + + +class AsyncGeminiNativeClient: + """Async wrapper used by auxiliary_client for native Gemini calls.""" + + def __init__(self, sync_client: GeminiNativeClient): + self._sync = sync_client + self.api_key = sync_client.api_key + self.base_url = sync_client.base_url + self.chat = _AsyncGeminiChatNamespace(self) + + async def _create_chat_completion(self, **kwargs: Any) -> Any: + stream = bool(kwargs.get("stream")) + result = await asyncio.to_thread(self._sync.chat.completions.create, **kwargs) + if not stream: + return result + + async def _async_stream() -> Any: + while True: + done, chunk = await asyncio.to_thread(self._sync._advance_stream_iterator, result) + if done: + break + yield chunk + + return _async_stream() + + async def close(self) -> None: + await asyncio.to_thread(self._sync.close) diff --git a/agent/gemini_schema.py b/agent/gemini_schema.py new file mode 100644 index 0000000000..904c99d31b --- /dev/null +++ b/agent/gemini_schema.py @@ -0,0 +1,85 @@ +"""Helpers for translating OpenAI-style tool schemas to Gemini's schema subset.""" + +from __future__ import annotations + +from typing import Any, Dict, List + +# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema`` +# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields +# outside that subset before sending Hermes tool schemas to Google. +_GEMINI_SCHEMA_ALLOWED_KEYS = { + "type", + "format", + "title", + "description", + "nullable", + "enum", + "maxItems", + "minItems", + "properties", + "required", + "minProperties", + "maxProperties", + "minLength", + "maxLength", + "pattern", + "example", + "anyOf", + "propertyOrdering", + "default", + "items", + "minimum", + "maximum", +} + + +def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]: + """Return a Gemini-compatible copy of a tool parameter schema. 
+ + Hermes tool schemas are OpenAI-flavored JSON Schema and may contain keys + such as ``$schema`` or ``additionalProperties`` that Google's Gemini + ``Schema`` object rejects. This helper preserves the documented Gemini + subset and recursively sanitizes nested ``properties`` / ``items`` / + ``anyOf`` definitions. + """ + + if not isinstance(schema, dict): + return {} + + cleaned: Dict[str, Any] = {} + for key, value in schema.items(): + if key not in _GEMINI_SCHEMA_ALLOWED_KEYS: + continue + if key == "properties": + if not isinstance(value, dict): + continue + props: Dict[str, Any] = {} + for prop_name, prop_schema in value.items(): + if not isinstance(prop_name, str): + continue + props[prop_name] = sanitize_gemini_schema(prop_schema) + cleaned[key] = props + continue + if key == "items": + cleaned[key] = sanitize_gemini_schema(value) + continue + if key == "anyOf": + if not isinstance(value, list): + continue + cleaned[key] = [ + sanitize_gemini_schema(item) + for item in value + if isinstance(item, dict) + ] + continue + cleaned[key] = value + return cleaned + + +def sanitize_gemini_tool_parameters(parameters: Any) -> Dict[str, Any]: + """Normalize tool parameters to a valid Gemini object schema.""" + + cleaned = sanitize_gemini_schema(parameters) + if not cleaned: + return {"type": "object", "properties": {}} + return cleaned diff --git a/agent/google_code_assist.py b/agent/google_code_assist.py new file mode 100644 index 0000000000..eba09b8f46 --- /dev/null +++ b/agent/google_code_assist.py @@ -0,0 +1,453 @@ +"""Google Code Assist API client — project discovery, onboarding, quota. + +The Code Assist API powers Google's official gemini-cli. It sits at +``cloudcode-pa.googleapis.com`` and provides: + +- Free tier access (generous daily quota) for personal Google accounts +- Paid tier access via GCP projects with billing / Workspace / Standard / Enterprise + +This module handles the control-plane dance needed before inference: + +1. 
``load_code_assist()`` — probe the user's account to learn what tier they're on + and whether a ``cloudaicompanionProject`` is already assigned. +2. ``onboard_user()`` — if the user hasn't been onboarded yet (new account, fresh + free tier, etc.), call this with the chosen tier + project id. Supports LRO + polling for slow provisioning. +3. ``retrieve_user_quota()`` — fetch the ``buckets[]`` array showing remaining + quota per model, used by the ``/gquota`` slash command. + +VPC-SC handling: enterprise accounts under a VPC Service Controls perimeter +will get ``SECURITY_POLICY_VIOLATED`` on ``load_code_assist``. We catch this +and force the account to ``standard-tier`` so the call chain still succeeds. + +Derived from opencode-gemini-auth (MIT) and clawdbot/extensions/google. The +request/response shapes are specific to Google's internal Code Assist API, +documented nowhere public — we copy them from the reference implementations. +""" + +from __future__ import annotations + +import json +import logging +import os +import time +import urllib.error +import urllib.parse +import urllib.request +import uuid +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# Constants +# ============================================================================= + +CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com" + +# Fallback endpoints tried when prod returns an error during project discovery +FALLBACK_ENDPOINTS = [ + "https://daily-cloudcode-pa.sandbox.googleapis.com", + "https://autopush-cloudcode-pa.sandbox.googleapis.com", +] + +# Tier identifiers that Google's API uses +FREE_TIER_ID = "free-tier" +LEGACY_TIER_ID = "legacy-tier" +STANDARD_TIER_ID = "standard-tier" + +# Default HTTP headers matching gemini-cli's fingerprint. +# Google may reject unrecognized User-Agents on these internal endpoints. 
+_GEMINI_CLI_USER_AGENT = "google-api-nodejs-client/9.15.1 (gzip)" +_X_GOOG_API_CLIENT = "gl-node/24.0.0" +_DEFAULT_REQUEST_TIMEOUT = 30.0 +_ONBOARDING_POLL_ATTEMPTS = 12 +_ONBOARDING_POLL_INTERVAL_SECONDS = 5.0 + + +class CodeAssistError(RuntimeError): + """Exception raised by the Code Assist (``cloudcode-pa``) integration. + + Carries HTTP status / response / retry-after metadata so the agent's + ``error_classifier._extract_status_code`` and the main loop's Retry-After + handling (which walks ``error.response.headers``) pick up the right + signals. Without these, 429s from the OAuth path look like opaque + ``RuntimeError`` and skip the rate-limit path. + """ + + def __init__( + self, + message: str, + *, + code: str = "code_assist_error", + status_code: Optional[int] = None, + response: Any = None, + retry_after: Optional[float] = None, + details: Optional[Dict[str, Any]] = None, + ) -> None: + super().__init__(message) + self.code = code + # ``status_code`` is picked up by ``agent.error_classifier._extract_status_code`` + # so a 429 from Code Assist classifies as FailoverReason.rate_limit and + # triggers the main loop's fallback_providers chain the same way SDK + # errors do. + self.status_code = status_code + # ``response`` is the underlying ``httpx.Response`` (or a shim with a + # ``.headers`` mapping and ``.json()`` method). The main loop reads + # ``error.response.headers["Retry-After"]`` to honor Google's retry + # hints when the backend throttles us. + self.response = response + # Parsed ``Retry-After`` seconds (kept separately for convenience — + # Google returns retry hints in both the header and the error body's + # ``google.rpc.RetryInfo`` details, and we pick whichever we found). + self.retry_after = retry_after + # Parsed structured error details from the Google error envelope + # (e.g. ``{"reason": "MODEL_CAPACITY_EXHAUSTED", "status": "RESOURCE_EXHAUSTED"}``). + # Useful for logging and for tests that want to assert on specifics. 
+ self.details = details or {} + + +class ProjectIdRequiredError(CodeAssistError): + def __init__(self, message: str = "GCP project id required for this tier") -> None: + super().__init__(message, code="code_assist_project_id_required") + + +# ============================================================================= +# HTTP primitive (auth via Bearer token passed per-call) +# ============================================================================= + +def _build_headers(access_token: str, *, user_agent_model: str = "") -> Dict[str, str]: + ua = _GEMINI_CLI_USER_AGENT + if user_agent_model: + ua = f"{ua} model/{user_agent_model}" + return { + "Content-Type": "application/json", + "Accept": "application/json", + "Authorization": f"Bearer {access_token}", + "User-Agent": ua, + "X-Goog-Api-Client": _X_GOOG_API_CLIENT, + "x-activity-request-id": str(uuid.uuid4()), + } + + +def _client_metadata() -> Dict[str, str]: + """Match Google's gemini-cli exactly — unrecognized metadata may be rejected.""" + return { + "ideType": "IDE_UNSPECIFIED", + "platform": "PLATFORM_UNSPECIFIED", + "pluginType": "GEMINI", + } + + +def _post_json( + url: str, + body: Dict[str, Any], + access_token: str, + *, + timeout: float = _DEFAULT_REQUEST_TIMEOUT, + user_agent_model: str = "", +) -> Dict[str, Any]: + data = json.dumps(body).encode("utf-8") + request = urllib.request.Request( + url, data=data, method="POST", + headers=_build_headers(access_token, user_agent_model=user_agent_model), + ) + try: + with urllib.request.urlopen(request, timeout=timeout) as response: + raw = response.read().decode("utf-8", errors="replace") + return json.loads(raw) if raw else {} + except urllib.error.HTTPError as exc: + detail = "" + try: + detail = exc.read().decode("utf-8", errors="replace") + except Exception: + pass + # Special case: VPC-SC violation should be distinguishable + if _is_vpc_sc_violation(detail): + raise CodeAssistError( + f"VPC-SC policy violation: {detail}", + 
code="code_assist_vpc_sc", + ) from exc + raise CodeAssistError( + f"Code Assist HTTP {exc.code}: {detail or exc.reason}", + code=f"code_assist_http_{exc.code}", + ) from exc + except urllib.error.URLError as exc: + raise CodeAssistError( + f"Code Assist request failed: {exc}", + code="code_assist_network_error", + ) from exc + + +def _is_vpc_sc_violation(body: str) -> bool: + """Detect a VPC Service Controls violation from a response body.""" + if not body: + return False + try: + parsed = json.loads(body) + except (json.JSONDecodeError, ValueError): + return "SECURITY_POLICY_VIOLATED" in body + # Walk the nested error structure Google uses + error = parsed.get("error") if isinstance(parsed, dict) else None + if not isinstance(error, dict): + return False + details = error.get("details") or [] + if isinstance(details, list): + for item in details: + if isinstance(item, dict): + reason = item.get("reason") or "" + if reason == "SECURITY_POLICY_VIOLATED": + return True + msg = str(error.get("message", "")) + return "SECURITY_POLICY_VIOLATED" in msg + + +# ============================================================================= +# load_code_assist — discovers current tier + assigned project +# ============================================================================= + +@dataclass +class CodeAssistProjectInfo: + """Result from ``load_code_assist``.""" + current_tier_id: str = "" + cloudaicompanion_project: str = "" # Google-managed project (free tier) + allowed_tiers: List[str] = field(default_factory=list) + raw: Dict[str, Any] = field(default_factory=dict) + + +def load_code_assist( + access_token: str, + *, + project_id: str = "", + user_agent_model: str = "", +) -> CodeAssistProjectInfo: + """Call ``POST /v1internal:loadCodeAssist`` with prod → sandbox fallback. + + Returns whatever tier + project info Google reports. On VPC-SC violations, + returns a synthetic ``standard-tier`` result so the chain can continue. 
+ """ + body: Dict[str, Any] = { + "metadata": { + "duetProject": project_id, + **_client_metadata(), + }, + } + if project_id: + body["cloudaicompanionProject"] = project_id + + endpoints = [CODE_ASSIST_ENDPOINT] + FALLBACK_ENDPOINTS + last_err: Optional[Exception] = None + for endpoint in endpoints: + url = f"{endpoint}/v1internal:loadCodeAssist" + try: + resp = _post_json(url, body, access_token, user_agent_model=user_agent_model) + return _parse_load_response(resp) + except CodeAssistError as exc: + if exc.code == "code_assist_vpc_sc": + logger.info("VPC-SC violation on %s — defaulting to standard-tier", endpoint) + return CodeAssistProjectInfo( + current_tier_id=STANDARD_TIER_ID, + cloudaicompanion_project=project_id, + ) + last_err = exc + logger.warning("loadCodeAssist failed on %s: %s", endpoint, exc) + continue + if last_err: + raise last_err + return CodeAssistProjectInfo() + + +def _parse_load_response(resp: Dict[str, Any]) -> CodeAssistProjectInfo: + current_tier = resp.get("currentTier") or {} + tier_id = str(current_tier.get("id") or "") if isinstance(current_tier, dict) else "" + project = str(resp.get("cloudaicompanionProject") or "") + allowed = resp.get("allowedTiers") or [] + allowed_ids: List[str] = [] + if isinstance(allowed, list): + for t in allowed: + if isinstance(t, dict): + tid = str(t.get("id") or "") + if tid: + allowed_ids.append(tid) + return CodeAssistProjectInfo( + current_tier_id=tier_id, + cloudaicompanion_project=project, + allowed_tiers=allowed_ids, + raw=resp, + ) + + +# ============================================================================= +# onboard_user — provisions a new user on a tier (with LRO polling) +# ============================================================================= + +def onboard_user( + access_token: str, + *, + tier_id: str, + project_id: str = "", + user_agent_model: str = "", +) -> Dict[str, Any]: + """Call ``POST /v1internal:onboardUser`` to provision the user. 
+ + For paid tiers, ``project_id`` is REQUIRED (raises ProjectIdRequiredError). + For free tiers, ``project_id`` is optional — Google will assign one. + + Returns the final operation response. Polls ``/v1internal/`` for up + to ``_ONBOARDING_POLL_ATTEMPTS`` × ``_ONBOARDING_POLL_INTERVAL_SECONDS`` + (default: 12 × 5s = 1 min). + """ + if tier_id != FREE_TIER_ID and tier_id != LEGACY_TIER_ID and not project_id: + raise ProjectIdRequiredError( + f"Tier {tier_id!r} requires a GCP project id. " + "Set HERMES_GEMINI_PROJECT_ID or GOOGLE_CLOUD_PROJECT." + ) + + body: Dict[str, Any] = { + "tierId": tier_id, + "metadata": _client_metadata(), + } + if project_id: + body["cloudaicompanionProject"] = project_id + + endpoint = CODE_ASSIST_ENDPOINT + url = f"{endpoint}/v1internal:onboardUser" + resp = _post_json(url, body, access_token, user_agent_model=user_agent_model) + + # Poll if LRO (long-running operation) + if not resp.get("done"): + op_name = resp.get("name", "") + if not op_name: + return resp + for attempt in range(_ONBOARDING_POLL_ATTEMPTS): + time.sleep(_ONBOARDING_POLL_INTERVAL_SECONDS) + poll_url = f"{endpoint}/v1internal/{op_name}" + try: + poll_resp = _post_json(poll_url, {}, access_token, user_agent_model=user_agent_model) + except CodeAssistError as exc: + logger.warning("Onboarding poll attempt %d failed: %s", attempt + 1, exc) + continue + if poll_resp.get("done"): + return poll_resp + logger.warning("Onboarding did not complete within %d attempts", _ONBOARDING_POLL_ATTEMPTS) + return resp + + +# ============================================================================= +# retrieve_user_quota — for /gquota +# ============================================================================= + +@dataclass +class QuotaBucket: + model_id: str + token_type: str = "" + remaining_fraction: float = 0.0 + reset_time_iso: str = "" + raw: Dict[str, Any] = field(default_factory=dict) + + +def retrieve_user_quota( + access_token: str, + *, + project_id: str = "", + 
user_agent_model: str = "", +) -> List[QuotaBucket]: + """Call ``POST /v1internal:retrieveUserQuota`` and parse ``buckets[]``.""" + body: Dict[str, Any] = {} + if project_id: + body["project"] = project_id + url = f"{CODE_ASSIST_ENDPOINT}/v1internal:retrieveUserQuota" + resp = _post_json(url, body, access_token, user_agent_model=user_agent_model) + raw_buckets = resp.get("buckets") or [] + buckets: List[QuotaBucket] = [] + if not isinstance(raw_buckets, list): + return buckets + for b in raw_buckets: + if not isinstance(b, dict): + continue + buckets.append(QuotaBucket( + model_id=str(b.get("modelId") or ""), + token_type=str(b.get("tokenType") or ""), + remaining_fraction=float(b.get("remainingFraction") or 0.0), + reset_time_iso=str(b.get("resetTime") or ""), + raw=b, + )) + return buckets + + +# ============================================================================= +# Project context resolution +# ============================================================================= + +@dataclass +class ProjectContext: + """Resolved state for a given OAuth session.""" + project_id: str = "" # effective project id sent on requests + managed_project_id: str = "" # Google-assigned project (free tier) + tier_id: str = "" + source: str = "" # "env", "config", "discovered", "onboarded" + + +def resolve_project_context( + access_token: str, + *, + configured_project_id: str = "", + env_project_id: str = "", + user_agent_model: str = "", +) -> ProjectContext: + """Figure out what project id + tier to use for requests. + + Priority: + 1. If configured_project_id or env_project_id is set, use that directly + and short-circuit (no discovery needed). + 2. Otherwise call loadCodeAssist to see what Google says. + 3. If no tier assigned yet, onboard the user (free tier default). 
+ """ + # Short-circuit: caller provided a project id + if configured_project_id: + return ProjectContext( + project_id=configured_project_id, + tier_id=STANDARD_TIER_ID, # assume paid since they specified one + source="config", + ) + if env_project_id: + return ProjectContext( + project_id=env_project_id, + tier_id=STANDARD_TIER_ID, + source="env", + ) + + # Discover via loadCodeAssist + info = load_code_assist(access_token, user_agent_model=user_agent_model) + + effective_project = info.cloudaicompanion_project + tier = info.current_tier_id + + if not tier: + # User hasn't been onboarded — provision them on free tier + onboard_resp = onboard_user( + access_token, + tier_id=FREE_TIER_ID, + project_id="", + user_agent_model=user_agent_model, + ) + # Re-parse from the onboard response + response_body = onboard_resp.get("response") or {} + if isinstance(response_body, dict): + effective_project = ( + effective_project + or str(response_body.get("cloudaicompanionProject") or "") + ) + tier = FREE_TIER_ID + source = "onboarded" + else: + source = "discovered" + + return ProjectContext( + project_id=effective_project, + managed_project_id=effective_project if tier == FREE_TIER_ID else "", + tier_id=tier, + source=source, + ) diff --git a/agent/google_oauth.py b/agent/google_oauth.py new file mode 100644 index 0000000000..4fda090fc6 --- /dev/null +++ b/agent/google_oauth.py @@ -0,0 +1,1048 @@ +"""Google OAuth PKCE flow for the Gemini (google-gemini-cli) inference provider. + +This module implements Authorization Code + PKCE (S256) OAuth against Google's +accounts.google.com endpoints. The resulting access token is used by +``agent.gemini_cloudcode_adapter`` to talk to ``cloudcode-pa.googleapis.com`` +(Google's Code Assist backend that powers the Gemini CLI's free and paid tiers). 
+ +Synthesized from: +- jenslys/opencode-gemini-auth (MIT) — overall flow shape, public OAuth creds, request format +- clawdbot/extensions/google/ — refresh-token rotation, VPC-SC handling reference +- PRs #10176 (@sliverp) and #10779 (@newarthur) — PKCE module structure, cross-process lock + +Storage (``~/.hermes/auth/google_oauth.json``, chmod 0o600): + + { + "refresh": "refreshToken|projectId|managedProjectId", + "access": "...", + "expires": 1744848000000, // unix MILLIseconds + "email": "user@example.com" + } + +The ``refresh`` field packs the refresh_token together with the resolved GCP +project IDs so subsequent sessions don't need to re-discover the project. +This matches opencode-gemini-auth's storage contract exactly. + +The packed format stays parseable even if no project IDs are present — just +a bare refresh_token is treated as "packed with empty IDs". + +Public client credentials +------------------------- +The client_id and client_secret below are Google's PUBLIC desktop OAuth client +for their own open-source gemini-cli. They are baked into every copy of the +gemini-cli npm package and are NOT confidential — desktop OAuth clients have +no secret-keeping requirement (PKCE provides the security). Shipping them here +is consistent with opencode-gemini-auth and the official Google gemini-cli. + +Policy note: Google considers using this OAuth client with third-party software +a policy violation. Users see an upfront warning with ``confirm(default=False)`` +before authorization begins. 
+""" + +from __future__ import annotations + +import base64 +import contextlib +import hashlib +import http.server +import json +import logging +import os +import secrets +import socket +import stat +import threading +import time +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, Optional, Tuple + +from hermes_constants import get_hermes_home + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# OAuth client credential resolution. +# +# Resolution order: +# 1. HERMES_GEMINI_CLIENT_ID / HERMES_GEMINI_CLIENT_SECRET env vars (power users) +# 2. Shipped defaults — Google's public gemini-cli desktop OAuth client +# (baked into every copy of Google's open-source gemini-cli; NOT +# confidential — desktop OAuth clients use PKCE, not client_secret, for +# security). Using these matches opencode-gemini-auth behavior. +# 3. Fallback: scrape from a locally installed gemini-cli binary (helps forks +# that deliberately wipe the shipped defaults). +# 4. Fail with a helpful error. +# ============================================================================= + +ENV_CLIENT_ID = "HERMES_GEMINI_CLIENT_ID" +ENV_CLIENT_SECRET = "HERMES_GEMINI_CLIENT_SECRET" + +# Public gemini-cli desktop OAuth client (shipped in Google's open-source +# gemini-cli MIT repo). Composed piecewise to keep the constants readable and +# to pair each piece with an explicit comment about why it is non-confidential. 
+# See: https://github.com/google-gemini/gemini-cli/blob/main/packages/core/src/code_assist/oauth2.ts +_PUBLIC_CLIENT_ID_PROJECT_NUM = "681255809395" +_PUBLIC_CLIENT_ID_HASH = "oo8ft2oprdrnp9e3aqf6av3hmdib135j" +_PUBLIC_CLIENT_SECRET_SUFFIX = "4uHgMPm-1o7Sk-geV6Cu5clXFsxl" + +_DEFAULT_CLIENT_ID = ( + f"{_PUBLIC_CLIENT_ID_PROJECT_NUM}-{_PUBLIC_CLIENT_ID_HASH}" + ".apps.googleusercontent.com" +) +_DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}" + +# Regex patterns for fallback scraping from an installed gemini-cli. +import re as _re +_CLIENT_ID_PATTERN = _re.compile( + r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]" +) +_CLIENT_SECRET_PATTERN = _re.compile( + r"OAUTH_CLIENT_SECRET\s*=\s*['\"](GOCSPX-[A-Za-z0-9_-]+)['\"]" +) +_CLIENT_ID_SHAPE = _re.compile(r"([0-9]{8,}-[a-z0-9]{20,}\.apps\.googleusercontent\.com)") +_CLIENT_SECRET_SHAPE = _re.compile(r"(GOCSPX-[A-Za-z0-9_-]{20,})") + + +# ============================================================================= +# Endpoints & constants +# ============================================================================= + +AUTH_ENDPOINT = "https://accounts.google.com/o/oauth2/v2/auth" +TOKEN_ENDPOINT = "https://oauth2.googleapis.com/token" +USERINFO_ENDPOINT = "https://www.googleapis.com/oauth2/v1/userinfo" + +OAUTH_SCOPES = ( + "https://www.googleapis.com/auth/cloud-platform " + "https://www.googleapis.com/auth/userinfo.email " + "https://www.googleapis.com/auth/userinfo.profile" +) + +DEFAULT_REDIRECT_PORT = 8085 +REDIRECT_HOST = "127.0.0.1" +CALLBACK_PATH = "/oauth2callback" + +# 60-second clock skew buffer (matches opencode-gemini-auth). 
+REFRESH_SKEW_SECONDS = 60 + +TOKEN_REQUEST_TIMEOUT_SECONDS = 20.0 +CALLBACK_WAIT_SECONDS = 300 +LOCK_TIMEOUT_SECONDS = 30.0 + +# Headless env detection +_HEADLESS_ENV_VARS = ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY", "HERMES_HEADLESS") + + +# ============================================================================= +# Error type +# ============================================================================= + +class GoogleOAuthError(RuntimeError): + """Raised for any failure in the Google OAuth flow.""" + + def __init__(self, message: str, *, code: str = "google_oauth_error") -> None: + super().__init__(message) + self.code = code + + +# ============================================================================= +# File paths & cross-process locking +# ============================================================================= + +def _credentials_path() -> Path: + return get_hermes_home() / "auth" / "google_oauth.json" + + +def _lock_path() -> Path: + return _credentials_path().with_suffix(".json.lock") + + +_lock_state = threading.local() + + +@contextlib.contextmanager +def _credentials_lock(timeout_seconds: float = LOCK_TIMEOUT_SECONDS): + """Cross-process lock around the credentials file (fcntl POSIX / msvcrt Windows).""" + depth = getattr(_lock_state, "depth", 0) + if depth > 0: + _lock_state.depth = depth + 1 + try: + yield + finally: + _lock_state.depth -= 1 + return + + lock_file_path = _lock_path() + lock_file_path.parent.mkdir(parents=True, exist_ok=True) + fd = os.open(str(lock_file_path), os.O_CREAT | os.O_RDWR, 0o600) + acquired = False + try: + try: + import fcntl + except ImportError: + fcntl = None + + if fcntl is not None: + deadline = time.monotonic() + max(0.0, float(timeout_seconds)) + while True: + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + acquired = True + break + except BlockingIOError: + if time.monotonic() >= deadline: + raise TimeoutError( + f"Timed out acquiring Google OAuth credentials lock at {lock_file_path}." 
+ ) + time.sleep(0.05) + else: + try: + import msvcrt # type: ignore[import-not-found] + + deadline = time.monotonic() + max(0.0, float(timeout_seconds)) + while True: + try: + msvcrt.locking(fd, msvcrt.LK_NBLCK, 1) + acquired = True + break + except OSError: + if time.monotonic() >= deadline: + raise TimeoutError( + f"Timed out acquiring Google OAuth credentials lock at {lock_file_path}." + ) + time.sleep(0.05) + except ImportError: + acquired = True + + _lock_state.depth = 1 + yield + finally: + try: + if acquired: + try: + import fcntl + + fcntl.flock(fd, fcntl.LOCK_UN) + except ImportError: + try: + import msvcrt # type: ignore[import-not-found] + + try: + msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) + except OSError: + pass + except ImportError: + pass + finally: + os.close(fd) + _lock_state.depth = 0 + + +# ============================================================================= +# Client ID resolution +# ============================================================================= + +_scraped_creds_cache: Dict[str, str] = {} + + +def _locate_gemini_cli_oauth_js() -> Optional[Path]: + """Walk the user's gemini binary install to find its oauth2.js. + + Returns None if gemini isn't installed. Supports both the npm install + (``node_modules/@google/gemini-cli-core/dist/**/code_assist/oauth2.js``) + and the Homebrew ``bundle/`` layout. 
+ """ + import shutil + + gemini = shutil.which("gemini") + if not gemini: + return None + + try: + real = Path(gemini).resolve() + except OSError: + return None + + # Walk up from the binary to find npm install root + search_dirs: list[Path] = [] + cur = real.parent + for _ in range(8): # don't walk too far + search_dirs.append(cur) + if (cur / "node_modules").exists(): + search_dirs.append(cur / "node_modules" / "@google" / "gemini-cli-core") + break + if cur.parent == cur: + break + cur = cur.parent + + for root in search_dirs: + if not root.exists(): + continue + # Common known paths + candidates = [ + root / "dist" / "src" / "code_assist" / "oauth2.js", + root / "dist" / "code_assist" / "oauth2.js", + root / "src" / "code_assist" / "oauth2.js", + ] + for c in candidates: + if c.exists(): + return c + # Recursive fallback: look for oauth2.js within 10 dirs deep + try: + for path in root.rglob("oauth2.js"): + return path + except (OSError, ValueError): + continue + + return None + + +def _scrape_client_credentials() -> Tuple[str, str]: + """Extract client_id + client_secret from the local gemini-cli install.""" + if _scraped_creds_cache.get("resolved"): + return _scraped_creds_cache.get("client_id", ""), _scraped_creds_cache.get("client_secret", "") + + oauth_js = _locate_gemini_cli_oauth_js() + if oauth_js is None: + _scraped_creds_cache["resolved"] = "1" # Don't retry on every call + return "", "" + + try: + content = oauth_js.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + logger.debug("Failed to read oauth2.js at %s: %s", oauth_js, exc) + _scraped_creds_cache["resolved"] = "1" + return "", "" + + # Precise pattern first, then fallback shape match + cid_match = _CLIENT_ID_PATTERN.search(content) or _CLIENT_ID_SHAPE.search(content) + cs_match = _CLIENT_SECRET_PATTERN.search(content) or _CLIENT_SECRET_SHAPE.search(content) + + client_id = cid_match.group(1) if cid_match else "" + client_secret = cs_match.group(1) if cs_match else "" + + 
_scraped_creds_cache["client_id"] = client_id + _scraped_creds_cache["client_secret"] = client_secret + _scraped_creds_cache["resolved"] = "1" + + if client_id: + logger.info("Scraped Gemini OAuth client from %s", oauth_js) + + return client_id, client_secret + + +def _get_client_id() -> str: + env_val = (os.getenv(ENV_CLIENT_ID) or "").strip() + if env_val: + return env_val + if _DEFAULT_CLIENT_ID: + return _DEFAULT_CLIENT_ID + scraped, _ = _scrape_client_credentials() + return scraped + + +def _get_client_secret() -> str: + env_val = (os.getenv(ENV_CLIENT_SECRET) or "").strip() + if env_val: + return env_val + if _DEFAULT_CLIENT_SECRET: + return _DEFAULT_CLIENT_SECRET + _, scraped = _scrape_client_credentials() + return scraped + + +def _require_client_id() -> str: + cid = _get_client_id() + if not cid: + raise GoogleOAuthError( + "Google OAuth client ID is not available.\n" + "Hermes looks for a locally installed gemini-cli to source the OAuth client. " + "Either:\n" + " 1. Install it: npm install -g @google/gemini-cli (or brew install gemini-cli)\n" + " 2. 
Set HERMES_GEMINI_CLIENT_ID and HERMES_GEMINI_CLIENT_SECRET in ~/.hermes/.env\n" + "\n" + "Register a Desktop OAuth client at:\n" + " https://console.cloud.google.com/apis/credentials\n" + "(enable the Generative Language API on the project).", + code="google_oauth_client_id_missing", + ) + return cid + + +# ============================================================================= +# PKCE +# ============================================================================= + +def _generate_pkce_pair() -> Tuple[str, str]: + """Generate a (verifier, challenge) pair using S256.""" + verifier = secrets.token_urlsafe(64) + digest = hashlib.sha256(verifier.encode("ascii")).digest() + challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii") + return verifier, challenge + + +# ============================================================================= +# Packed refresh format: refresh_token[|project_id[|managed_project_id]] +# ============================================================================= + +@dataclass +class RefreshParts: + refresh_token: str + project_id: str = "" + managed_project_id: str = "" + + @classmethod + def parse(cls, packed: str) -> "RefreshParts": + if not packed: + return cls(refresh_token="") + parts = packed.split("|", 2) + return cls( + refresh_token=parts[0], + project_id=parts[1] if len(parts) > 1 else "", + managed_project_id=parts[2] if len(parts) > 2 else "", + ) + + def format(self) -> str: + if not self.refresh_token: + return "" + if not self.project_id and not self.managed_project_id: + return self.refresh_token + return f"{self.refresh_token}|{self.project_id}|{self.managed_project_id}" + + +# ============================================================================= +# Credentials (dataclass wrapping the on-disk format) +# ============================================================================= + +@dataclass +class GoogleCredentials: + access_token: str + refresh_token: str + expires_ms: int # unix 
milliseconds + email: str = "" + project_id: str = "" + managed_project_id: str = "" + + def to_dict(self) -> Dict[str, Any]: + return { + "refresh": RefreshParts( + refresh_token=self.refresh_token, + project_id=self.project_id, + managed_project_id=self.managed_project_id, + ).format(), + "access": self.access_token, + "expires": int(self.expires_ms), + "email": self.email, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "GoogleCredentials": + refresh_packed = str(data.get("refresh", "") or "") + parts = RefreshParts.parse(refresh_packed) + return cls( + access_token=str(data.get("access", "") or ""), + refresh_token=parts.refresh_token, + expires_ms=int(data.get("expires", 0) or 0), + email=str(data.get("email", "") or ""), + project_id=parts.project_id, + managed_project_id=parts.managed_project_id, + ) + + def expires_unix_seconds(self) -> float: + return self.expires_ms / 1000.0 + + def access_token_expired(self, skew_seconds: int = REFRESH_SKEW_SECONDS) -> bool: + if not self.access_token or not self.expires_ms: + return True + return (time.time() + max(0, skew_seconds)) * 1000 >= self.expires_ms + + +# ============================================================================= +# Credential I/O (atomic + locked) +# ============================================================================= + +def load_credentials() -> Optional[GoogleCredentials]: + """Load credentials from disk. 
Returns None if missing or corrupt.""" + path = _credentials_path() + if not path.exists(): + return None + try: + with _credentials_lock(): + raw = path.read_text(encoding="utf-8") + data = json.loads(raw) + except (json.JSONDecodeError, OSError, IOError) as exc: + logger.warning("Failed to read Google OAuth credentials at %s: %s", path, exc) + return None + if not isinstance(data, dict): + return None + creds = GoogleCredentials.from_dict(data) + if not creds.access_token: + return None + return creds + + +def save_credentials(creds: GoogleCredentials) -> Path: + """Atomically write creds to disk with 0o600 permissions.""" + path = _credentials_path() + path.parent.mkdir(parents=True, exist_ok=True) + payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n" + + with _credentials_lock(): + tmp_path = path.with_suffix(f".tmp.{os.getpid()}.{secrets.token_hex(4)}") + try: + with open(tmp_path, "w", encoding="utf-8") as fh: + fh.write(payload) + fh.flush() + os.fsync(fh.fileno()) + os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR) + os.replace(tmp_path, path) + finally: + try: + if tmp_path.exists(): + tmp_path.unlink() + except OSError: + pass + return path + + +def clear_credentials() -> None: + """Remove the creds file. 
Idempotent.""" + path = _credentials_path() + with _credentials_lock(): + try: + path.unlink() + except FileNotFoundError: + pass + except OSError as exc: + logger.warning("Failed to remove Google OAuth credentials at %s: %s", path, exc) + + +# ============================================================================= +# HTTP helpers +# ============================================================================= + +def _post_form(url: str, data: Dict[str, str], timeout: float) -> Dict[str, Any]: + """POST x-www-form-urlencoded and return parsed JSON response.""" + body = urllib.parse.urlencode(data).encode("ascii") + request = urllib.request.Request( + url, + data=body, + method="POST", + headers={ + "Content-Type": "application/x-www-form-urlencoded", + "Accept": "application/json", + }, + ) + try: + with urllib.request.urlopen(request, timeout=timeout) as response: + raw = response.read().decode("utf-8", errors="replace") + return json.loads(raw) + except urllib.error.HTTPError as exc: + detail = "" + try: + detail = exc.read().decode("utf-8", errors="replace") + except Exception: + pass + # Detect invalid_grant to signal credential revocation + code = "google_oauth_token_http_error" + if "invalid_grant" in detail.lower(): + code = "google_oauth_invalid_grant" + raise GoogleOAuthError( + f"Google OAuth token endpoint returned HTTP {exc.code}: {detail or exc.reason}", + code=code, + ) from exc + except urllib.error.URLError as exc: + raise GoogleOAuthError( + f"Google OAuth token request failed: {exc}", + code="google_oauth_token_network_error", + ) from exc + + +def exchange_code( + code: str, + verifier: str, + redirect_uri: str, + *, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + timeout: float = TOKEN_REQUEST_TIMEOUT_SECONDS, +) -> Dict[str, Any]: + """Exchange authorization code for access + refresh tokens.""" + cid = client_id if client_id is not None else _get_client_id() + csecret = client_secret if client_secret is not 
None else _get_client_secret() + data = { + "grant_type": "authorization_code", + "code": code, + "code_verifier": verifier, + "client_id": cid, + "redirect_uri": redirect_uri, + } + if csecret: + data["client_secret"] = csecret + return _post_form(TOKEN_ENDPOINT, data, timeout) + + +def refresh_access_token( + refresh_token: str, + *, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + timeout: float = TOKEN_REQUEST_TIMEOUT_SECONDS, +) -> Dict[str, Any]: + """Refresh the access token.""" + if not refresh_token: + raise GoogleOAuthError( + "Cannot refresh: refresh_token is empty. Re-run OAuth login.", + code="google_oauth_refresh_token_missing", + ) + cid = client_id if client_id is not None else _get_client_id() + csecret = client_secret if client_secret is not None else _get_client_secret() + data = { + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": cid, + } + if csecret: + data["client_secret"] = csecret + return _post_form(TOKEN_ENDPOINT, data, timeout) + + +def _fetch_user_email(access_token: str, timeout: float = TOKEN_REQUEST_TIMEOUT_SECONDS) -> str: + """Best-effort userinfo fetch for display. 
Failures return empty string.""" + try: + request = urllib.request.Request( + USERINFO_ENDPOINT + "?alt=json", + headers={"Authorization": f"Bearer {access_token}"}, + ) + with urllib.request.urlopen(request, timeout=timeout) as response: + raw = response.read().decode("utf-8", errors="replace") + data = json.loads(raw) + return str(data.get("email", "") or "") + except Exception as exc: + logger.debug("Userinfo fetch failed (non-fatal): %s", exc) + return "" + + +# ============================================================================= +# In-flight refresh deduplication +# ============================================================================= + +_refresh_inflight: Dict[str, threading.Event] = {} +_refresh_inflight_lock = threading.Lock() + + +def get_valid_access_token(*, force_refresh: bool = False) -> str: + """Load creds, refreshing if near expiry, and return a valid bearer token. + + Dedupes concurrent refreshes by refresh_token. On ``invalid_grant``, the + credential file is wiped and a ``google_oauth_invalid_grant`` error is raised + (caller is expected to trigger a re-login flow). + """ + creds = load_credentials() + if creds is None: + raise GoogleOAuthError( + "No Google OAuth credentials found. Run `hermes login --provider google-gemini-cli` first.", + code="google_oauth_not_logged_in", + ) + + if not force_refresh and not creds.access_token_expired(): + return creds.access_token + + # Dedupe concurrent refreshes by refresh_token + rt = creds.refresh_token + with _refresh_inflight_lock: + event = _refresh_inflight.get(rt) + if event is None: + event = threading.Event() + _refresh_inflight[rt] = event + owner = True + else: + owner = False + + if not owner: + # Another thread is refreshing — wait, then re-read from disk. 
+ event.wait(timeout=LOCK_TIMEOUT_SECONDS) + fresh = load_credentials() + if fresh is not None and not fresh.access_token_expired(): + return fresh.access_token + # Fall through to do our own refresh if the other attempt failed + + try: + try: + resp = refresh_access_token(rt) + except GoogleOAuthError as exc: + if exc.code == "google_oauth_invalid_grant": + logger.warning( + "Google OAuth refresh token invalid (revoked/expired). " + "Clearing credentials at %s — user must re-login.", + _credentials_path(), + ) + clear_credentials() + raise + + new_access = str(resp.get("access_token", "") or "").strip() + if not new_access: + raise GoogleOAuthError( + "Refresh response did not include an access_token.", + code="google_oauth_refresh_empty", + ) + # Google sometimes rotates refresh_token; preserve existing if omitted. + new_refresh = str(resp.get("refresh_token", "") or "").strip() or creds.refresh_token + expires_in = int(resp.get("expires_in", 0) or 0) + + creds.access_token = new_access + creds.refresh_token = new_refresh + creds.expires_ms = int((time.time() + max(60, expires_in)) * 1000) + save_credentials(creds) + return creds.access_token + finally: + if owner: + with _refresh_inflight_lock: + _refresh_inflight.pop(rt, None) + event.set() + + +# ============================================================================= +# Update project IDs on stored creds +# ============================================================================= + +def update_project_ids(project_id: str = "", managed_project_id: str = "") -> None: + """Persist resolved/discovered project IDs back into the credential file.""" + creds = load_credentials() + if creds is None: + return + if project_id: + creds.project_id = project_id + if managed_project_id: + creds.managed_project_id = managed_project_id + save_credentials(creds) + + +# ============================================================================= +# Callback server +# 
============================================================================= + +class _OAuthCallbackHandler(http.server.BaseHTTPRequestHandler): + expected_state: str = "" + captured_code: Optional[str] = None + captured_error: Optional[str] = None + ready: Optional[threading.Event] = None + + def log_message(self, format: str, *args: Any) -> None: # noqa: A002, N802 + logger.debug("OAuth callback: " + format, *args) + + def do_GET(self) -> None: # noqa: N802 + parsed = urllib.parse.urlparse(self.path) + if parsed.path != CALLBACK_PATH: + self.send_response(404) + self.end_headers() + return + + params = urllib.parse.parse_qs(parsed.query) + state = (params.get("state") or [""])[0] + error = (params.get("error") or [""])[0] + code = (params.get("code") or [""])[0] + + if state != type(self).expected_state: + type(self).captured_error = "state_mismatch" + self._respond_html(400, _ERROR_PAGE.format(message="State mismatch — aborting for safety.")) + elif error: + type(self).captured_error = error + # Simple HTML-escape of the error value + safe_err = ( + str(error) + .replace("&", "&amp;") + .replace("<", "&lt;") + .replace(">", "&gt;") + ) + self._respond_html(400, _ERROR_PAGE.format(message=f"Authorization denied: {safe_err}")) + elif code: + type(self).captured_code = code + self._respond_html(200, _SUCCESS_PAGE) + else: + type(self).captured_error = "no_code" + self._respond_html(400, _ERROR_PAGE.format(message="Callback received no authorization code.")) + + if type(self).ready is not None: + type(self).ready.set() + + def _respond_html(self, status: int, body: str) -> None: + payload = body.encode("utf-8") + self.send_response(status) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + +_SUCCESS_PAGE = """ +Hermes — signed in + +

Signed in to Google.

+

You can close this tab and return to your terminal.

+""" + +_ERROR_PAGE = """ +Hermes — sign-in failed + +

Sign-in failed

{message}

+

Return to your terminal — Hermes will walk you through a manual paste fallback.

+""" + + +def _bind_callback_server(preferred_port: int = DEFAULT_REDIRECT_PORT) -> Tuple[http.server.HTTPServer, int]: + try: + server = http.server.HTTPServer((REDIRECT_HOST, preferred_port), _OAuthCallbackHandler) + return server, preferred_port + except OSError as exc: + logger.info( + "Preferred OAuth callback port %d unavailable (%s); requesting ephemeral port", + preferred_port, exc, + ) + server = http.server.HTTPServer((REDIRECT_HOST, 0), _OAuthCallbackHandler) + return server, server.server_address[1] + + +def _is_headless() -> bool: + return any(os.getenv(k) for k in _HEADLESS_ENV_VARS) + + +# ============================================================================= +# Main login flow +# ============================================================================= + +def start_oauth_flow( + *, + force_relogin: bool = False, + open_browser: bool = True, + callback_wait_seconds: float = CALLBACK_WAIT_SECONDS, + project_id: str = "", +) -> GoogleCredentials: + """Run the interactive browser OAuth flow and persist credentials. + + Args: + force_relogin: If False and valid creds already exist, return them. + open_browser: If False, skip webbrowser.open and print the URL only. + callback_wait_seconds: Max seconds to wait for the browser callback. + project_id: Initial GCP project ID to bake into the stored creds. + Can be discovered/updated later via update_project_ids(). 
+ """ + if not force_relogin: + existing = load_credentials() + if existing and existing.access_token: + logger.info("Google OAuth credentials already present; skipping login.") + return existing + + client_id = _require_client_id() # raises GoogleOAuthError with install hints + client_secret = _get_client_secret() + + verifier, challenge = _generate_pkce_pair() + state = secrets.token_urlsafe(16) + + # If headless, skip the listener and go straight to paste mode + if _is_headless() and open_browser: + logger.info("Headless environment detected; using paste-mode OAuth fallback.") + return _paste_mode_login(verifier, challenge, state, client_id, client_secret, project_id) + + server, port = _bind_callback_server(DEFAULT_REDIRECT_PORT) + redirect_uri = f"http://{REDIRECT_HOST}:{port}{CALLBACK_PATH}" + + _OAuthCallbackHandler.expected_state = state + _OAuthCallbackHandler.captured_code = None + _OAuthCallbackHandler.captured_error = None + ready = threading.Event() + _OAuthCallbackHandler.ready = ready + + params = { + "client_id": client_id, + "redirect_uri": redirect_uri, + "response_type": "code", + "scope": OAUTH_SCOPES, + "state": state, + "code_challenge": challenge, + "code_challenge_method": "S256", + "access_type": "offline", + "prompt": "consent", + } + auth_url = AUTH_ENDPOINT + "?" 
+ urllib.parse.urlencode(params) + "#hermes" + + server_thread = threading.Thread(target=server.serve_forever, daemon=True) + server_thread.start() + + print() + print("Opening your browser to sign in to Google…") + print(f"If it does not open automatically, visit:\n {auth_url}") + print() + + if open_browser: + try: + import webbrowser + + webbrowser.open(auth_url, new=1, autoraise=True) + except Exception as exc: + logger.debug("webbrowser.open failed: %s", exc) + + code: Optional[str] = None + try: + if ready.wait(timeout=callback_wait_seconds): + code = _OAuthCallbackHandler.captured_code + error = _OAuthCallbackHandler.captured_error + if error: + raise GoogleOAuthError( + f"Authorization failed: {error}", + code="google_oauth_authorization_failed", + ) + else: + logger.info("Callback server timed out — offering manual paste fallback.") + code = _prompt_paste_fallback() + finally: + try: + server.shutdown() + except Exception: + pass + try: + server.server_close() + except Exception: + pass + server_thread.join(timeout=2.0) + + if not code: + raise GoogleOAuthError( + "No authorization code received. 
Aborting.", + code="google_oauth_no_code", + ) + + token_resp = exchange_code( + code, verifier, redirect_uri, + client_id=client_id, client_secret=client_secret, + ) + return _persist_token_response(token_resp, project_id=project_id) + + +def _paste_mode_login( + verifier: str, + challenge: str, + state: str, + client_id: str, + client_secret: str, + project_id: str, +) -> GoogleCredentials: + """Run OAuth flow without a local callback server.""" + # Use a placeholder redirect URI; user will paste the full URL back + redirect_uri = f"http://{REDIRECT_HOST}:{DEFAULT_REDIRECT_PORT}{CALLBACK_PATH}" + params = { + "client_id": client_id, + "redirect_uri": redirect_uri, + "response_type": "code", + "scope": OAUTH_SCOPES, + "state": state, + "code_challenge": challenge, + "code_challenge_method": "S256", + "access_type": "offline", + "prompt": "consent", + } + auth_url = AUTH_ENDPOINT + "?" + urllib.parse.urlencode(params) + "#hermes" + + print() + print("Open this URL in a browser on any device:") + print(f" {auth_url}") + print() + print("After signing in, Google will redirect to localhost (which won't load).") + print("Copy the full URL from your browser and paste it below.") + print() + + code = _prompt_paste_fallback() + if not code: + raise GoogleOAuthError("No authorization code provided.", code="google_oauth_no_code") + + token_resp = exchange_code( + code, verifier, redirect_uri, + client_id=client_id, client_secret=client_secret, + ) + return _persist_token_response(token_resp, project_id=project_id) + + +def _prompt_paste_fallback() -> Optional[str]: + print() + print("Paste the full redirect URL Google showed you, OR just the 'code=' parameter value.") + raw = input("Callback URL or code: ").strip() + if not raw: + return None + if raw.startswith("http://") or raw.startswith("https://"): + parsed = urllib.parse.urlparse(raw) + params = urllib.parse.parse_qs(parsed.query) + return (params.get("code") or [""])[0] or None + # Accept a bare query string as well 
+ if raw.startswith("?"): + params = urllib.parse.parse_qs(raw[1:]) + return (params.get("code") or [""])[0] or None + return raw + + +def _persist_token_response( + token_resp: Dict[str, Any], + *, + project_id: str = "", +) -> GoogleCredentials: + access_token = str(token_resp.get("access_token", "") or "").strip() + refresh_token = str(token_resp.get("refresh_token", "") or "").strip() + expires_in = int(token_resp.get("expires_in", 0) or 0) + if not access_token or not refresh_token: + raise GoogleOAuthError( + "Google token response missing access_token or refresh_token.", + code="google_oauth_incomplete_token_response", + ) + creds = GoogleCredentials( + access_token=access_token, + refresh_token=refresh_token, + expires_ms=int((time.time() + max(60, expires_in)) * 1000), + email=_fetch_user_email(access_token), + project_id=project_id, + managed_project_id="", + ) + save_credentials(creds) + logger.info("Google OAuth credentials saved to %s", _credentials_path()) + return creds + + +# ============================================================================= +# Pool-compatible variant +# ============================================================================= + +def run_gemini_oauth_login_pure() -> Dict[str, Any]: + """Run the login flow and return a dict matching the credential pool shape.""" + creds = start_oauth_flow(force_relogin=True) + return { + "access_token": creds.access_token, + "refresh_token": creds.refresh_token, + "expires_at_ms": creds.expires_ms, + "email": creds.email, + "project_id": creds.project_id, + } + + +# ============================================================================= +# Project ID resolution +# ============================================================================= + +def resolve_project_id_from_env() -> str: + """Return a GCP project ID from env vars, in priority order.""" + for var in ( + "HERMES_GEMINI_PROJECT_ID", + "GOOGLE_CLOUD_PROJECT", + "GOOGLE_CLOUD_PROJECT_ID", + ): + val = (os.getenv(var) or 
"").strip() + if val: + return val + return "" diff --git a/agent/insights.py b/agent/insights.py index 8972f94a83..70907b4f3d 100644 --- a/agent/insights.py +++ b/agent/insights.py @@ -762,13 +762,7 @@ class InsightsEngine: lines.append(f" Sessions: {o['total_sessions']:<12} Messages: {o['total_messages']:,}") lines.append(f" Tool calls: {o['total_tool_calls']:<12,} User messages: {o['user_messages']:,}") lines.append(f" Input tokens: {o['total_input_tokens']:<12,} Output tokens: {o['total_output_tokens']:,}") - cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0) - if cache_total > 0: - lines.append(f" Cache read: {o['total_cache_read_tokens']:<12,} Cache write: {o['total_cache_write_tokens']:,}") - cost_str = f"${o['estimated_cost']:.2f}" - if o.get("models_without_pricing"): - cost_str += " *" - lines.append(f" Total tokens: {o['total_tokens']:<12,} Est. cost: {cost_str}") + lines.append(f" Total tokens: {o['total_tokens']:,}") if o["total_hours"] > 0: lines.append(f" Active time: ~{_format_duration(o['total_hours'] * 3600):<11} Avg session: ~{_format_duration(o['avg_session_duration'])}") lines.append(f" Avg msgs/session: {o['avg_messages_per_session']:.1f}") @@ -778,16 +772,10 @@ class InsightsEngine: if report["models"]: lines.append(" 🤖 Models Used") lines.append(" " + "─" * 56) - lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12} {'Cost':>8}") + lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12}") for m in report["models"]: model_name = m["model"][:28] - if m.get("has_pricing"): - cost_cell = f"${m['cost']:>6.2f}" - else: - cost_cell = " N/A" - lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,} {cost_cell}") - if o.get("models_without_pricing"): - lines.append(" * Cost N/A for custom/self-hosted models") + lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,}") lines.append("") # Platform breakdown @@ -889,15 +877,7 @@ class InsightsEngine: # Overview 
lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}") - cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0) - if cache_total > 0: - lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})") - else: - lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})") - cost_note = "" - if o.get("models_without_pricing"): - cost_note = " _(excludes custom/self-hosted models)_" - lines.append(f"**Est. cost:** ${o['estimated_cost']:.2f}{cost_note}") + lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})") if o["total_hours"] > 0: lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}") lines.append("") @@ -906,8 +886,7 @@ class InsightsEngine: if report["models"]: lines.append("**🤖 Models:**") for m in report["models"][:5]: - cost_str = f"${m['cost']:.2f}" if m.get("has_pricing") else "N/A" - lines.append(f" {m['model'][:25]} — {m['sessions']} sessions, {m['total_tokens']:,} tokens, {cost_str}") + lines.append(f" {m['model'][:25]} — {m['sessions']} sessions, {m['total_tokens']:,} tokens") lines.append("") # Platforms (if multi-platform) diff --git a/agent/memory_manager.py b/agent/memory_manager.py index 6cd1c860b6..2435c3f248 100644 --- a/agent/memory_manager.py +++ b/agent/memory_manager.py @@ -28,6 +28,7 @@ Usage in run_agent.py: from __future__ import annotations +import json import logging import re from typing import Any, Dict, List, Optional @@ -43,11 +44,22 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- _FENCE_TAG_RE = re.compile(r'', re.IGNORECASE) +_INTERNAL_CONTEXT_RE = 
re.compile( + r'<\s*memory-context\s*>[\s\S]*?', + re.IGNORECASE, +) +_INTERNAL_NOTE_RE = re.compile( + r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*', + re.IGNORECASE, +) def sanitize_context(text: str) -> str: - """Strip fence-escape sequences from provider output.""" - return _FENCE_TAG_RE.sub('', text) + """Strip fence tags, injected context blocks, and system notes from provider output.""" + text = _INTERNAL_CONTEXT_RE.sub('', text) + text = _INTERNAL_NOTE_RE.sub('', text) + text = _FENCE_TAG_RE.sub('', text) + return text def build_memory_context_block(raw_context: str) -> str: diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 3b50066484..2b39be989b 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) # are preserved so the full model name reaches cache lookups and server queries. _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", - "gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek", + "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek", "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", "qwen-oauth", "xiaomi", @@ -33,9 +33,12 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "google", "google-gemini", "google-ai-studio", "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot", "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek", + "ollama", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", "mimo", "xiaomi-mimo", "arcee-ai", "arceeai", + "xai", "x-ai", "x.ai", "grok", + "nvidia", "nim", "nvidia-nim", "nemotron", "qwen-portal", }) @@ -100,6 +103,8 @@ DEFAULT_CONTEXT_LENGTHS = { # fuzzy-match collisions (e.g. 
"anthropic/claude-sonnet-4" is a # substring of "anthropic/claude-sonnet-4.6"). # OpenRouter-prefixed models resolve via OpenRouter live API or models.dev. + "claude-opus-4-7": 1000000, + "claude-opus-4.7": 1000000, "claude-opus-4-6": 1000000, "claude-sonnet-4-6": 1000000, "claude-opus-4.6": 1000000, @@ -111,7 +116,6 @@ DEFAULT_CONTEXT_LENGTHS = { "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context) - "gpt-5.3-codex-spark": 128000, # Spark variant has reduced 128k context "gpt-5.1-chat": 128000, # Chat variant has 128k context "gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k) "gpt-4.1": 1047576, @@ -120,7 +124,6 @@ DEFAULT_CONTEXT_LENGTHS = { "gemini": 1048576, # Gemma (open models served via AI Studio) "gemma-4-31b": 256000, - "gemma-4-26b": 256000, "gemma-3": 131072, "gemma": 8192, # fallback for older gemma models # DeepSeek @@ -154,6 +157,8 @@ DEFAULT_CONTEXT_LENGTHS = { "grok": 131072, # catch-all (grok-beta, unknown grok-*) # Kimi "kimi": 262144, + # Nemotron — NVIDIA's open-weights series (128K context across all sizes) + "nemotron": 131072, # Arcee "trinity": 262144, # OpenRouter @@ -236,8 +241,10 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.fireworks.ai": "fireworks", "opencode.ai": "opencode-go", "api.x.ai": "xai", + "integrate.api.nvidia.com": "nvidia", "api.xiaomimimo.com": "xiaomi", "xiaomimimo.com": "xiaomi", + "ollama.com": "ollama-cloud", } @@ -1011,6 +1018,16 @@ def get_model_context_length( if ctx: return ctx + # 4b. AWS Bedrock — use static context length table. + # Bedrock's ListFoundationModels doesn't expose context window sizes, + # so we maintain a curated table in bedrock_adapter.py. 
+ if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url): + try: + from agent.bedrock_adapter import get_bedrock_context_length + return get_bedrock_context_length(model) + except ImportError: + pass # boto3 not installed — fall through to generic resolution + # 5. Provider-aware lookups (before generic OpenRouter cache) # These are provider-specific and take priority over the generic OR cache, # since the same model can have different context limits per provider diff --git a/agent/models_dev.py b/agent/models_dev.py index 373daafc3f..3e5c911e7e 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -169,6 +169,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = { "togetherai": "togetherai", "perplexity": "perplexity", "cohere": "cohere", + "ollama-cloud": "ollama-cloud", } # Reverse mapping: models.dev → Hermes (built lazily) @@ -419,7 +420,10 @@ def list_provider_models(provider: str) -> List[str]: models = _get_provider_models(provider) if models is None: return [] - return list(models.keys()) + return [ + mid for mid in models.keys() + if not _should_hide_from_provider_catalog(provider, mid) + ] # Patterns that indicate non-agentic or noise models (TTS, embedding, @@ -431,6 +435,43 @@ _NOISE_PATTERNS: re.Pattern = re.compile( re.IGNORECASE, ) +# Google's live Gemini catalogs currently include a mix of stale slugs and +# Gemma models whose TPM quotas are too small for normal Hermes agent traffic. +# Keep capability metadata available for direct/manual use, but hide these from +# the Gemini model catalogs we surface in setup and model selection. +_GOOGLE_HIDDEN_MODELS = frozenset({ + # Low-TPM Gemma models that trip Google input-token quota walls under + # agent-style traffic despite advertising large context windows. 
+ "gemma-4-31b-it", + "gemma-4-26b-it", + "gemma-4-26b-a4b-it", + "gemma-3-1b", + "gemma-3-1b-it", + "gemma-3-2b", + "gemma-3-2b-it", + "gemma-3-4b", + "gemma-3-4b-it", + "gemma-3-12b", + "gemma-3-12b-it", + "gemma-3-27b", + "gemma-3-27b-it", + # Stale/retired Google slugs that still surface through models.dev-backed + # Gemini selection but 404 on the current Google endpoints. + "gemini-1.5-flash", + "gemini-1.5-pro", + "gemini-1.5-flash-8b", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", +}) + + +def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool: + provider_lower = (provider or "").strip().lower() + model_lower = (model_id or "").strip().lower() + if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_HIDDEN_MODELS: + return True + return False + def list_agentic_models(provider: str) -> List[str]: """Return model IDs suitable for agentic use from models.dev. @@ -447,6 +488,8 @@ def list_agentic_models(provider: str) -> List[str]: for mid, entry in models.items(): if not isinstance(entry, dict): continue + if _should_hide_from_provider_catalog(provider, mid): + continue if not entry.get("tool_call", False): continue if _NOISE_PATTERNS.search(mid): @@ -581,5 +624,3 @@ def get_model_info( return _parse_model_info(mid, mdata, mdev_id) return None - - diff --git a/agent/nous_rate_guard.py b/agent/nous_rate_guard.py new file mode 100644 index 0000000000..712d8a0f1f --- /dev/null +++ b/agent/nous_rate_guard.py @@ -0,0 +1,182 @@ +"""Cross-session rate limit guard for Nous Portal. + +Writes rate limit state to a shared file so all sessions (CLI, gateway, +cron, auxiliary) can check whether Nous Portal is currently rate-limited +before making requests. Prevents retry amplification when RPH is tapped. + +Each 429 from Nous triggers up to 9 API calls per conversation turn +(3 SDK retries x 3 Hermes retries), and every one of those calls counts +against RPH. 
By recording the rate limit state on first 429 and checking +it before subsequent attempts, we eliminate the amplification effect. +""" + +from __future__ import annotations + +import json +import logging +import os +import tempfile +import time +from typing import Any, Mapping, Optional + +logger = logging.getLogger(__name__) + +_STATE_SUBDIR = "rate_limits" +_STATE_FILENAME = "nous.json" + + +def _state_path() -> str: + """Return the path to the Nous rate limit state file.""" + try: + from hermes_constants import get_hermes_home + base = get_hermes_home() + except ImportError: + base = os.path.join(os.path.expanduser("~"), ".hermes") + return os.path.join(base, _STATE_SUBDIR, _STATE_FILENAME) + + +def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]: + """Extract the best available reset-time estimate from response headers. + + Priority: + 1. x-ratelimit-reset-requests-1h (hourly RPH window — most useful) + 2. x-ratelimit-reset-requests (per-minute RPM window) + 3. retry-after (generic HTTP header) + + Returns seconds-from-now, or None if no usable header found. + """ + if not headers: + return None + + lowered = {k.lower(): v for k, v in headers.items()} + + for key in ( + "x-ratelimit-reset-requests-1h", + "x-ratelimit-reset-requests", + "retry-after", + ): + raw = lowered.get(key) + if raw is not None: + try: + val = float(raw) + if val > 0: + return val + except (TypeError, ValueError): + pass + + return None + + +def record_nous_rate_limit( + *, + headers: Optional[Mapping[str, str]] = None, + error_context: Optional[dict[str, Any]] = None, + default_cooldown: float = 300.0, +) -> None: + """Record that Nous Portal is rate-limited. + + Parses the reset time from response headers or error context. + Falls back to ``default_cooldown`` (5 minutes) if no reset info + is available. Writes to a shared file that all sessions can read. + + Args: + headers: HTTP response headers from the 429 error. 
+ error_context: Structured error context from _extract_api_error_context(). + default_cooldown: Fallback cooldown in seconds when no header data. + """ + now = time.time() + reset_at = None + + # Try headers first (most accurate) + header_seconds = _parse_reset_seconds(headers) + if header_seconds is not None: + reset_at = now + header_seconds + + # Try error_context reset_at (from body parsing) + if reset_at is None and isinstance(error_context, dict): + ctx_reset = error_context.get("reset_at") + if isinstance(ctx_reset, (int, float)) and ctx_reset > now: + reset_at = float(ctx_reset) + + # Default cooldown + if reset_at is None: + reset_at = now + default_cooldown + + path = _state_path() + try: + state_dir = os.path.dirname(path) + os.makedirs(state_dir, exist_ok=True) + + state = { + "reset_at": reset_at, + "recorded_at": now, + "reset_seconds": reset_at - now, + } + + # Atomic write: write to temp file + rename + fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp") + try: + with os.fdopen(fd, "w") as f: + json.dump(state, f) + os.replace(tmp_path, path) + except Exception: + # Clean up temp file on failure + try: + os.unlink(tmp_path) + except OSError: + pass + raise + + logger.info( + "Nous rate limit recorded: resets in %.0fs (at %.0f)", + reset_at - now, reset_at, + ) + except Exception as exc: + logger.debug("Failed to write Nous rate limit state: %s", exc) + + +def nous_rate_limit_remaining() -> Optional[float]: + """Check if Nous Portal is currently rate-limited. + + Returns: + Seconds remaining until reset, or None if not rate-limited. 
+ """ + path = _state_path() + try: + with open(path) as f: + state = json.load(f) + reset_at = state.get("reset_at", 0) + remaining = reset_at - time.time() + if remaining > 0: + return remaining + # Expired — clean up + try: + os.unlink(path) + except OSError: + pass + return None + except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError): + return None + + +def clear_nous_rate_limit() -> None: + """Clear the rate limit state (e.g., after a successful Nous request).""" + try: + os.unlink(_state_path()) + except FileNotFoundError: + pass + except OSError as exc: + logger.debug("Failed to clear Nous rate limit state: %s", exc) + + +def format_remaining(seconds: float) -> str: + """Format seconds remaining into human-readable duration.""" + s = max(0, int(seconds)) + if s < 60: + return f"{s}s" + if s < 3600: + m, sec = divmod(s, 60) + return f"{m}m {sec}s" if sec else f"{m}m" + h, remainder = divmod(s, 3600) + m = remainder // 60 + return f"{h}h {m}m" if m else f"{h}h" diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index c61d6995b6..2a21043494 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -152,7 +152,13 @@ MEMORY_GUIDANCE = ( "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO " "state to memory; use session_search to recall those from past transcripts. " "If you've discovered a new way to do something, solved a problem that could be " - "necessary later, save it as a skill with the skill tool." + "necessary later, save it as a skill with the skill tool.\n" + "Write memories as declarative facts, not instructions to yourself. " + "'User prefers concise responses' ✓ — 'Always respond concisely' ✗. " + "'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. " + "Imperative phrasing gets re-read as a directive in later sessions and can " + "cause repeated work or override the user's current request. Procedures and " + "workflows belong in skills, not memory." 
) SESSION_SEARCH_GUIDANCE = ( @@ -295,7 +301,9 @@ PLATFORM_HINTS = { ), "telegram": ( "You are on a text messaging communication platform, Telegram. " - "Please do not use markdown as it does not render. " + "Standard markdown is automatically converted to Telegram format. " + "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, " + "`inline code`, ```code blocks```, [links](url), and ## headers. " "You can send media files natively: to deliver a file to the user, " "include MEDIA:/absolute/path/to/file in your response. Images " "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice " @@ -611,12 +619,14 @@ def build_skills_system_prompt( or get_session_env("HERMES_SESSION_PLATFORM") or "" ) + disabled = get_disabled_skill_names() cache_key = ( str(skills_dir.resolve()), tuple(str(d) for d in external_dirs), tuple(sorted(str(t) for t in (available_tools or set()))), tuple(sorted(str(ts) for ts in (available_toolsets or set()))), _platform_hint, + tuple(sorted(disabled)), ) with _SKILLS_PROMPT_CACHE_LOCK: cached = _SKILLS_PROMPT_CACHE.get(cache_key) @@ -624,8 +634,6 @@ def build_skills_system_prompt( _SKILLS_PROMPT_CACHE.move_to_end(cache_key) return cached - disabled = get_disabled_skill_names() - # ── Layer 2: disk snapshot ──────────────────────────────────────── snapshot = _load_skills_snapshot(skills_dir) @@ -652,7 +660,7 @@ def build_skills_system_prompt( ): continue skills_by_category.setdefault(category, []).append( - (skill_name, entry.get("description", "")) + (frontmatter_name, entry.get("description", "")) ) category_descriptions = { str(k): str(v) @@ -677,7 +685,7 @@ def build_skills_system_prompt( ): continue skills_by_category.setdefault(entry["category"], []).append( - (skill_name, entry["description"]) + (entry["frontmatter_name"], entry["description"]) ) # Read category-level DESCRIPTION.md files @@ -720,9 +728,10 @@ def build_skills_system_prompt( continue entry = _build_snapshot_entry(skill_file, ext_dir, frontmatter, desc) 
skill_name = entry["skill_name"] - if skill_name in seen_skill_names: + frontmatter_name = entry["frontmatter_name"] + if frontmatter_name in seen_skill_names: continue - if entry["frontmatter_name"] in disabled or skill_name in disabled: + if frontmatter_name in disabled or skill_name in disabled: continue if not _skill_should_show( extract_skill_conditions(frontmatter), @@ -730,9 +739,9 @@ def build_skills_system_prompt( available_toolsets, ): continue - seen_skill_names.add(skill_name) + seen_skill_names.add(frontmatter_name) skills_by_category.setdefault(entry["category"], []).append( - (skill_name, entry["description"]) + (frontmatter_name, entry["description"]) ) except Exception as e: logger.debug("Error reading external skill %s: %s", skill_file, e) diff --git a/agent/redact.py b/agent/redact.py index 04d35e3c93..af3b7bb93c 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -93,6 +93,17 @@ _DB_CONNSTR_RE = re.compile( re.IGNORECASE, ) +# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{") +# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs. +_JWT_RE = re.compile( + r"eyJ[A-Za-z0-9_-]{10,}" # Header (always starts with eyJ) + r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}" # Optional payload and/or signature +) + +# Discord user/role mentions: <@123456789012345678> or <@!123456789012345678> +# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts. +_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>") + # E.164 phone numbers: +, 7-15 digits # Negative lookahead prevents matching hex strings or identifiers _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])") @@ -159,6 +170,12 @@ def redact_sensitive_text(text: str) -> str: # Database connection string passwords text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text) + # JWT tokens (eyJ... 
— base64-encoded JSON headers) + text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text) + + # Discord user/role mentions (<@snowflake_id>) + text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text) + # E.164 phone numbers (Signal, WhatsApp) def _redact_phone(m): phone = m.group(1) diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 1f000eefed..280105daca 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -12,6 +12,8 @@ from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional +from hermes_constants import display_hermes_home + logger = logging.getLogger(__name__) _skill_commands: Dict[str, Dict[str, Any]] = {} @@ -70,7 +72,14 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu skill_name = str(loaded_skill.get("name") or normalized) skill_path = str(loaded_skill.get("path") or "") skill_dir = None - if skill_path: + # Prefer the absolute skill_dir returned by skill_view() — this is + # correct for both local and external skills. Fall back to the old + # SKILLS_DIR-relative reconstruction only when skill_dir is absent + # (e.g. legacy skill_view responses). 
+ abs_skill_dir = loaded_skill.get("skill_dir") + if abs_skill_dir: + skill_dir = Path(abs_skill_dir) + elif skill_path: try: skill_dir = SKILLS_DIR / Path(skill_path).parent except Exception: @@ -108,7 +117,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None if not resolved: return - lines = ["", "[Skill config (from ~/.hermes/config.yaml):"] + lines = ["", f"[Skill config (from {display_hermes_home()}/config.yaml):"] for key, value in resolved.items(): display_val = str(value) if value else "(not set)" lines.append(f" {key} = {display_val}") diff --git a/agent/smart_model_routing.py b/agent/smart_model_routing.py deleted file mode 100644 index 6d482be270..0000000000 --- a/agent/smart_model_routing.py +++ /dev/null @@ -1,195 +0,0 @@ -"""Helpers for optional cheap-vs-strong model routing.""" - -from __future__ import annotations - -import os -import re -from typing import Any, Dict, Optional - -from utils import is_truthy_value - -_COMPLEX_KEYWORDS = { - "debug", - "debugging", - "implement", - "implementation", - "refactor", - "patch", - "traceback", - "stacktrace", - "exception", - "error", - "analyze", - "analysis", - "investigate", - "architecture", - "design", - "compare", - "benchmark", - "optimize", - "optimise", - "review", - "terminal", - "shell", - "tool", - "tools", - "pytest", - "test", - "tests", - "plan", - "planning", - "delegate", - "subagent", - "cron", - "docker", - "kubernetes", -} - -_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE) - - -def _coerce_bool(value: Any, default: bool = False) -> bool: - return is_truthy_value(value, default=default) - - -def _coerce_int(value: Any, default: int) -> int: - try: - return int(value) - except (TypeError, ValueError): - return default - - -def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: - """Return the configured cheap-model route when a message looks simple. 
- - Conservative by design: if the message has signs of code/tool/debugging/ - long-form work, keep the primary model. - """ - cfg = routing_config or {} - if not _coerce_bool(cfg.get("enabled"), False): - return None - - cheap_model = cfg.get("cheap_model") or {} - if not isinstance(cheap_model, dict): - return None - provider = str(cheap_model.get("provider") or "").strip().lower() - model = str(cheap_model.get("model") or "").strip() - if not provider or not model: - return None - - text = (user_message or "").strip() - if not text: - return None - - max_chars = _coerce_int(cfg.get("max_simple_chars"), 160) - max_words = _coerce_int(cfg.get("max_simple_words"), 28) - - if len(text) > max_chars: - return None - if len(text.split()) > max_words: - return None - if text.count("\n") > 1: - return None - if "```" in text or "`" in text: - return None - if _URL_RE.search(text): - return None - - lowered = text.lower() - words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()} - if words & _COMPLEX_KEYWORDS: - return None - - route = dict(cheap_model) - route["provider"] = provider - route["model"] = model - route["routing_reason"] = "simple_turn" - return route - - -def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]: - """Resolve the effective model/runtime for one turn. - - Returns a dict with model/runtime/signature/label fields. 
- """ - route = choose_cheap_model_route(user_message, routing_config) - if not route: - return { - "model": primary.get("model"), - "runtime": { - "api_key": primary.get("api_key"), - "base_url": primary.get("base_url"), - "provider": primary.get("provider"), - "api_mode": primary.get("api_mode"), - "command": primary.get("command"), - "args": list(primary.get("args") or []), - "credential_pool": primary.get("credential_pool"), - }, - "label": None, - "signature": ( - primary.get("model"), - primary.get("provider"), - primary.get("base_url"), - primary.get("api_mode"), - primary.get("command"), - tuple(primary.get("args") or ()), - ), - } - - from hermes_cli.runtime_provider import resolve_runtime_provider - - explicit_api_key = None - api_key_env = str(route.get("api_key_env") or "").strip() - if api_key_env: - explicit_api_key = os.getenv(api_key_env) or None - - try: - runtime = resolve_runtime_provider( - requested=route.get("provider"), - explicit_api_key=explicit_api_key, - explicit_base_url=route.get("base_url"), - ) - except Exception: - return { - "model": primary.get("model"), - "runtime": { - "api_key": primary.get("api_key"), - "base_url": primary.get("base_url"), - "provider": primary.get("provider"), - "api_mode": primary.get("api_mode"), - "command": primary.get("command"), - "args": list(primary.get("args") or []), - "credential_pool": primary.get("credential_pool"), - }, - "label": None, - "signature": ( - primary.get("model"), - primary.get("provider"), - primary.get("base_url"), - primary.get("api_mode"), - primary.get("command"), - tuple(primary.get("args") or ()), - ), - } - - return { - "model": route.get("model"), - "runtime": { - "api_key": runtime.get("api_key"), - "base_url": runtime.get("base_url"), - "provider": runtime.get("provider"), - "api_mode": runtime.get("api_mode"), - "command": runtime.get("command"), - "args": list(runtime.get("args") or []), - "credential_pool": runtime.get("credential_pool"), - }, - "label": f"smart route → 
{route.get('model')} ({runtime.get('provider')})", - "signature": ( - route.get("model"), - runtime.get("provider"), - runtime.get("base_url"), - runtime.get("api_mode"), - runtime.get("command"), - tuple(runtime.get("args") or ()), - ), - } diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py index 736c2dc35e..29c75b172a 100644 --- a/agent/usage_pricing.py +++ b/agent/usage_pricing.py @@ -284,6 +284,80 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { source_url="https://ai.google.dev/pricing", pricing_version="google-pricing-2026-03-16", ), + # AWS Bedrock — pricing per the Bedrock pricing page. + # Bedrock charges the same per-token rates as the model provider but + # through AWS billing. These are the on-demand prices (no commitment). + # Source: https://aws.amazon.com/bedrock/pricing/ + ( + "bedrock", + "anthropic.claude-opus-4-6", + ): PricingEntry( + input_cost_per_million=Decimal("15.00"), + output_cost_per_million=Decimal("75.00"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "anthropic.claude-sonnet-4-6", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "anthropic.claude-sonnet-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "anthropic.claude-haiku-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("0.80"), + output_cost_per_million=Decimal("4.00"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + 
pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "amazon.nova-pro", + ): PricingEntry( + input_cost_per_million=Decimal("0.80"), + output_cost_per_million=Decimal("3.20"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "amazon.nova-lite", + ): PricingEntry( + input_cost_per_million=Decimal("0.06"), + output_cost_per_million=Decimal("0.24"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), + ( + "bedrock", + "amazon.nova-micro", + ): PricingEntry( + input_cost_per_million=Decimal("0.035"), + output_cost_per_million=Decimal("0.14"), + source="official_docs_snapshot", + source_url="https://aws.amazon.com/bedrock/pricing/", + pricing_version="bedrock-pricing-2026-04", + ), } diff --git a/batch_runner.py b/batch_runner.py index 195452c0ae..c8f275a14f 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -444,6 +444,7 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]: if not reasoning.get("has_any_reasoning", True): print(f" 🚫 Prompt {prompt_index} discarded (no reasoning in any turn)") discarded_no_reasoning += 1 + completed_in_batch.append(prompt_index) continue # Get and normalize tool stats for consistent schema across all entries @@ -561,7 +562,10 @@ class BatchRunner: provider_sort (str): Sort providers by price/throughput/latency (optional) max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set) reasoning_config (Dict): OpenRouter reasoning config override (e.g. {"effort": "none"} to disable thinking) - prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming) + prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming). 
+ NOTE: Anthropic Sonnet 4.6+ and Opus 4.6+ reject a trailing assistant-role prefill + (400 error). For those models use output_config.format or structured-output + schemas instead. Safe here for user-role priming and for older Claude / non-Claude models. max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set) """ self.dataset_file = Path(dataset_file) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 6574236793..6d8750a2d0 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -16,7 +16,7 @@ model: # "nous" - Nous Portal OAuth (requires: hermes login) # "nous-api" - Nous Portal API key (requires: NOUS_API_KEY) # "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY) - # "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex) + # "openai-codex" - OpenAI Codex (requires: hermes auth) # "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN) # "gemini" - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY) # "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY) @@ -24,8 +24,10 @@ model: # "minimax" - MiniMax global (requires: MINIMAX_API_KEY) # "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY) # "huggingface" - Hugging Face Inference (requires: HF_TOKEN) + # "nvidia" - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY) # "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY) # "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY) + # "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings) # "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY) # "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY) # @@ -37,12 +39,6 @@ model: # base_url: "http://localhost:1234/v1" # No API key needed — local servers typically ignore auth. 
# - # For Ollama Cloud (https://ollama.com/pricing): - # provider: "custom" - # base_url: "https://ollama.com/v1" - # Set OLLAMA_API_KEY in .env — automatically picked up when base_url - # points to ollama.com. - # # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var. provider: "auto" @@ -67,7 +63,38 @@ model: # Leave unset to use the model's native output ceiling (recommended). # Set only if you want to deliberately limit individual response length. # - # max_tokens: 8192 +# max_tokens: 8192 + +# Named provider overrides (optional) +# Use this for per-provider request timeouts, non-stream stale timeouts, +# and per-model exceptions. +# Applies to the primary turn client on every api_mode (OpenAI-wire, native +# Anthropic, and Anthropic-compatible providers), the fallback chain, and +# client rebuilds during credential rotation. For OpenAI-wire chat +# completions (streaming and non-streaming) the configured value is also +# used as the per-request ``timeout=`` kwarg so it wins over the legacy +# HERMES_API_TIMEOUT env var (which still applies when no config is set). +# ``stale_timeout_seconds`` controls the non-streaming stale-call detector and +# wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these +# unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s, +# HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s). +# +# Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock +# SDK paths) — those use boto3 with its own timeout configuration. 
+# +# providers: +# ollama-local: +# request_timeout_seconds: 300 # Longer timeout for local cold-starts +# stale_timeout_seconds: 900 # Explicitly re-enable stale detection on local endpoints +# anthropic: +# request_timeout_seconds: 30 # Fast-fail cloud requests +# models: +# claude-opus-4.6: +# timeout_seconds: 600 # Longer timeout for extended-thinking Opus calls +# openai-codex: +# models: +# gpt-5.4: +# stale_timeout_seconds: 1800 # Longer non-stream stale timeout for slow large-context turns # ============================================================================= # OpenRouter Provider Routing (only applies when using OpenRouter) @@ -95,20 +122,6 @@ model: # # Data policy: "allow" (default) or "deny" to exclude providers that may store data # # data_collection: "deny" -# ============================================================================= -# Smart Model Routing (optional) -# ============================================================================= -# Use a cheaper model for short/simple turns while keeping your main model for -# more complex requests. Disabled by default. -# -# smart_model_routing: -# enabled: true -# max_simple_chars: 160 -# max_simple_words: 28 -# cheap_model: -# provider: openrouter -# model: google/gemini-2.5-flash - # ============================================================================= # Git Worktree Isolation # ============================================================================= @@ -337,6 +350,7 @@ compression: # "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY) # "nous" - Force Nous Portal (requires: hermes login) # "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY) +# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY) # "codex" - Force Codex OAuth (requires: hermes model → Codex). # Uses gpt-5.3-codex which supports vision. # "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY). 
@@ -360,6 +374,18 @@ compression: # web_extract: # provider: "auto" # model: "" +# +# # Session search — summarizes matching past sessions +# session_search: +# provider: "auto" +# model: "" +# timeout: 30 +# max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s +# extra_body: {} # Provider-specific OpenAI-compatible request fields +# # Example for providers that support request-body +# # reasoning controls: +# # extra_body: +# # enable_thinking: false # ============================================================================= # Persistent Memory @@ -564,6 +590,18 @@ platform_toolsets: homeassistant: [hermes-homeassistant] qqbot: [hermes-qqbot] +# ============================================================================= +# Gateway Platform Settings +# ============================================================================= +# Optional per-platform messaging settings. +# Platform-specific knobs live under `extra`. +# +# platforms: +# telegram: +# reply_to_mode: "first" # off | first | all +# extra: +# disable_link_previews: false # Set true to suppress Telegram URL previews in bot messages + # ───────────────────────────────────────────────────────────────────────────── # Available toolsets (use these names in platform_toolsets or the toolsets list) # diff --git a/cli.py b/cli.py index 970c98b060..7f93f07361 100644 --- a/cli.py +++ b/cli.py @@ -18,6 +18,8 @@ import os import shutil import sys import json +import re +import base64 import atexit import tempfile import time @@ -78,6 +80,76 @@ _project_env = Path(__file__).parent / '.env' load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env) +_REASONING_TAGS = ( + "REASONING_SCRATCHPAD", + "think", + "thinking", + "reasoning", + "thought", +) + + +def _strip_reasoning_tags(text: str) -> str: + """Remove reasoning/thinking blocks from displayed text. + + Handles every case: + * Closed pairs ```` (case-insensitive, multi-line). 
+ * Unterminated open tags that run to end-of-text (e.g. truncated + generations on NIM/MiniMax where the close tag is dropped). + * Stray orphan close tags (``stuffanswer``) left behind by + partial-content dumps. + + Covers the variants emitted by reasoning models today: ````, + ````, ````, ````, and + ```` (Gemma 4). Must stay in sync with + ``run_agent.py::_strip_think_blocks`` and the stream consumer's + ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples. + """ + cleaned = text + for tag in _REASONING_TAGS: + # Closed pair — case-insensitive so is handled too. + cleaned = re.sub( + rf"<{tag}>.*?\s*", + "", + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # Unterminated open tag — strip from the tag to end of text. + cleaned = re.sub( + rf"<{tag}>.*$", + "", + cleaned, + flags=re.DOTALL | re.IGNORECASE, + ) + # Stray orphan close tag left behind by partial dumps. + cleaned = re.sub( + rf"\s*", + "", + cleaned, + flags=re.IGNORECASE, + ) + return cleaned.strip() + + +def _assistant_content_as_text(content: Any) -> str: + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + parts = [ + str(part.get("text", "")) + for part in content + if isinstance(part, dict) and part.get("type") == "text" + ] + return "\n".join(p for p in parts if p) + return str(content) + + +def _assistant_copy_text(content: Any) -> str: + return _strip_reasoning_tags(_assistant_content_as_text(content)) + + # ============================================================================= # Configuration Loading # ============================================================================= @@ -238,12 +310,6 @@ def load_cli_config() -> Dict[str, Any]: "enabled": True, # Auto-compress when approaching context limit "threshold": 0.50, # Compress at 50% of model's context limit }, - "smart_model_routing": { - "enabled": False, - "max_simple_chars": 160, - "max_simple_words": 28, - "cheap_model": {}, - }, "agent": { "max_turns": 90, # 
Default max tool-calling iterations (shared with subagents) "verbose": False, @@ -401,14 +467,27 @@ def load_cli_config() -> Dict[str, Any]: # filesystem is directly accessible. For ALL remote/container backends # (ssh, docker, modal, singularity), the host path doesn't exist on the # target -- remove the key so terminal_tool.py uses its per-backend default. - if terminal_config.get("cwd") in (".", "auto", "cwd"): - effective_backend = terminal_config.get("env_type", "local") - if effective_backend == "local": - terminal_config["cwd"] = os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] + # + # GUARD: If TERMINAL_CWD is already set to a real absolute path (by the + # gateway's config bridge earlier in the process), don't clobber it. + # This prevents a lazy import of cli.py during gateway runtime from + # rewriting TERMINAL_CWD to the service's working directory. + # See issue #10817. + _CWD_PLACEHOLDERS = (".", "auto", "cwd") + if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + _existing_cwd = os.environ.get("TERMINAL_CWD", "") + if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): + # Gateway (or earlier startup) already resolved a real path — keep it + terminal_config["cwd"] = _existing_cwd + defaults["terminal"]["cwd"] = _existing_cwd else: - # Remove so TERMINAL_CWD stays unset → tool picks backend default - terminal_config.pop("cwd", None) + effective_backend = terminal_config.get("env_type", "local") + if effective_backend == "local": + terminal_config["cwd"] = os.getcwd() + defaults["terminal"]["cwd"] = terminal_config["cwd"] + else: + # Remove so TERMINAL_CWD stays unset → tool picks backend default + terminal_config.pop("cwd", None) env_mappings = { "env_type": "TERMINAL_ENV", @@ -989,6 +1068,7 @@ def _prune_orphaned_branches(repo_root: str) -> None: _ACCENT_ANSI_DEFAULT = "\033[1;38;2;255;215;0m" # True-color #FFD700 bold — fallback _BOLD = "\033[1m" _RST = "\033[0m" +_STREAM_PAD = " " # 4-space 
indent for streamed response text (matches Panel padding) def _hex_to_ansi(hex_color: str, *, bold: bool = False) -> str: @@ -1061,6 +1141,43 @@ def _rich_text_from_ansi(text: str) -> _RichText: return _RichText.from_ansi(text or "") +def _strip_markdown_syntax(text: str) -> str: + """Best-effort markdown marker removal for plain-text display.""" + import re + + plain = _rich_text_from_ansi(text or "").plain + plain = re.sub(r"^\s{0,3}(?:[-*_]\s*){3,}$", "", plain, flags=re.MULTILINE) + plain = re.sub(r"^\s{0,3}#{1,6}\s+", "", plain, flags=re.MULTILINE) + # Preserve blockquotes, lists, and checkboxes because they carry structure. + plain = re.sub(r"(```+|~~~+)", "", plain) + plain = re.sub(r"`([^`]*)`", r"\1", plain) + plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain) + plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain) + plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain) + plain = re.sub(r"___([^_]+)___", r"\1", plain) + plain = re.sub(r"\*\*([^*]+)\*\*", r"\1", plain) + plain = re.sub(r"__([^_]+)__", r"\1", plain) + plain = re.sub(r"\*([^*]+)\*", r"\1", plain) + plain = re.sub(r"_([^_]+)_", r"\1", plain) + plain = re.sub(r"~~([^~]+)~~", r"\1", plain) + plain = re.sub(r"\n{3,}", "\n\n", plain) + return plain.strip("\n") + + +def _render_final_assistant_content(text: str, mode: str = "render"): + """Render final assistant content as markdown, stripped text, or raw text.""" + from rich.markdown import Markdown + + normalized_mode = str(mode or "render").strip().lower() + if normalized_mode == "strip": + return _RichText(_strip_markdown_syntax(text)) + if normalized_mode == "raw": + return _rich_text_from_ansi(text or "") + + plain = _rich_text_from_ansi(text or "").plain + return Markdown(plain) + + def _cprint(text: str): """Print ANSI-colored text through prompt_toolkit's native renderer. 
@@ -1158,6 +1275,10 @@ def _resolve_attachment_path(raw_path: str) -> Path | None: return None expanded = os.path.expandvars(os.path.expanduser(token)) + if os.name != "nt": + normalized = expanded.replace("\\", "/") + if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha(): + expanded = f"/mnt/{normalized[0].lower()}/{normalized[3:]}" path = Path(expanded) if not path.is_absolute(): base_dir = Path(os.getenv("TERMINAL_CWD", os.getcwd())) @@ -1240,10 +1361,12 @@ def _detect_file_drop(user_input: str) -> "dict | None": or stripped.startswith("~") or stripped.startswith("./") or stripped.startswith("../") + or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha()) or stripped.startswith('"/') or stripped.startswith('"~') or stripped.startswith("'/") or stripped.startswith("'~") + or (len(stripped) >= 4 and stripped[0] in ("'", '"') and stripped[2] == ":" and stripped[3] in ("\\", "/") and stripped[1].isalpha()) ) if not starts_like_path: return None @@ -1632,10 +1755,30 @@ class HermesCLI: # streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml) self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False) + self.final_response_markdown = str( + CLI_CONFIG["display"].get("final_response_markdown", "strip") + ).strip().lower() or "strip" + if self.final_response_markdown not in {"render", "strip", "raw"}: + self.final_response_markdown = "strip" # Inline diff previews for write actions (display.inline_diffs in config.yaml) self._inline_diffs_enabled = CLI_CONFIG["display"].get("inline_diffs", True) + # Submitted multiline user-message preview (display.user_message_preview in config.yaml) + _ump = CLI_CONFIG["display"].get("user_message_preview", {}) + if not isinstance(_ump, dict): + _ump = {} + try: + _ump_first_lines = int(_ump.get("first_lines", 2)) + except (TypeError, ValueError): + _ump_first_lines = 2 + try: + _ump_last_lines 
= int(_ump.get("last_lines", 2)) + except (TypeError, ValueError): + _ump_last_lines = 2 + self.user_message_preview_first_lines = max(1, _ump_first_lines) + self.user_message_preview_last_lines = max(0, _ump_last_lines) + # Streaming display state self._stream_buf = "" # Partial line buffer for line-buffered rendering self._stream_started = False # True once first delta arrives @@ -1712,13 +1855,13 @@ class HermesCLI: # Parse and validate toolsets self.enabled_toolsets = toolsets if toolsets and "all" not in toolsets and "*" not in toolsets: - # Validate each toolset — MCP server names are added by - # _get_platform_tools() but aren't registered in TOOLSETS yet - # (that happens later in _sync_mcp_toolsets), so exclude them. + # Validate each toolset — MCP server names are resolved via + # live registry aliases (registered during discover_mcp_tools), + # but discovery hasn't run yet at this point, so exclude them. mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys()) invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names] if invalid: - self.console.print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]") + self._console_print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]") # Filesystem checkpoints: CLI flag > config cp_cfg = CLI_CONFIG.get("checkpoints", {}) @@ -1765,8 +1908,9 @@ class HermesCLI: fb = [fb] if fb.get("provider") and fb.get("model") else [] self._fallback_model = fb - # Optional cheap-vs-strong routing for simple turns - self._smart_model_routing = CLI_CONFIG.get("smart_model_routing", {}) or {} + # Signature of the currently-initialised agent's runtime. Used to + # rebuild the agent when provider / model / base_url changes across + # turns (e.g. after /model or credential rotation). 
self._active_agent_route_signature = None # Agent will be initialized on first use @@ -1777,6 +1921,10 @@ class HermesCLI: self.conversation_history: List[Dict[str, Any]] = [] self.session_start = datetime.now() self._resumed = False + # Per-prompt elapsed timer — started at the beginning of each chat turn, + # frozen when the agent thread completes, displayed in the status bar. + self._prompt_start_time: Optional[float] = None # time.time() when turn started + self._prompt_duration: float = 0.0 # frozen duration of last completed turn # Initialize SQLite session store early so /title works before first message self._session_db = None try: @@ -1875,6 +2023,44 @@ class HermesCLI: filled = round((safe_percent / 100) * width) return f"[{('█' * filled) + ('░' * max(0, width - filled))}]" + @staticmethod + def _format_prompt_elapsed(prompt_start_time: Optional[float], prompt_duration: float, live: bool = False) -> str: + """Format per-prompt elapsed time for the status bar. + + Always returns a string — shows 0s on fresh start before first turn. + Keeps seconds visible at all scales so it increments smoothly: + 59s → 1m → 1m 1s → ... → 1m 59s → 2m → 2m 1s → ... + 59m 59s → 1h → 1h 0m 1s → ... + 23h 59m 59s → 1d → 1d 0h 1m → ... + + Emoji prefix: ⏱ when turn is live, ⏲ when frozen or fresh start. + Uses width-1 (no variation selector) glyphs so the status bar stays + aligned in monospace terminals. 
+ """ + if prompt_start_time is None and prompt_duration == 0.0: + return "⏲ 0s" + elapsed = time.time() - prompt_start_time if prompt_start_time is not None else prompt_duration + elapsed = max(0.0, elapsed) + + days = int(elapsed // 86400) + remaining = elapsed % 86400 + hours = int(remaining // 3600) + remaining = remaining % 3600 + minutes = int(remaining // 60) + seconds = int(remaining % 60) + + if days > 0: + time_str = f"{days}d {hours}h {minutes}m" + elif hours > 0: + time_str = f"{hours}h {minutes}m {seconds}s" if seconds else f"{hours}h {minutes}m" + elif minutes > 0: + time_str = f"{minutes}m {seconds}s" if seconds else f"{minutes}m" + else: + time_str = f"{int(elapsed)}s" + + emoji = "⏱" if live else "⏲" + return f"{emoji} {time_str}" + def _get_status_bar_snapshot(self) -> Dict[str, Any]: # Prefer the agent's model name — it updates on fallback. # self.model reflects the originally configured model and never @@ -1893,6 +2079,11 @@ class HermesCLI: "model_name": model_name, "model_short": model_short, "duration": format_duration_compact(elapsed_seconds), + "prompt_elapsed": self._format_prompt_elapsed( + getattr(self, "_prompt_start_time", None), + getattr(self, "_prompt_duration", 0.0), + live=getattr(self, "_prompt_start_time", None) is not None, + ), "context_tokens": 0, "context_length": None, "context_percent": None, @@ -2010,9 +2201,34 @@ class HermesCLI: def _spinner_widget_height(self, width: Optional[int] = None) -> int: """Return the visible height for the spinner/status text line above the status bar.""" - if not getattr(self, "_spinner_text", ""): + spinner_line = self._render_spinner_text() + if not spinner_line: return 0 - return 0 if self._use_minimal_tui_chrome(width=width) else 1 + if self._use_minimal_tui_chrome(width=width): + return 0 + width = width or self._get_tui_terminal_width() + if width and width > 10: + import math + text_width = self._status_bar_display_width(spinner_line) + return max(1, math.ceil(text_width / width)) + 
return 1 + + def _render_spinner_text(self) -> str: + """Return the live spinner/status text exactly as rendered in the TUI.""" + txt = getattr(self, "_spinner_text", "") + if not txt: + return "" + t0 = getattr(self, "_tool_start_time", 0) or 0 + if t0 > 0: + import time as _time + elapsed = _time.monotonic() - t0 + if elapsed >= 60: + _m, _s = int(elapsed // 60), int(elapsed % 60) + elapsed_str = f"{_m}m {_s}s" + else: + elapsed_str = f"{elapsed:.1f}s" + return f" {txt} ({elapsed_str})" + return f" {txt}" def _get_voice_status_fragments(self, width: Optional[int] = None): """Return the voice status bar fragments for the interactive TUI.""" @@ -2059,6 +2275,9 @@ class HermesCLI: parts = [f"⚕ {snapshot['model_short']}", context_label, percent_label] parts.append(duration_label) + prompt_elapsed = snapshot.get("prompt_elapsed") + if prompt_elapsed: + parts.append(prompt_elapsed) return self._trim_status_bar_text(" │ ".join(parts), width) except Exception: return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}" @@ -2117,8 +2336,13 @@ class HermesCLI: (bar_style, percent_label), ("class:status-bar-dim", " │ "), ("class:status-bar-dim", duration_label), - ("class:status-bar", " "), ] + # Position 7: per-prompt elapsed timer (live or frozen) + prompt_elapsed = snapshot.get("prompt_elapsed") + if prompt_elapsed: + frags.append(("class:status-bar-dim", " │ ")) + frags.append(("class:status-bar-dim", prompt_elapsed)) + frags.append(("class:status-bar", " ")) total_width = sum(self._status_bar_display_width(text) for _, text in frags) if total_width > width: @@ -2144,7 +2368,7 @@ class HermesCLI: normalized_model = normalize_model_for_provider(current_model, resolved_provider) if normalized_model and normalized_model != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Normalized model '{current_model}' to '{normalized_model}' for {resolved_provider}.[/]" ) self.model = normalized_model @@ -2160,7 +2384,7 
@@ class HermesCLI: canonical = normalize_copilot_model_id(current_model, api_key=self.api_key) if canonical and canonical != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Normalized Copilot model '{current_model}' to '{canonical}'.[/]" ) self.model = canonical @@ -2182,7 +2406,7 @@ class HermesCLI: canonical = normalize_opencode_model_id(resolved_provider, current_model) if canonical and canonical != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]" ) self.model = canonical @@ -2204,7 +2428,7 @@ class HermesCLI: if "/" in current_model: slug = current_model.split("/", 1)[1] if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; " f"using '{slug}' for OpenAI Codex.[/]" ) @@ -2337,6 +2561,61 @@ class HermesCLI: if flush_text: self._emit_reasoning_preview(flush_text) + def _format_submitted_user_message_preview(self, user_input: str) -> str: + """Format the submitted user-message scrollback preview.""" + lines = user_input.split("\n") + if len(lines) <= 1: + return f"[bold {_accent_hex()}]●[/] [bold]{_escape(user_input)}[/]" + + first_lines = int(getattr(self, "user_message_preview_first_lines", 2)) + last_lines = int(getattr(self, "user_message_preview_last_lines", 2)) + first_lines = max(1, first_lines) + last_lines = max(0, last_lines) + head = lines[:first_lines] + remaining_after_head = max(0, len(lines) - len(head)) + tail_count = min(last_lines, remaining_after_head) + tail = lines[-tail_count:] if tail_count else [] + + hidden_middle_count = len(lines) - len(head) - len(tail) + if hidden_middle_count < 0: + hidden_middle_count = 0 + tail = [] + + preview_lines = [ + f"[bold {_accent_hex()}]●[/] [bold]{_escape(head[0])}[/]" + ] + 
preview_lines.extend(f"[bold]{_escape(line)}[/]" for line in head[1:]) + + if hidden_middle_count > 0: + noun = "line" if hidden_middle_count == 1 else "lines" + preview_lines.append(f"[dim]... (+{hidden_middle_count} more {noun})[/]") + + preview_lines.extend(f"[bold]{_escape(line)}[/]" for line in tail) + return "\n".join(preview_lines) + + def _expand_paste_references(self, text: str | None) -> str: + """Expand [Pasted text #N -> file] placeholders into file contents.""" + if not isinstance(text, str) or "[Pasted text #" not in text: + return text or "" + import re as _re + + paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') + + def _expand_ref(match): + path = Path(match.group(1)) + return path.read_text(encoding="utf-8") if path.exists() else match.group(0) + + return paste_ref_re.sub(_expand_ref, text) + + def _print_user_message_preview(self, user_input: str) -> None: + """Render a user message using the normal chat scrollback style.""" + ChatConsole().print(f"[{_accent_hex()}]{'─' * 40}[/]") + text = str(user_input or "") + if "\n" in text: + ChatConsole().print(self._format_submitted_user_message_preview(text)) + else: + ChatConsole().print(f"[bold {_accent_hex()}]●[/] [bold]{_escape(text)}[/]") + def _stream_reasoning_delta(self, text: str) -> None: """Stream reasoning/thinking tokens into a dim box above the response. 
@@ -2580,7 +2859,9 @@ class HermesCLI: _tc = getattr(self, "_stream_text_ansi", "") while "\n" in self._stream_buf: line, self._stream_buf = self._stream_buf.split("\n", 1) - _cprint(f"{_tc}{line}{_RST}" if _tc else line) + if self.final_response_markdown == "strip": + line = _strip_markdown_syntax(line) + _cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}") def _flush_stream(self) -> None: """Emit any remaining partial line from the stream buffer and close the box.""" @@ -2597,7 +2878,8 @@ class HermesCLI: if self._stream_buf: _tc = getattr(self, "_stream_text_ansi", "") - _cprint(f"{_tc}{self._stream_buf}{_RST}" if _tc else self._stream_buf) + line = _strip_markdown_syntax(self._stream_buf) if self.final_response_markdown == "strip" else self._stream_buf + _cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}") self._stream_buf = "" # Close the response box @@ -2659,6 +2941,39 @@ class HermesCLI: self._command_status = "" self._invalidate(min_interval=0.0) + def _open_external_editor(self, buffer=None) -> bool: + """Open the active input buffer in an external editor.""" + app = getattr(self, "_app", None) + if not app: + _cprint(f"{_DIM}External editor is only available inside the interactive CLI.{_RST}") + return False + if self._command_running: + _cprint(f"{_DIM}Wait for the current command to finish before opening the editor.{_RST}") + return False + if self._sudo_state or self._secret_state or self._approval_state or self._clarify_state: + _cprint(f"{_DIM}Finish the active prompt before opening the editor.{_RST}") + return False + target_buffer = buffer or getattr(app, "current_buffer", None) + if target_buffer is None: + _cprint(f"{_DIM}No active input buffer is available for the external editor.{_RST}") + return False + try: + existing_text = getattr(target_buffer, "text", "") + expanded_text = self._expand_paste_references(existing_text) + if expanded_text != existing_text and hasattr(target_buffer, "text"): 
+ self._skip_paste_collapse = True + target_buffer.text = expanded_text + if hasattr(target_buffer, "cursor_position"): + target_buffer.cursor_position = len(expanded_text) + # Set skip flag (again) so the text-change event fired when the + # editor closes does not re-collapse the returned content. + self._skip_paste_collapse = True + target_buffer.open_in_editor(validate_and_handle=False) + return True + except Exception as exc: + _cprint(f"{_DIM}Failed to open external editor: {exc}{_RST}") + return False + def _ensure_runtime_credentials(self) -> bool: """ Ensure runtime credentials are resolved before agent use. @@ -2766,24 +3081,36 @@ class HermesCLI: return True def _resolve_turn_agent_config(self, user_message: str) -> dict: - """Resolve model/runtime overrides for a single user turn.""" - from agent.smart_model_routing import resolve_turn_route + """Build the effective model/runtime config for a single user turn. + + Always uses the session's primary model/provider. If the user has + toggled `/fast` on and the current model supports Priority + Processing / Anthropic fast mode, attach `request_overrides` so the + API call is marked accordingly. 
+ """ from hermes_cli.models import resolve_fast_mode_overrides - route = resolve_turn_route( - user_message, - self._smart_model_routing, - { - "model": self.model, - "api_key": self.api_key, - "base_url": self.base_url, - "provider": self.provider, - "api_mode": self.api_mode, - "command": self.acp_command, - "args": list(self.acp_args or []), - "credential_pool": getattr(self, "_credential_pool", None), - }, - ) + runtime = { + "api_key": self.api_key, + "base_url": self.base_url, + "provider": self.provider, + "api_mode": self.api_mode, + "command": self.acp_command, + "args": list(self.acp_args or []), + "credential_pool": getattr(self, "_credential_pool", None), + } + route = { + "model": self.model, + "runtime": runtime, + "signature": ( + self.model, + runtime["provider"], + runtime["base_url"], + runtime["api_mode"], + runtime["command"], + tuple(runtime["args"]), + ), + } service_tier = getattr(self, "service_tier", None) if not service_tier: @@ -2791,13 +3118,13 @@ class HermesCLI: return route try: - overrides = resolve_fast_mode_overrides(route.get("model")) + overrides = resolve_fast_mode_overrides(route["model"]) except Exception: overrides = None route["request_overrides"] = overrides return route - def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool: + def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, request_overrides: dict | None = None) -> bool: """ Initialize the agent on first use. When resuming a session, restores conversation history from SQLite. 
@@ -2953,7 +3280,7 @@ class HermesCLI: use_compact = self.compact or term_width < 80 if use_compact: - self.console.print(_build_compact_banner()) + self._console_print(_build_compact_banner()) self._show_status() else: # Get tools for display @@ -2978,25 +3305,25 @@ class HermesCLI: # Warn about very low context lengths (common with local servers) if ctx_len and ctx_len <= 8192: - self.console.print() - self.console.print( + self._console_print() + self._console_print( f"[yellow]⚠️ Context length is only {ctx_len:,} tokens — " f"this is likely too low for agent use with tools.[/]" ) - self.console.print( + self._console_print( "[dim] Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]" ) base_url = getattr(self, "base_url", "") or "" if "11434" in base_url or "ollama" in base_url.lower(): - self.console.print( + self._console_print( "[dim] Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]" ) elif "1234" in base_url: - self.console.print( + self._console_print( "[dim] LM Studio fix: Set context length in model settings → reload model[/]" ) else: - self.console.print( + self._console_print( "[dim] Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]" ) @@ -3005,20 +3332,20 @@ class HermesCLI: model_name = getattr(self, "model", "") or "" if is_nous_hermes_non_agentic(model_name): - self.console.print() - self.console.print( + self._console_print() + self._console_print( "[bold yellow]⚠ Nous Research Hermes 3 & 4 models are NOT agentic and are not " "designed for use with Hermes Agent.[/]" ) - self.console.print( + self._console_print( "[dim] They lack tool-calling capabilities required for agent workflows. 
" "Consider using an agentic model (Claude, GPT, Gemini, DeepSeek, etc.).[/]" ) - self.console.print( + self._console_print( "[dim] Switch with: /model sonnet or /model gpt5[/]" ) - self.console.print() + self._console_print() def _preload_resumed_session(self) -> bool: """Load a resumed session's history from the DB early (before first chat). @@ -3036,10 +3363,10 @@ class HermesCLI: session_meta = self._session_db.get_session(self.session_id) if not session_meta: - self.console.print( + self._console_print( f"[bold red]Session not found: {self.session_id}[/]" ) - self.console.print( + self._console_print( "[dim]Use a session ID from a previous CLI run " "(hermes sessions list).[/]" ) @@ -3054,7 +3381,7 @@ class HermesCLI: if session_meta.get("title"): title_part = f' "{session_meta["title"]}"' accent_color = _accent_hex() - self.console.print( + self._console_print( f"[{accent_color}]↻ Resumed session [bold]{self.session_id}[/bold]" f"{title_part} " f"({msg_count} user message{'s' if msg_count != 1 else ''}, " @@ -3062,7 +3389,7 @@ class HermesCLI: ) else: accent_color = _accent_hex() - self.console.print( + self._console_print( f"[{accent_color}]Session {self.session_id} found but has no " f"messages. Starting fresh.[/]" ) @@ -3101,21 +3428,6 @@ class HermesCLI: MAX_ASST_LEN = 200 # truncate assistant text MAX_ASST_LINES = 3 # max lines of assistant text - def _strip_reasoning(text: str) -> str: - """Remove ... 
blocks - from displayed text (reasoning model internal thoughts).""" - import re - cleaned = re.sub( - r".*?\s*", - "", text, flags=re.DOTALL, - ) - # Also strip unclosed reasoning tags at the end - cleaned = re.sub( - r".*$", - "", cleaned, flags=re.DOTALL, - ) - return cleaned.strip() - # Collect displayable entries (skip system, tool-result messages) entries = [] # list of (role, display_text) _last_asst_idx = None # index of last assistant entry @@ -3147,7 +3459,7 @@ class HermesCLI: elif role == "assistant": text = "" if content is None else str(content) - text = _strip_reasoning(text) + text = _strip_reasoning_tags(text) parts = [] full_parts = [] # un-truncated version if text: @@ -3252,7 +3564,7 @@ class HermesCLI: padding=(0, 1), style=_history_text_c, ) - self.console.print(panel) + self._console_print(panel) def _try_attach_clipboard_image(self) -> bool: """Check clipboard for an image and attach it if found. @@ -3486,6 +3798,26 @@ class HermesCLI: killed = process_registry.kill_all() print(f" ✅ Stopped {killed} process(es).") + def _handle_agents_command(self): + """Handle /agents — show background processes and agent status.""" + from tools.process_registry import format_uptime_short, process_registry + + processes = process_registry.list_sessions() + running = [p for p in processes if p.get("status") == "running"] + finished = [p for p in processes if p.get("status") != "running"] + + _cprint(f" Running processes: {len(running)}") + for p in running: + cmd = p.get("command", "")[:80] + up = format_uptime_short(p.get("uptime_seconds", 0)) + _cprint(f" {p.get('session_id', '?')} · {up} · {cmd}") + + if finished: + _cprint(f" Recently finished: {len(finished)}") + + agent_running = getattr(self, "_agent_running", False) + _cprint(f" Agent: {'running' if agent_running else 'idle'}") + def _handle_paste_command(self): """Handle /paste — explicitly check clipboard for an image. @@ -3511,6 +3843,61 @@ class HermesCLI: else: _cprint(f" {_DIM}(._.) 
No image found in clipboard{_RST}") + def _write_osc52_clipboard(self, text: str) -> None: + """Copy *text* to terminal clipboard via OSC 52.""" + payload = base64.b64encode(text.encode("utf-8")).decode("ascii") + seq = f"\x1b]52;c;{payload}\x07" + out = getattr(self, "_app", None) + output = getattr(out, "output", None) if out else None + if output and hasattr(output, "write_raw"): + output.write_raw(seq) + output.flush() + return + if output and hasattr(output, "write"): + output.write(seq) + output.flush() + return + sys.stdout.write(seq) + sys.stdout.flush() + + def _handle_copy_command(self, cmd_original: str) -> None: + """Handle /copy [number] — copy assistant output to clipboard.""" + parts = cmd_original.split(maxsplit=1) + arg = parts[1].strip() if len(parts) > 1 else "" + + assistant = [m for m in self.conversation_history if m.get("role") == "assistant"] + if not assistant: + _cprint(" Nothing to copy yet.") + return + + if arg: + try: + idx = int(arg) - 1 + except ValueError: + _cprint(" Usage: /copy [number]") + return + if idx < 0 or idx >= len(assistant): + _cprint(f" Invalid response number. 
Use 1-{len(assistant)}.") + return + else: + idx = len(assistant) - 1 + while idx >= 0 and not _assistant_copy_text(assistant[idx].get("content")): + idx -= 1 + if idx < 0: + _cprint(" Nothing to copy in assistant responses yet.") + return + + text = _assistant_copy_text(assistant[idx].get("content")) + if not text: + _cprint(" Nothing to copy in that assistant response.") + return + + try: + self._write_osc52_clipboard(text) + _cprint(f" Copied assistant response #{idx + 1} to clipboard") + except Exception as e: + _cprint(f" Clipboard copy failed: {e}") + def _handle_image_command(self, cmd_original: str): """Handle /image — attach a local image file for the next prompt.""" raw_args = (cmd_original.split(None, 1)[1].strip() if " " in cmd_original else "") @@ -3613,14 +4000,14 @@ class HermesCLI: api_key_missing = [u for u in unavailable if u["missing_vars"]] if api_key_missing: - self.console.print() - self.console.print("[yellow]⚠️ Some tools disabled (missing API keys):[/]") + self._console_print() + self._console_print("[yellow]⚠️ Some tools disabled (missing API keys):[/]") for item in api_key_missing: tools_str = ", ".join(item["tools"][:2]) # Show first 2 tools if len(item["tools"]) > 2: tools_str += f", +{len(item['tools'])-2} more" - self.console.print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]") - self.console.print("[dim] Run 'hermes setup' to configure[/]") + self._console_print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]") + self._console_print("[dim] Run 'hermes setup' to configure[/]") except Exception: pass # Don't crash on import errors @@ -3647,7 +4034,7 @@ class HermesCLI: skin = get_active_skin() separator_color = skin.get_color("banner_dim", "#B8860B") accent_color = skin.get_color("ui_accent", "#FFBF00") - label_color = skin.get_color("ui_label", "#4dd0e1") + label_color = skin.get_color("ui_label", "#DAA520") except Exception: separator_color, accent_color, label_color = 
"#B8860B", "#FFBF00", "cyan" toolsets_info = "" @@ -3658,7 +4045,7 @@ class HermesCLI: if self._provider_source: provider_info += f" [dim {separator_color}]·[/] [dim]auth: {self._provider_source}[/]" - self.console.print( + self._console_print( f" {api_indicator} [{accent_color}]{model_short}[/] " f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]" f"{toolsets_info}{provider_info}" @@ -3715,7 +4102,7 @@ class HermesCLI: f"Tokens: {total_tokens:,}", f"Agent Running: {'Yes' if is_running else 'No'}", ]) - self.console.print("\n".join(lines), highlight=False, markup=False) + self._console_print("\n".join(lines), highlight=False, markup=False) def _fast_command_available(self) -> bool: try: @@ -3764,6 +4151,7 @@ class HermesCLI: _cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}") _cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}") + _cprint(f" {_DIM}Draft editor: Ctrl+G{_RST}") if _is_termux_environment(): _cprint(f" {_DIM}Attach image: /image {_termux_example_image_path()} or start your prompt with a local image path{_RST}\n") else: @@ -3896,23 +4284,14 @@ class HermesCLI: def _handle_profile_command(self): """Display active profile name and home directory.""" - from hermes_constants import get_hermes_home, display_hermes_home + from hermes_constants import display_hermes_home + from hermes_cli.profiles import get_active_profile_name - home = get_hermes_home() display = display_hermes_home() - - profiles_parent = Path.home() / ".hermes" / "profiles" - try: - rel = home.relative_to(profiles_parent) - profile_name = str(rel).split("/")[0] - except ValueError: - profile_name = None + profile_name = get_active_profile_name() print() - if profile_name: - print(f" Profile: {profile_name}") - else: - print(" Profile: default") + print(f" Profile: {profile_name}") print(f" Home: {display}") print() @@ -4099,6 +4478,8 @@ class HermesCLI: self.agent.flush_memories(self.conversation_history) except (Exception, 
KeyboardInterrupt): pass + # Trigger memory extraction on the old session before session_id rotates. + self.agent.commit_memory_session(self.conversation_history) self._notify_session_boundary("on_session_finalize") elif self.agent: # First session or empty history — still finalize the old session @@ -4497,6 +4878,34 @@ class HermesCLI: self._restore_modal_input_snapshot() self._invalidate(min_interval=0.0) + @staticmethod + def _compute_model_picker_viewport( + selected: int, + scroll_offset: int, + n: int, + term_rows: int, + reserved_below: int = 6, + panel_chrome: int = 6, + min_visible: int = 3, + ) -> tuple[int, int]: + """Resolve (scroll_offset, visible) for the /model picker viewport. + + ``reserved_below`` matches the approval / clarify panels — input area, + status bar, and separators below the panel. ``panel_chrome`` covers + this panel's own borders + blanks + hint row. The remaining rows hold + the scrollable list, with the offset slid to keep ``selected`` on screen. + """ + max_visible = max(min_visible, term_rows - reserved_below - panel_chrome) + if n <= max_visible: + return 0, n + visible = max_visible + if selected < scroll_offset: + scroll_offset = selected + elif selected >= scroll_offset + visible: + scroll_offset = selected - visible + 1 + scroll_offset = max(0, min(scroll_offset, n - visible)) + return scroll_offset, visible + def _apply_model_switch_result(self, result, persist_global: bool) -> None: if not result.success: _cprint(f" ✗ {result.error_message}") @@ -4587,16 +4996,19 @@ class HermesCLI: self._close_model_picker() return provider_data = providers[selected] - model_list = [] - try: - from hermes_cli.models import provider_model_ids - live = provider_model_ids(provider_data["slug"]) - if live: - model_list = live - except Exception: - pass + # Use the curated model list from list_authenticated_providers() + # (same lists as `hermes model` and gateway pickers). 
+ # Only fall back to the live provider catalog when the curated + # list is empty (e.g. user-defined endpoints with no curated list). + model_list = provider_data.get("models", []) if not model_list: - model_list = provider_data.get("models", []) + try: + from hermes_cli.models import provider_model_ids + live = provider_model_ids(provider_data["slug"]) + if live: + model_list = live + except Exception: + pass state["stage"] = "model" state["provider_data"] = provider_data state["model_list"] = model_list @@ -4889,8 +5301,15 @@ class HermesCLI: print(" To change model or provider, use: hermes model") + def _output_console(self): + """Use prompt_toolkit-safe Rich rendering once the TUI is live.""" + if getattr(self, "_app", None): + return ChatConsole() + return self.console - + def _console_print(self, *args, **kwargs): + """Print through the active command-safe console.""" + self._output_console().print(*args, **kwargs) @staticmethod def _resolve_personality_prompt(value) -> str: @@ -4904,6 +5323,52 @@ class HermesCLI: return "\n".join(p for p in parts if p) return str(value) + def _handle_gquota_command(self, cmd_original: str) -> None: + """Show Google Gemini Code Assist quota usage for the current OAuth account.""" + try: + from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials + from agent.google_code_assist import retrieve_user_quota, CodeAssistError + except ImportError as exc: + self._console_print(f" [red]Gemini modules unavailable: {exc}[/]") + return + + try: + access_token = get_valid_access_token() + except GoogleOAuthError as exc: + self._console_print(f" [yellow]{exc}[/]") + self._console_print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.") + return + + creds = load_credentials() + project_id = (creds.project_id if creds else "") or "" + + try: + buckets = retrieve_user_quota(access_token, project_id=project_id) + except CodeAssistError as exc: + self._console_print(f" [red]Quota lookup 
failed:[/] {exc}") + return + + if not buckets: + self._console_print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]") + return + + # Sort for stable display, group by model + buckets.sort(key=lambda b: (b.model_id, b.token_type)) + self._console_print() + self._console_print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})") + self._console_print() + for b in buckets: + pct = max(0.0, min(1.0, b.remaining_fraction)) + width = 20 + filled = int(round(pct * width)) + bar = "▓" * filled + "░" * (width - filled) + pct_str = f"{int(pct * 100):3d}%" + header = b.model_id + if b.token_type: + header += f" [{b.token_type}]" + self._console_print(f" {header:40s} {bar} {pct_str}") + self._console_print() + def _handle_personality_command(self, cmd: str): """Handle the /personality command to set predefined personalities.""" parts = cmd.split(maxsplit=1) @@ -5033,7 +5498,7 @@ class HermesCLI: print(" /cron list") print(' /cron add "every 2h" "Check server status" [--skill blogwatcher]') print(' /cron edit --schedule "every 4h" --prompt "New task"') - print(" /cron edit --skill blogwatcher --skill find-nearby") + print(" /cron edit --skill blogwatcher --skill maps") print(" /cron edit --remove-skill blogwatcher") print(" /cron edit --clear-skills") print(" /cron pause ") @@ -5350,7 +5815,7 @@ class HermesCLI: _tip_color = get_active_skin().get_color("banner_dim", "#B8860B") except Exception: _tip_color = "#B8860B" - self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") + self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") except Exception: pass elif canonical == "history": @@ -5413,6 +5878,8 @@ class HermesCLI: self._handle_model_switch(cmd_original) elif canonical == "provider": self._show_model_and_providers() + elif canonical == "gquota": + self._handle_gquota_command(cmd_original) elif canonical == "personality": # Use original case (handler lowercases the personality name itself) @@ -5442,7 
+5909,7 @@ class HermesCLI: elif canonical == "statusbar": self._status_bar_visible = not self._status_bar_visible state = "visible" if self._status_bar_visible else "hidden" - self.console.print(f" Status bar {state}") + self._console_print(f" Status bar {state}") elif canonical == "verbose": self._toggle_verbose() elif canonical == "yolo": @@ -5457,6 +5924,8 @@ class HermesCLI: self._show_usage() elif canonical == "insights": self._show_insights(cmd_original) + elif canonical == "copy": + self._handle_copy_command(cmd_original) elif canonical == "debug": self._handle_debug_command() elif canonical == "paste": @@ -5487,7 +5956,8 @@ class HermesCLI: version = f" v{p['version']}" if p["version"] else "" tools = f"{p['tools']} tools" if p["tools"] else "" hooks = f"{p['hooks']} hooks" if p["hooks"] else "" - parts = [x for x in [tools, hooks] if x] + commands = f"{p['commands']} commands" if p.get("commands") else "" + parts = [x for x in [tools, hooks, commands] if x] detail = f" ({', '.join(parts)})" if parts else "" error = f" — {p['error']}" if p["error"] else "" print(f" {status} {p['name']}{version}{detail}{error}") @@ -5499,6 +5969,8 @@ class HermesCLI: self._handle_snapshot_command(cmd_original) elif canonical == "stop": self._handle_stop_command() + elif canonical == "agents": + self._handle_agents_command() elif canonical == "background": self._handle_background_command(cmd_original) elif canonical == "btw": @@ -5515,6 +5987,30 @@ class HermesCLI: _cprint(f" Queued for the next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}") else: _cprint(f" Queued: {payload[:80]}{'...' if len(payload) > 80 else ''}") + elif canonical == "steer": + # Inject a message after the next tool call without interrupting. + # If the agent is actively running, push the text into the agent's + # pending_steer slot — the drain hook in _execute_tool_calls_* + # will append it to the next tool result's content. 
If no agent + # is running, fall back to queue semantics (same as /queue). + parts = cmd_original.split(None, 1) + payload = parts[1].strip() if len(parts) > 1 else "" + if not payload: + _cprint(" Usage: /steer ") + elif self._agent_running and self.agent is not None and hasattr(self.agent, "steer"): + try: + accepted = self.agent.steer(payload) + except Exception as exc: + _cprint(f" Steer failed: {exc}") + else: + if accepted: + _cprint(f" ⏩ Steer queued — arrives after the next tool call: {payload[:80]}{'...' if len(payload) > 80 else ''}") + else: + _cprint(" Steer rejected (empty payload).") + else: + # No active run — treat as a normal next-turn message. + self._pending_input.put(payload) + _cprint(f" No agent running; queued as next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}") elif canonical == "skin": self._handle_skin_command(cmd_original) elif canonical == "voice": @@ -5536,15 +6032,15 @@ class HermesCLI: ) output = result.stdout.strip() or result.stderr.strip() if output: - self.console.print(_rich_text_from_ansi(output)) + self._console_print(_rich_text_from_ansi(output)) else: - self.console.print("[dim]Command returned no output[/]") + self._console_print("[dim]Command returned no output[/]") except subprocess.TimeoutExpired: - self.console.print("[bold red]Quick command timed out (30s)[/]") + self._console_print("[bold red]Quick command timed out (30s)[/]") except Exception as e: - self.console.print(f"[bold red]Quick command error: {e}[/]") + self._console_print(f"[bold red]Quick command error: {e}[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") elif qcmd.get("type") == "alias": target = qcmd.get("target", "").strip() if target: @@ -5553,9 +6049,9 @@ class HermesCLI: aliased_command = f"{target} {user_args}".strip() return self.process_command(aliased_command) else: - self.console.print(f"[bold 
red]Quick command '{base_cmd}' has no target defined[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") # Check for plugin-registered slash commands elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names(): from hermes_cli.plugins import get_plugin_command_handler @@ -5755,13 +6251,13 @@ class HermesCLI: _chat_console = ChatConsole() _chat_console.print(Panel( - _rich_text_from_ansi(response), + _render_final_assistant_content(response, mode=self.final_response_markdown), title=f"[{_resp_color} bold]{label} (background #{task_num})[/]", title_align="left", border_style=_resp_color, style=_resp_text, box=rich_box.HORIZONTALS, - padding=(1, 2), + padding=(1, 4), )) else: _cprint(" (No response generated)") @@ -5880,12 +6376,12 @@ class HermesCLI: _resp_color = "#4F6D4A" ChatConsole().print(Panel( - _rich_text_from_ansi(response), + _render_final_assistant_content(response, mode=self.final_response_markdown), title=f"[{_resp_color} bold]⚕ /btw[/]", title_align="left", border_style=_resp_color, box=rich_box.HORIZONTALS, - padding=(1, 2), + padding=(1, 4), )) else: _cprint(" 💬 /btw: (no response)") @@ -5952,7 +6448,7 @@ class HermesCLI: parts = cmd.strip().split(None, 1) sub = parts[1].lower().strip() if len(parts) > 1 else "status" - _DEFAULT_CDP = "http://localhost:9222" + _DEFAULT_CDP = "http://127.0.0.1:9222" current = os.environ.get("BROWSER_CDP_URL", "").strip() if sub.startswith("connect"): @@ -6199,13 +6695,21 @@ class HermesCLI: def _toggle_yolo(self): """Toggle YOLO mode — skip all dangerous command approval prompts.""" import os + from hermes_cli.colors import Colors as _Colors + current = bool(os.environ.get("HERMES_YOLO_MODE")) if current: os.environ.pop("HERMES_YOLO_MODE", None) 
- self.console.print(" ⚠ YOLO mode [bold red]OFF[/] — dangerous commands will require approval.") + _cprint( + f" ⚠ YOLO mode {_Colors.BOLD}{_Colors.RED}OFF{_Colors.RESET}" + " — dangerous commands will require approval." + ) else: os.environ["HERMES_YOLO_MODE"] = "1" - self.console.print(" ⚡ YOLO mode [bold green]ON[/] — all commands auto-approved. Use with caution.") + _cprint( + f" ⚡ YOLO mode {_Colors.BOLD}{_Colors.GREEN}ON{_Colors.RESET}" + " — all commands auto-approved. Use with caution." + ) def _handle_reasoning_command(self, cmd: str): """Handle /reasoning — manage effort level and display toggle. @@ -6364,6 +6868,18 @@ class HermesCLI: focus_topic=focus_topic or None, ) self.conversation_history = compressed + # _compress_context ends the old session and creates a new child + # session on the agent (run_agent.py::_compress_context). Sync the + # CLI's session_id so /status, /resume, exit summary, and title + # generation all point at the live continuation session, not the + # ended parent. Without this, subsequent end_session() calls target + # the already-closed parent and the child is orphaned. 
+ if ( + getattr(self.agent, "session_id", None) + and self.agent.session_id != self.session_id + ): + self.session_id = self.agent.session_id + self._pending_title = None new_tokens = estimate_messages_tokens_rough(self.conversation_history) summary = summarize_manual_compression( original_history, @@ -6804,8 +7320,7 @@ class HermesCLI: ) raise RuntimeError( "Voice mode requires sounddevice and numpy.\n" - "Install with: pip install sounddevice numpy\n" - "Or: pip install hermes-agent[voice]" + f"Install with: {sys.executable} -m pip install sounddevice numpy" ) if not reqs.get("stt_available", reqs.get("stt_key_set")): raise RuntimeError( @@ -7081,8 +7596,7 @@ class HermesCLI: _cprint(f" {_DIM}Then install/update the Termux:API Android app for microphone capture{_RST}") _cprint(f" {_BOLD}Option 2: pkg install python-numpy portaudio && python -m pip install sounddevice{_RST}") else: - _cprint(f"\n {_BOLD}Install: pip install {' '.join(reqs['missing_packages'])}{_RST}") - _cprint(f" {_DIM}Or: pip install hermes-agent[voice]{_RST}") + _cprint(f"\n {_BOLD}Install: {sys.executable} -m pip install {' '.join(reqs['missing_packages'])}{_RST}") return with self._voice_lock: @@ -7382,7 +7896,15 @@ class HermesCLI: self._invalidate() def _get_approval_display_fragments(self): - """Render the dangerous-command approval panel for the prompt_toolkit UI.""" + """Render the dangerous-command approval panel for the prompt_toolkit UI. + + Layout priority: title + command + choices must always render, even if + the terminal is short or the description is long. Description is placed + at the bottom of the panel and gets truncated to fit the remaining row + budget. This prevents HSplit from clipping approve/deny off-screen when + tirith findings produce multi-paragraph descriptions or when the user + runs in a compact terminal pane. 
+ """ state = self._approval_state if not state: return [] @@ -7441,22 +7963,89 @@ class HermesCLI: box_width = _panel_box_width(title, preview_lines) inner_text_width = max(8, box_width - 2) + # Pre-wrap the mandatory content — command + choices must always render. + cmd_wrapped = _wrap_panel_text(cmd_display, inner_text_width) + + # (choice_index, wrapped_line) so we can re-apply selected styling below + choice_wrapped: list[tuple[int, str]] = [] + for i, choice in enumerate(choices): + label = choice_labels.get(choice, choice) + prefix = '❯ ' if i == selected else ' ' + for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): + choice_wrapped.append((i, wrapped)) + + # Budget vertical space so HSplit never clips the command or choices. + # Panel chrome (full layout with separators): + # top border + title + blank_after_title + # + blank_between_cmd_choices + bottom border = 5 rows. + # In tight terminals we collapse to: + # top border + title + bottom border = 3 rows (no blanks). + # + # reserved_below: rows consumed below the approval panel by the + # spinner/tool-progress line, status bar, input area, separators, and + # prompt symbol. Measured at ~6 rows during live PTY approval prompts; + # budget 6 so we don't overestimate the panel's room. + term_rows = shutil.get_terminal_size((100, 24)).lines + chrome_full = 5 + chrome_tight = 3 + reserved_below = 6 + + available = max(0, term_rows - reserved_below) + mandatory_full = chrome_full + len(cmd_wrapped) + len(choice_wrapped) + + # If the full-chrome panel doesn't fit, drop the separator blanks. + # This keeps the command and every choice on-screen in compact terminals. + use_compact_chrome = mandatory_full > available + chrome_rows = chrome_tight if use_compact_chrome else chrome_full + + # If the command itself is too long to leave room for choices (e.g. user + # hit "view" on a multi-hundred-character command), truncate it so the + # approve/deny buttons still render. 
Keep at least 1 row of command. + max_cmd_rows = max(1, available - chrome_rows - len(choice_wrapped)) + if len(cmd_wrapped) > max_cmd_rows: + keep = max(1, max_cmd_rows - 1) if max_cmd_rows > 1 else 1 + cmd_wrapped = cmd_wrapped[:keep] + ["… (command truncated — use /logs or /debug for full text)"] + + # Allocate any remaining rows to description. The extra -1 in full mode + # accounts for the blank separator between choices and description. + mandatory_no_desc = chrome_rows + len(cmd_wrapped) + len(choice_wrapped) + desc_sep_cost = 0 if use_compact_chrome else 1 + available_for_desc = available - mandatory_no_desc - desc_sep_cost + # Even on huge terminals, cap description height so the panel stays compact. + available_for_desc = max(0, min(available_for_desc, 10)) + + desc_wrapped = _wrap_panel_text(description, inner_text_width) if description else [] + if available_for_desc < 1 or not desc_wrapped: + desc_wrapped = [] + elif len(desc_wrapped) > available_for_desc: + keep = max(1, available_for_desc - 1) + desc_wrapped = desc_wrapped[:keep] + ["… (description truncated)"] + + # Render: title → command → choices → description (description last so + # any remaining overflow clips from the bottom of the least-critical + # content, never from the command or choices). Use compact chrome (no + # blank separators) when the terminal is tight. 
lines = [] lines.append(('class:approval-border', '╭' + ('─' * box_width) + '╮\n')) _append_panel_line(lines, 'class:approval-border', 'class:approval-title', title, box_width) - _append_blank_panel_line(lines, 'class:approval-border', box_width) - for wrapped in _wrap_panel_text(description, inner_text_width): - _append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width) - for wrapped in _wrap_panel_text(cmd_display, inner_text_width): + if not use_compact_chrome: + _append_blank_panel_line(lines, 'class:approval-border', box_width) + + for wrapped in cmd_wrapped: _append_panel_line(lines, 'class:approval-border', 'class:approval-cmd', wrapped, box_width) - _append_blank_panel_line(lines, 'class:approval-border', box_width) - for i, choice in enumerate(choices): - label = choice_labels.get(choice, choice) + if not use_compact_chrome: + _append_blank_panel_line(lines, 'class:approval-border', box_width) + + for i, wrapped in choice_wrapped: style = 'class:approval-selected' if i == selected else 'class:approval-choice' - prefix = '❯ ' if i == selected else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): - _append_panel_line(lines, 'class:approval-border', style, wrapped, box_width) - _append_blank_panel_line(lines, 'class:approval-border', box_width) + _append_panel_line(lines, 'class:approval-border', style, wrapped, box_width) + + if desc_wrapped: + if not use_compact_chrome: + _append_blank_panel_line(lines, 'class:approval-border', box_width) + for wrapped in desc_wrapped: + _append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width) + lines.append(('class:approval-border', '╰' + ('─' * box_width) + '╯\n')) return lines @@ -7545,7 +8134,6 @@ class HermesCLI: if not self._init_agent( model_override=turn_route["model"], runtime_override=turn_route["runtime"], - route_label=turn_route["label"], 
request_overrides=turn_route.get("request_overrides"), ): return None @@ -7648,7 +8236,7 @@ class HermesCLI: label = " ⚕ Hermes " fill = w - 2 - len(label) _cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}") - _cprint(sentence.rstrip()) + _cprint(f"{_STREAM_PAD}{sentence.rstrip()}") tts_thread = threading.Thread( target=stream_tts_to_speaker, @@ -7703,6 +8291,10 @@ class HermesCLI: # Start agent in background thread (daemon so it cannot keep the # process alive when the user closes the terminal tab — SIGHUP # exits the main thread and daemon threads are reaped automatically). + # Start per-prompt elapsed timer — frozen after the agent thread + # finishes; reset on the next turn. + self._prompt_start_time = time.time() + self._prompt_duration = 0.0 agent_thread = threading.Thread(target=run_agent, daemon=True) agent_thread.start() @@ -7752,7 +8344,39 @@ class HermesCLI: # Fallback for non-interactive mode (e.g., single-query) agent_thread.join(0.1) - agent_thread.join() # Ensure agent thread completes + # Wait for the agent thread to finish. After an interrupt the + # agent may take a few seconds to clean up (kill subprocess, persist + # session). Poll instead of a blocking join so the process_loop + # stays responsive — if the user sent another interrupt or the + # agent gets stuck, we can break out instead of freezing forever. + if interrupt_msg is not None: + # Interrupt path: poll briefly, then move on. The agent + # thread is daemon — it dies on process exit regardless. + for _wait_tick in range(50): # 50 * 0.2s = 10s max + agent_thread.join(timeout=0.2) + if not agent_thread.is_alive(): + break + # Check if user fired ANOTHER interrupt (Ctrl+C sets + # _should_exit which process_loop checks on next pass). + if getattr(self, '_should_exit', False): + break + if agent_thread.is_alive(): + logger.warning( + "Agent thread still alive after interrupt " + "(thread %s). 
Daemon thread will be cleaned up " + "on exit.", + agent_thread.ident, + ) + else: + # Normal completion: agent thread should be done already, + # but guard against edge cases. + agent_thread.join(timeout=30) + + # Freeze per-prompt elapsed timer once the agent thread has + # exited (or been abandoned as a daemon after interrupt). + if self._prompt_start_time is not None: + self._prompt_duration = max(0.0, time.time() - self._prompt_start_time) + self._prompt_start_time = None # Proactively clean up async clients whose event loop is dead. # The agent thread may have created AsyncOpenAI clients bound @@ -7784,6 +8408,20 @@ class HermesCLI: # Update history with full conversation self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history + # If auto-compression fired mid-turn, the agent created a new + # continuation session and mutated self.agent.session_id. Sync + # the CLI's session_id so /status, /resume, title generation, + # and the exit summary all target the live child session rather + # than the ended parent. Mirrors the gateway's post-run sync + # (gateway/run.py around line 9983). 
+ if ( + self.agent + and getattr(self.agent, "session_id", None) + and self.agent.session_id != self.session_id + ): + self.session_id = self.agent.session_id + self._pending_title = None + # Get the final response response = result.get("final_response", "") if result else "" @@ -7873,13 +8511,13 @@ class HermesCLI: else: _chat_console = ChatConsole() _chat_console.print(Panel( - _rich_text_from_ansi(response), + _render_final_assistant_content(response, mode=self.final_response_markdown), title=f"[{_resp_color} bold]{label}[/]", title_align="left", border_style=_resp_color, style=_resp_text, box=rich_box.HORIZONTALS, - padding=(1, 2), + padding=(1, 4), )) @@ -7932,7 +8570,15 @@ class HermesCLI: else: print(f"\n⚡ Sending after interrupt: '{preview}'") self._pending_input.put(combined) - + + # If a /steer was left over (agent finished before another tool + # batch could absorb it), deliver it as the next user turn. + _leftover_steer = result.get("pending_steer") if result else None + if _leftover_steer and hasattr(self, '_pending_input'): + preview = _leftover_steer[:60] + ("..." if len(_leftover_steer) > 60 else "") + print(f"\n⏩ Delivering leftover /steer as next turn: '{preview}'") + self._pending_input.put(_leftover_steer) + return response except Exception as e: @@ -8210,7 +8856,7 @@ class HermesCLI: except Exception: _welcome_text = "Welcome to Hermes Agent! Type your message or /help for commands." 
_welcome_color = "#FFF8DC" - self.console.print(f"[{_welcome_color}]{_welcome_text}[/]") + self._console_print(f"[{_welcome_color}]{_welcome_text}[/]") # Show a random tip to help users discover features try: from hermes_cli.tips import get_random_tip @@ -8219,16 +8865,16 @@ class HermesCLI: _tip_color = _welcome_skin.get_color("banner_dim", "#B8860B") except Exception: _tip_color = "#B8860B" - self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") + self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") except Exception: pass # Tips are non-critical — never break startup if self.preloaded_skills and not self._startup_skills_line_shown: skills_label = ", ".join(self.preloaded_skills) - self.console.print( + self._console_print( f"[bold {_accent_hex()}]Activated skills:[/] {skills_label}" ) self._startup_skills_line_shown = True - self.console.print() + self._console_print() # State for async operation self._agent_running = False @@ -8350,6 +8996,7 @@ class HermesCLI: # --- /model picker modal --- if self._model_picker_state: self._handle_model_picker_selection() + event.app.current_buffer.reset() event.app.invalidate() return @@ -8430,6 +9077,16 @@ class HermesCLI: """Ctrl+Enter (c-j) inserts a newline. Most terminals send c-j for Ctrl+Enter.""" event.current_buffer.insert_text('\n') + @kb.add( + 'c-g', + filter=Condition( + lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state + ), + ) + def handle_open_in_editor(event): + """Ctrl+G opens the current draft in an external editor.""" + cli_ref._open_external_editor(event.current_buffer) + @kb.add('tab', eager=True) def handle_tab(event): """Tab: accept completion, auto-suggestion, or start completions. 
@@ -8515,6 +9172,13 @@ class HermesCLI: state["selected"] = min(max_idx, state.get("selected", 0) + 1) event.app.invalidate() + @kb.add('escape', filter=Condition(lambda: bool(self._model_picker_state)), eager=True) + def model_picker_escape(event): + """ESC closes the /model picker.""" + self._close_model_picker() + event.app.current_buffer.reset() + event.app.invalidate() + # --- History navigation: up/down browse history in normal input mode --- # The TextArea is multiline, so by default up/down only move the cursor. # Buffer.auto_up/auto_down handle both: cursor movement when multi-line, @@ -8631,6 +9295,24 @@ class HermesCLI: self._should_exit = True event.app.exit() + _modal_prompt_active = Condition( + lambda: bool(self._secret_state or self._sudo_state) + ) + + @kb.add('escape', filter=_modal_prompt_active, eager=True) + def handle_escape_modal(event): + """ESC cancels active secret/sudo prompts.""" + if self._secret_state: + self._cancel_secret_capture() + event.app.current_buffer.reset() + event.app.invalidate() + return + if self._sudo_state: + self._sudo_state["response_queue"].put("") + self._sudo_state = None + event.app.invalidate() + return + @kb.add('c-z') def handle_ctrl_z(event): """Handle Ctrl+Z - suspend process to background (Unix only).""" @@ -8856,6 +9538,7 @@ class HermesCLI: _prev_text_len = [0] _prev_newline_count = [0] _paste_just_collapsed = [False] + self._skip_paste_collapse = False def _on_text_changed(buf): """Detect large pastes and collapse them to a file reference. 
@@ -8875,8 +9558,9 @@ class HermesCLI: text = buf.text chars_added = len(text) - _prev_text_len[0] _prev_text_len[0] = len(text) - if _paste_just_collapsed[0]: + if _paste_just_collapsed[0] or self._skip_paste_collapse: _paste_just_collapsed[0] = False + self._skip_paste_collapse = False _prev_newline_count[0] = text.count('\n') return line_count = text.count('\n') @@ -8885,12 +9569,10 @@ class HermesCLI: is_paste = chars_added > 1 or newlines_added >= 4 if line_count >= 5 and is_paste and not text.startswith('/'): _paste_counter[0] += 1 - # Save to temp file paste_dir = _hermes_home / "pastes" paste_dir.mkdir(parents=True, exist_ok=True) paste_file = paste_dir / f"paste_{_paste_counter[0]}_{datetime.now().strftime('%H%M%S')}.txt" paste_file.write_text(text, encoding="utf-8") - # Replace buffer with compact reference _paste_just_collapsed[0] = True buf.text = f"[Pasted text #{_paste_counter[0]}: {line_count + 1} lines \u2192 {paste_file}]" buf.cursor_position = len(buf.text) @@ -8928,9 +9610,9 @@ class HermesCLI: if cli_ref._voice_processing: return "transcribing..." 
if cli_ref._sudo_state: - return "type password (hidden), Enter to skip" + return "type password (hidden), Enter to submit · ESC to skip" if cli_ref._secret_state: - return "type secret (hidden), Enter to skip" + return "type secret (hidden), Enter to submit · ESC to skip" if cli_ref._approval_state: return "" if cli_ref._clarify_freetext: @@ -9005,21 +9687,10 @@ class HermesCLI: return cli_ref._agent_spacer_height() def get_spinner_text(): - txt = cli_ref._spinner_text - if not txt: + spinner_line = cli_ref._render_spinner_text() + if not spinner_line: return [] - # Append live elapsed timer when a tool is running - t0 = cli_ref._tool_start_time - if t0 > 0: - import time as _time - elapsed = _time.monotonic() - t0 - if elapsed >= 60: - _m, _s = int(elapsed // 60), int(elapsed % 60) - elapsed_str = f"{_m}m {_s}s" - else: - elapsed_str = f"{elapsed:.1f}s" - return [('class:hint', f' {txt} ({elapsed_str})')] - return [('class:hint', f' {txt}')] + return [('class:hint', spinner_line)] def get_spinner_height(): return cli_ref._spinner_widget_height() @@ -9027,6 +9698,7 @@ class HermesCLI: spinner_widget = Window( content=FormattedTextControl(get_spinner_text), height=get_spinner_height, + wrap_lines=True, ) spacer = Window( @@ -9063,7 +9735,13 @@ class HermesCLI: lines.append((border_style, "│" + (" " * box_width) + "│\n")) def _get_clarify_display(): - """Build styled text for the clarify question/choices panel.""" + """Build styled text for the clarify question/choices panel. + + Layout priority: choices + Other option must always render even if + the question is very long. The question is budgeted to leave enough + rows for the choices and trailing chrome; anything over the budget + is truncated with a marker. 
+ """ state = cli_ref._clarify_state if not state: return [] @@ -9084,48 +9762,97 @@ class HermesCLI: box_width = _panel_box_width("Hermes needs your input", preview_lines) inner_text_width = max(8, box_width - 2) + # Pre-wrap choices + Other option — these are mandatory. + choice_wrapped: list[tuple[int, str]] = [] + if choices: + for i, choice in enumerate(choices): + prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else ' ' + for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): + choice_wrapped.append((i, wrapped)) + # Trailing Other row(s) + other_idx = len(choices) + if selected == other_idx and not cli_ref._clarify_freetext: + other_label_mand = '❯ Other (type your answer)' + elif cli_ref._clarify_freetext: + other_label_mand = '❯ Other (type below)' + else: + other_label_mand = ' Other (type your answer)' + other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") + elif cli_ref._clarify_freetext: + # Freetext-only mode: the guidance line takes the place of choices. + other_wrapped = _wrap_panel_text( + "Type your answer in the prompt below, then press Enter.", + inner_text_width, + ) + else: + other_wrapped = [] + + # Budget the question so mandatory rows always render. + # Chrome layouts: + # full : top border + blank_after_title + blank_after_question + # + blank_before_bottom + bottom border = 5 rows + # tight: top border + bottom border = 2 rows (drop all blanks) + # + # reserved_below matches the approval-panel budget (~6 rows for + # spinner/tool-progress + status + input + separators + prompt). 
+ term_rows = shutil.get_terminal_size((100, 24)).lines + chrome_full = 5 + chrome_tight = 2 + reserved_below = 6 + + available = max(0, term_rows - reserved_below) + mandatory_full = chrome_full + len(choice_wrapped) + len(other_wrapped) + + use_compact_chrome = mandatory_full > available + chrome_rows = chrome_tight if use_compact_chrome else chrome_full + + max_question_rows = max(1, available - chrome_rows - len(choice_wrapped) - len(other_wrapped)) + max_question_rows = min(max_question_rows, 12) # soft cap on huge terminals + + question_wrapped = _wrap_panel_text(question, inner_text_width) + if len(question_wrapped) > max_question_rows: + keep = max(1, max_question_rows - 1) + question_wrapped = question_wrapped[:keep] + ["… (question truncated)"] + lines = [] # Box top border lines.append(('class:clarify-border', '╭─ ')) lines.append(('class:clarify-title', 'Hermes needs your input')) lines.append(('class:clarify-border', ' ' + ('─' * max(0, box_width - len("Hermes needs your input") - 3)) + '╮\n')) - _append_blank_panel_line(lines, 'class:clarify-border', box_width) + if not use_compact_chrome: + _append_blank_panel_line(lines, 'class:clarify-border', box_width) - # Question text - for wrapped in _wrap_panel_text(question, inner_text_width): + # Question text (bounded) + for wrapped in question_wrapped: _append_panel_line(lines, 'class:clarify-border', 'class:clarify-question', wrapped, box_width) - _append_blank_panel_line(lines, 'class:clarify-border', box_width) + if not use_compact_chrome: + _append_blank_panel_line(lines, 'class:clarify-border', box_width) if cli_ref._clarify_freetext and not choices: - guidance = "Type your answer in the prompt below, then press Enter." 
- for wrapped in _wrap_panel_text(guidance, inner_text_width): + for wrapped in other_wrapped: _append_panel_line(lines, 'class:clarify-border', 'class:clarify-choice', wrapped, box_width) - _append_blank_panel_line(lines, 'class:clarify-border', box_width) + if not use_compact_chrome: + _append_blank_panel_line(lines, 'class:clarify-border', box_width) if choices: # Multiple-choice mode: show selectable options - for i, choice in enumerate(choices): + for i, wrapped in choice_wrapped: style = 'class:clarify-selected' if i == selected and not cli_ref._clarify_freetext else 'class:clarify-choice' - prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else ' ' - wrapped_lines = _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" ") - for wrapped in wrapped_lines: - _append_panel_line(lines, 'class:clarify-border', style, wrapped, box_width) + _append_panel_line(lines, 'class:clarify-border', style, wrapped, box_width) - # "Other" option (5th line, only shown when choices exist) + # "Other" option (trailing row(s), only shown when choices exist) other_idx = len(choices) if selected == other_idx and not cli_ref._clarify_freetext: other_style = 'class:clarify-selected' - other_label = '❯ Other (type your answer)' elif cli_ref._clarify_freetext: other_style = 'class:clarify-active-other' - other_label = '❯ Other (type below)' else: other_style = 'class:clarify-choice' - other_label = ' Other (type your answer)' - for wrapped in _wrap_panel_text(other_label, inner_text_width, subsequent_indent=" "): + for wrapped in other_wrapped: _append_panel_line(lines, 'class:clarify-border', other_style, wrapped, box_width) - _append_blank_panel_line(lines, 'class:clarify-border', box_width) + if not use_compact_chrome: + _append_blank_panel_line(lines, 'class:clarify-border', box_width) lines.append(('class:clarify-border', '╰' + ('─' * box_width) + '╯\n')) return lines @@ -9173,7 +9900,7 @@ class HermesCLI: prompt = state.get("prompt") or f"Enter 
value for {state.get('var_name', 'secret')}" metadata = state.get("metadata") or {} help_text = metadata.get("help") - body = 'Enter secret below (hidden), or press Enter to skip' + body = 'Enter secret below (hidden), ESC or Ctrl+C to skip' content_lines = [prompt, body] if help_text: content_lines.insert(1, str(help_text)) @@ -9242,6 +9969,22 @@ class HermesCLI: box_width = _panel_box_width(title, [hint] + choices, min_width=46, max_width=84) inner_text_width = max(8, box_width - 6) + selected = state.get("selected", 0) + + # Scrolling viewport: the panel renders into a Window with no max + # height, so without limiting visible items the bottom border and + # any items past the available terminal rows get clipped on long + # provider catalogs (e.g. Ollama Cloud's 36+ models). + try: + from prompt_toolkit.application import get_app + term_rows = get_app().output.get_size().rows + except Exception: + term_rows = shutil.get_terminal_size((100, 24)).lines + scroll_offset, visible = HermesCLI._compute_model_picker_viewport( + selected, state.get("_scroll_offset", 0), len(choices), term_rows, + ) + state["_scroll_offset"] = scroll_offset + lines = [] lines.append(('class:clarify-border', '╭─ ')) lines.append(('class:clarify-title', title)) @@ -9249,8 +9992,8 @@ class HermesCLI: _append_blank_panel_line(lines, 'class:clarify-border', box_width) _append_panel_line(lines, 'class:clarify-border', 'class:clarify-hint', hint, box_width) _append_blank_panel_line(lines, 'class:clarify-border', box_width) - selected = state.get("selected", 0) - for idx, choice in enumerate(choices): + for idx in range(scroll_offset, scroll_offset + visible): + choice = choices[idx] style = 'class:clarify-selected' if idx == selected else 'class:clarify-choice' prefix = '❯ ' if idx == selected else ' ' for wrapped in _wrap_panel_text(prefix + choice, inner_text_width, subsequent_indent=' '): @@ -9551,45 +10294,9 @@ class HermesCLI: _paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines 
\u2192 (.+?)\]') paste_refs = list(_paste_ref_re.finditer(user_input)) if isinstance(user_input, str) else [] if paste_refs: - def _expand_ref(m): - p = Path(m.group(1)) - return p.read_text(encoding="utf-8") if p.exists() else m.group(0) - expanded = _paste_ref_re.sub(_expand_ref, user_input) - total_lines = expanded.count('\n') + 1 - n_pastes = len(paste_refs) - _user_bar = f"[{_accent_hex()}]{'─' * 40}[/]" - print() - ChatConsole().print(_user_bar) - # Show any surrounding user text alongside the paste summary - split_parts = _paste_ref_re.split(user_input) - visible_user_text = " ".join( - split_parts[i].strip() for i in range(0, len(split_parts), 2) if split_parts[i].strip() - ) - if visible_user_text: - ChatConsole().print( - f"[bold {_accent_hex()}]\u25cf[/] [bold]{_escape(visible_user_text)}[/] " - f"[dim]({n_pastes} pasted block{'s' if n_pastes > 1 else ''}, {total_lines} lines total)[/]" - ) - else: - ChatConsole().print( - f"[bold {_accent_hex()}]\u25cf[/] [bold]{_escape(f'[Pasted text: {total_lines} lines]')}[/]" - ) - user_input = expanded - else: - _user_bar = f"[{_accent_hex()}]{'─' * 40}[/]" - if '\n' in user_input: - first_line = user_input.split('\n')[0] - line_count = user_input.count('\n') + 1 - print() - ChatConsole().print(_user_bar) - ChatConsole().print( - f"[bold {_accent_hex()}]●[/] [bold]{_escape(first_line)}[/] " - f"[dim](+{line_count - 1} lines)[/]" - ) - else: - print() - ChatConsole().print(_user_bar) - ChatConsole().print(f"[bold {_accent_hex()}]●[/] [bold]{_escape(user_input)}[/]") + user_input = self._expand_paste_references(user_input) + print() + self._print_user_message_preview(user_input) # Show image attachment count if submit_images: @@ -9655,8 +10362,36 @@ class HermesCLI: # Register signal handlers for graceful shutdown on SSH disconnect / SIGTERM def _signal_handler(signum, frame): - """Handle SIGHUP/SIGTERM by triggering graceful cleanup.""" + """Handle SIGHUP/SIGTERM by triggering graceful cleanup. 
+ + Calls ``self.agent.interrupt()`` first so the agent daemon + thread's poll loop sees the per-thread interrupt and kills the + tool's subprocess group via ``_kill_process`` (os.killpg). + Without this, the main thread dies from KeyboardInterrupt and + the daemon thread is killed with it — before it can run one + more poll iteration to clean up the subprocess, which was + spawned with ``os.setsid`` and therefore survives as an orphan + with PPID=1. + + Grace window (``HERMES_SIGTERM_GRACE``, default 1.5 s) gives + the daemon time to: detect the interrupt (next 200 ms poll) → + call _kill_process (SIGTERM + 1 s wait + SIGKILL if needed) → + return from _wait_for_process. ``time.sleep`` releases the + GIL so the daemon actually runs during the window. + """ logger.debug("Received signal %s, triggering graceful shutdown", signum) + try: + if getattr(self, "agent", None) and getattr(self, "_agent_running", False): + self.agent.interrupt(f"received signal {signum}") + import time as _t + try: + _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) + except (TypeError, ValueError): + _grace = 1.5 + if _grace > 0: + _t.sleep(_grace) + except Exception: + pass # never block signal handling raise KeyboardInterrupt() try: @@ -9959,6 +10694,45 @@ def main( # Register cleanup for single-query mode (interactive mode registers in run()) atexit.register(_run_cleanup) + + # Also install signal handlers in single-query / `-q` mode. Interactive + # mode registers its own inside HermesCLI.run(), but `-q` runs + # cli.agent.run_conversation() below and AIAgent spawns worker threads + # for tools — so when SIGTERM arrives on the main thread, raising + # KeyboardInterrupt only unwinds the main thread, not the worker + # running _wait_for_process. Python then exits, the child subprocess + # (spawned with os.setsid, its own process group) is reparented to + # init and keeps running as an orphan. 
+ # + # Fix: route SIGTERM/SIGHUP through agent.interrupt() which sets the + # per-thread interrupt flag the worker's poll loop checks every 200 ms. + # Give the worker a grace window to call _kill_process (SIGTERM to the + # process group, then SIGKILL after 1 s), then raise KeyboardInterrupt + # so main unwinds normally. HERMES_SIGTERM_GRACE overrides the 1.5 s + # default for debugging. + def _signal_handler_q(signum, frame): + logger.debug("Received signal %s in single-query mode", signum) + try: + _agent = getattr(cli, "agent", None) + if _agent is not None: + _agent.interrupt(f"received signal {signum}") + import time as _t + try: + _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) + except (TypeError, ValueError): + _grace = 1.5 + if _grace > 0: + _t.sleep(_grace) + except Exception: + pass # never block signal handling + raise KeyboardInterrupt() + try: + import signal as _signal + _signal.signal(_signal.SIGTERM, _signal_handler_q) + if hasattr(_signal, "SIGHUP"): + _signal.signal(_signal.SIGHUP, _signal_handler_q) + except Exception: + pass # signal handler may fail in restricted environments # Handle single query mode if query or image: @@ -9981,19 +10755,33 @@ def main( if cli._init_agent( model_override=turn_route["model"], runtime_override=turn_route["runtime"], - route_label=turn_route["label"], request_overrides=turn_route.get("request_overrides"), ): cli.agent.quiet_mode = True cli.agent.suppress_status_output = True + # Suppress streaming display callbacks so stdout stays + # machine-readable (no styled "Hermes" box, no tool-gen + # status lines). The response is printed once below. + cli.agent.stream_delta_callback = None + cli.agent.tool_gen_callback = None result = cli.agent.run_conversation( user_message=effective_query, conversation_history=cli.conversation_history, ) + # Sync session_id if mid-run compression created a + # continuation session. 
The exit line below reports + # session_id to stderr for automation wrappers; without + # this sync it would point at the ended parent. + if ( + getattr(cli.agent, "session_id", None) + and cli.agent.session_id != cli.session_id + ): + cli.session_id = cli.agent.session_id response = result.get("final_response", "") if isinstance(result, dict) else str(result) if response: print(response) - print(f"\nsession_id: {cli.session_id}") + # Session ID goes to stderr so piped stdout is clean. + print(f"\nsession_id: {cli.session_id}", file=sys.stderr) # Ensure proper exit code for automation wrappers sys.exit(1 if isinstance(result, dict) and result.get("failed") else 0) diff --git a/cron/jobs.py b/cron/jobs.py index 47e0b66efa..06d782888f 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -501,6 +501,12 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]] if schedule_changed: updated_schedule = updated["schedule"] + # The API may pass schedule as a raw string (e.g. "every 10m") + # instead of a pre-parsed dict. Normalize it the same way + # create_job() does so downstream code can call .get() safely. + if isinstance(updated_schedule, str): + updated_schedule = parse_schedule(updated_schedule) + updated["schedule"] = updated_schedule updated["schedule_display"] = updates.get( "schedule_display", updated_schedule.get("display", updated.get("schedule_display")), diff --git a/cron/scheduler.py b/cron/scheduler.py index 83b7abb9b1..ebeb29dd41 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -10,6 +10,7 @@ runs at a time if multiple processes overlap. import asyncio import concurrent.futures +import contextvars import json import logging import os @@ -26,7 +27,7 @@ except ImportError: except ImportError: msvcrt = None from pathlib import Path -from typing import Optional +from typing import List, Optional # Add parent directory to path for imports BEFORE repo-level imports. # Without this, standalone invocations (e.g. 
after `hermes update` reloads @@ -48,6 +49,33 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({ "qqbot", }) +# Platforms that support a configured cron/notification home target, mapped to +# the environment variable used by gateway setup/runtime config. +_HOME_TARGET_ENV_VARS = { + "matrix": "MATRIX_HOME_ROOM", + "telegram": "TELEGRAM_HOME_CHANNEL", + "discord": "DISCORD_HOME_CHANNEL", + "slack": "SLACK_HOME_CHANNEL", + "signal": "SIGNAL_HOME_CHANNEL", + "mattermost": "MATTERMOST_HOME_CHANNEL", + "sms": "SMS_HOME_CHANNEL", + "email": "EMAIL_HOME_ADDRESS", + "dingtalk": "DINGTALK_HOME_CHANNEL", + "feishu": "FEISHU_HOME_CHANNEL", + "wecom": "WECOM_HOME_CHANNEL", + "weixin": "WEIXIN_HOME_CHANNEL", + "bluebubbles": "BLUEBUBBLES_HOME_CHANNEL", + "qqbot": "QQBOT_HOME_CHANNEL", +} + +# Legacy env var names kept for back-compat. Each entry is the current +# primary env var → the previous name. _get_home_target_chat_id falls +# back to the legacy name if the primary is unset, so users who set the +# old name before the rename keep working until they migrate. 
+_LEGACY_HOME_TARGET_ENV_VARS = { + "QQBOT_HOME_CHANNEL": "QQ_HOME_CHANNEL", +} + from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run # Sentinel: when a cron agent has nothing new to report, it can start its @@ -75,15 +103,28 @@ def _resolve_origin(job: dict) -> Optional[dict]: return None -def _resolve_delivery_target(job: dict) -> Optional[dict]: - """Resolve the concrete auto-delivery target for a cron job, if any.""" - deliver = job.get("deliver", "local") +def _get_home_target_chat_id(platform_name: str) -> str: + """Return the configured home target chat/room ID for a delivery platform.""" + env_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower()) + if not env_var: + return "" + value = os.getenv(env_var, "") + if not value: + legacy = _LEGACY_HOME_TARGET_ENV_VARS.get(env_var) + if legacy: + value = os.getenv(legacy, "") + return value + + +def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]: + """Resolve one concrete auto-delivery target for a cron job.""" + origin = _resolve_origin(job) - if deliver == "local": + if deliver_value == "local": return None - if deliver == "origin": + if deliver_value == "origin": if origin: return { "platform": origin["platform"], @@ -92,8 +133,8 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]: } # Origin missing (e.g. job created via API/script) — try each # platform's home channel as a fallback instead of silently dropping. 
- for platform_name in ("matrix", "telegram", "discord", "slack", "bluebubbles"): - chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "") + for platform_name in _HOME_TARGET_ENV_VARS: + chat_id = _get_home_target_chat_id(platform_name) if chat_id: logger.info( "Job '%s' has deliver=origin but no origin; falling back to %s home channel", @@ -107,8 +148,8 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]: } return None - if ":" in deliver: - platform_name, rest = deliver.split(":", 1) + if ":" in deliver_value: + platform_name, rest = deliver_value.split(":", 1) platform_key = platform_name.lower() from tools.send_message_tool import _parse_target_ref @@ -138,7 +179,7 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]: "thread_id": thread_id, } - platform_name = deliver + platform_name = deliver_value if origin and origin.get("platform") == platform_name: return { "platform": platform_name, @@ -148,7 +189,7 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]: if platform_name.lower() not in _KNOWN_DELIVERY_PLATFORMS: return None - chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "") + chat_id = _get_home_target_chat_id(platform_name) if not chat_id: return None @@ -159,6 +200,30 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]: } +def _resolve_delivery_targets(job: dict) -> List[dict]: + """Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver).""" + deliver = job.get("deliver", "local") + if deliver == "local": + return [] + parts = [p.strip() for p in str(deliver).split(",") if p.strip()] + seen = set() + targets = [] + for part in parts: + target = _resolve_single_delivery_target(job, part) + if target: + key = (target["platform"].lower(), str(target["chat_id"]), target.get("thread_id")) + if key not in seen: + seen.add(key) + targets.append(target) + return targets + + +def _resolve_delivery_target(job: dict) -> Optional[dict]: + """Resolve the concrete 
auto-delivery target for a cron job, if any.""" + targets = _resolve_delivery_targets(job) + return targets[0] if targets else None + + # Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background _AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'}) _VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}) @@ -199,7 +264,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Optional[str]: """ - Deliver job output to the configured target (origin chat, specific platform, etc.). + Deliver job output to the configured target(s) (origin chat, specific platform, etc.). When ``adapters`` and ``loop`` are provided (gateway is running), tries to use the live adapter first — this supports E2EE rooms (e.g. Matrix) where @@ -208,33 +273,14 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option Returns None on success, or an error string on failure. 
""" - target = _resolve_delivery_target(job) - if not target: + targets = _resolve_delivery_targets(job) + if not targets: if job.get("deliver", "local") != "local": msg = f"no delivery target resolved for deliver={job.get('deliver', 'local')}" logger.warning("Job '%s': %s", job["id"], msg) return msg return None # local-only jobs don't deliver — not a failure - platform_name = target["platform"] - chat_id = target["chat_id"] - thread_id = target.get("thread_id") - - # Diagnostic: log thread_id for topic-aware delivery debugging - origin = job.get("origin") or {} - origin_thread = origin.get("thread_id") - if origin_thread and not thread_id: - logger.warning( - "Job '%s': origin has thread_id=%s but delivery target lost it " - "(deliver=%s, target=%s)", - job["id"], origin_thread, job.get("deliver", "local"), target, - ) - elif thread_id: - logger.debug( - "Job '%s': delivering to %s:%s thread_id=%s", - job["id"], platform_name, chat_id, thread_id, - ) - from tools.send_message_tool import _send_to_platform from gateway.config import load_gateway_config, Platform @@ -257,24 +303,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option "bluebubbles": Platform.BLUEBUBBLES, "qqbot": Platform.QQBOT, } - platform = platform_map.get(platform_name.lower()) - if not platform: - msg = f"unknown platform '{platform_name}'" - logger.warning("Job '%s': %s", job["id"], msg) - return msg - - try: - config = load_gateway_config() - except Exception as e: - msg = f"failed to load gateway config: {e}" - logger.error("Job '%s': %s", job["id"], msg) - return msg - - pconfig = config.platforms.get(platform) - if not pconfig or not pconfig.enabled: - msg = f"platform '{platform_name}' not configured/enabled" - logger.warning("Job '%s': %s", job["id"], msg) - return msg # Optionally wrap the content with a header/footer so the user knows this # is a cron delivery. 
Wrapping is on by default; set cron.wrap_response: false @@ -288,11 +316,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option if wrap_response: task_name = job.get("name", job["id"]) + job_id = job.get("id", "") delivery_content = ( f"Cronjob Response: {task_name}\n" + f"(job_id: {job_id})\n" f"-------------\n\n" f"{content}\n\n" - f"Note: The agent cannot see this message, and therefore cannot respond to it." + f"To stop or manage this job, send me a new message (e.g. \"stop reminder {task_name}\")." ) else: delivery_content = content @@ -301,67 +331,117 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option from gateway.platforms.base import BasePlatformAdapter media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content) - # Prefer the live adapter when the gateway is running — this supports E2EE - # rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt. - runtime_adapter = (adapters or {}).get(platform) - if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)(): - send_metadata = {"thread_id": thread_id} if thread_id else None - try: - # Send cleaned text (MEDIA tags stripped) — not the raw content - text_to_send = cleaned_delivery_content.strip() - adapter_ok = True - if text_to_send: - future = asyncio.run_coroutine_threadsafe( - runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata), - loop, - ) - send_result = future.result(timeout=60) - if send_result and not getattr(send_result, "success", True): - err = getattr(send_result, "error", "unknown") - logger.warning( - "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone", - job["id"], platform_name, chat_id, err, - ) - adapter_ok = False # fall through to standalone path + try: + config = load_gateway_config() + except Exception as e: + msg = f"failed to load gateway config: {e}" + logger.error("Job '%s': %s", job["id"], msg) 
+ return msg - # Send extracted media files as native attachments via the live adapter - if adapter_ok and media_files: - _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job) + delivery_errors = [] - if adapter_ok: - logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id) - return None - except Exception as e: + for target in targets: + platform_name = target["platform"] + chat_id = target["chat_id"] + thread_id = target.get("thread_id") + + # Diagnostic: log thread_id for topic-aware delivery debugging + origin = job.get("origin") or {} + origin_thread = origin.get("thread_id") + if origin_thread and not thread_id: logger.warning( - "Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone", - job["id"], platform_name, chat_id, e, + "Job '%s': origin has thread_id=%s but delivery target lost it " + "(deliver=%s, target=%s)", + job["id"], origin_thread, job.get("deliver", "local"), target, + ) + elif thread_id: + logger.debug( + "Job '%s': delivering to %s:%s thread_id=%s", + job["id"], platform_name, chat_id, thread_id, ) - # Standalone path: run the async send in a fresh event loop (safe from any thread) - coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files) - try: - result = asyncio.run(coro) - except RuntimeError: - # asyncio.run() checks for a running loop before awaiting the coroutine; - # when it raises, the original coro was never started — close it to - # prevent "coroutine was never awaited" RuntimeWarning, then retry in a - # fresh thread that has no running loop. 
- coro.close() - import concurrent.futures - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: - future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)) - result = future.result(timeout=30) - except Exception as e: - msg = f"delivery to {platform_name}:{chat_id} failed: {e}" - logger.error("Job '%s': %s", job["id"], msg) - return msg + platform = platform_map.get(platform_name.lower()) + if not platform: + msg = f"unknown platform '{platform_name}'" + logger.warning("Job '%s': %s", job["id"], msg) + delivery_errors.append(msg) + continue - if result and result.get("error"): - msg = f"delivery error: {result['error']}" - logger.error("Job '%s': %s", job["id"], msg) - return msg + # Prefer the live adapter when the gateway is running — this supports E2EE + # rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt. + runtime_adapter = (adapters or {}).get(platform) + delivered = False + if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)(): + send_metadata = {"thread_id": thread_id} if thread_id else None + try: + # Send cleaned text (MEDIA tags stripped) — not the raw content + text_to_send = cleaned_delivery_content.strip() + adapter_ok = True + if text_to_send: + future = asyncio.run_coroutine_threadsafe( + runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata), + loop, + ) + send_result = future.result(timeout=60) + if send_result and not getattr(send_result, "success", True): + err = getattr(send_result, "error", "unknown") + logger.warning( + "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone", + job["id"], platform_name, chat_id, err, + ) + adapter_ok = False # fall through to standalone path - logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id) + # Send extracted media files as native attachments via the live adapter + if 
adapter_ok and media_files: + _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job) + + if adapter_ok: + logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id) + delivered = True + except Exception as e: + logger.warning( + "Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone", + job["id"], platform_name, chat_id, e, + ) + + if not delivered: + pconfig = config.platforms.get(platform) + if not pconfig or not pconfig.enabled: + msg = f"platform '{platform_name}' not configured/enabled" + logger.warning("Job '%s': %s", job["id"], msg) + delivery_errors.append(msg) + continue + + # Standalone path: run the async send in a fresh event loop (safe from any thread) + coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files) + try: + result = asyncio.run(coro) + except RuntimeError: + # asyncio.run() checks for a running loop before awaiting the coroutine; + # when it raises, the original coro was never started — close it to + # prevent "coroutine was never awaited" RuntimeWarning, then retry in a + # fresh thread that has no running loop. 
+ coro.close() + import concurrent.futures + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: + future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)) + result = future.result(timeout=30) + except Exception as e: + msg = f"delivery to {platform_name}:{chat_id} failed: {e}" + logger.error("Job '%s': %s", job["id"], msg) + delivery_errors.append(msg) + continue + + if result and result.get("error"): + msg = f"delivery error: {result['error']}" + logger.error("Job '%s': %s", job["id"], msg) + delivery_errors.append(msg) + continue + + logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id) + + if delivery_errors: + return "; ".join(delivery_errors) return None @@ -484,15 +564,53 @@ def _run_job_script(script_path: str) -> tuple[bool, str]: return False, f"Script execution failed: {exc}" -def _build_job_prompt(job: dict) -> str: - """Build the effective prompt for a cron job, optionally loading one or more skills first.""" +def _parse_wake_gate(script_output: str) -> bool: + """Parse the last non-empty stdout line of a cron job's pre-check script + as a wake gate. + + The convention (ported from nanoclaw #1232): if the last stdout line is + JSON like ``{"wakeAgent": false}``, the agent is skipped entirely — no + LLM run, no delivery. Any other output (non-JSON, missing flag, gate + absent, or ``wakeAgent: true``) means wake the agent normally. + + Returns True if the agent should wake, False to skip. 
+ """ + if not script_output: + return True + stripped_lines = [line for line in script_output.splitlines() if line.strip()] + if not stripped_lines: + return True + last_line = stripped_lines[-1].strip() + try: + gate = json.loads(last_line) + except (json.JSONDecodeError, ValueError): + return True + if not isinstance(gate, dict): + return True + return gate.get("wakeAgent", True) is not False + + +def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: + """Build the effective prompt for a cron job, optionally loading one or more skills first. + + Args: + job: The cron job dict. + prerun_script: Optional ``(success, stdout)`` from a script that has + already been executed by the caller (e.g. for a wake-gate check). + When provided, the script is not re-executed and the cached + result is used for prompt injection. When omitted, the script + (if any) runs inline as before. + """ prompt = job.get("prompt", "") skills = job.get("skills") # Run data-collection script if configured, inject output as context. script_path = job.get("script") if script_path: - success, script_output = _run_job_script(script_path) + if prerun_script is not None: + success, script_output = prerun_script + else: + success, script_output = _run_job_script(script_path) if success: if script_output: prompt = ( @@ -594,13 +712,41 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: job_id = job["id"] job_name = job["name"] - prompt = _build_job_prompt(job) + + # Wake-gate: if this job has a pre-check script, run it BEFORE building + # the prompt so a ``{"wakeAgent": false}`` response can short-circuit + # the whole agent run. We pass the result into _build_job_prompt so + # the script is only executed once. 
+ prerun_script = None + script_path = job.get("script") + if script_path: + prerun_script = _run_job_script(script_path) + _ran_ok, _script_output = prerun_script + if _ran_ok and not _parse_wake_gate(_script_output): + logger.info( + "Job '%s' (ID: %s): wakeAgent=false, skipping agent run", + job_name, job_id, + ) + silent_doc = ( + f"# Cron Job: {job_name}\n\n" + f"**Job ID:** {job_id}\n" + f"**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}\n\n" + "Script gate returned `wakeAgent=false` — agent skipped.\n" + ) + return True, silent_doc, SILENT_MARKER, None + + prompt = _build_job_prompt(job, prerun_script=prerun_script) origin = _resolve_origin(job) _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}" logger.info("Running job '%s' (ID: %s)", job_name, job_id) logger.info("Prompt: %s", prompt[:100]) + # Mark this as a cron session so the approval system can apply cron_mode. + # This env var is process-wide and persists for the lifetime of the + # scheduler process — every job this process runs is a cron job. + os.environ["HERMES_CRON_SESSION"] = "1" + try: # Inject origin context so the agent's send_message tool knows the chat. # Must be INSIDE the try block so the finally cleanup always runs. 
@@ -680,7 +826,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: # Provider routing pr = _cfg.get("provider_routing", {}) - smart_routing = _cfg.get("smart_model_routing", {}) or {} from hermes_cli.runtime_provider import ( resolve_runtime_provider, @@ -697,24 +842,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: message = format_runtime_provider_error(exc) raise RuntimeError(message) from exc - from agent.smart_model_routing import resolve_turn_route - turn_route = resolve_turn_route( - prompt, - smart_routing, - { - "model": model, - "api_key": runtime.get("api_key"), - "base_url": runtime.get("base_url"), - "provider": runtime.get("provider"), - "api_mode": runtime.get("api_mode"), - "command": runtime.get("command"), - "args": list(runtime.get("args") or []), - }, - ) - fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None credential_pool = None - runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower() + runtime_provider = str(runtime.get("provider") or "").strip().lower() if runtime_provider: try: from agent.credential_pool import load_pool @@ -731,13 +861,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e) agent = AIAgent( - model=turn_route["model"], - api_key=turn_route["runtime"].get("api_key"), - base_url=turn_route["runtime"].get("base_url"), - provider=turn_route["runtime"].get("provider"), - api_mode=turn_route["runtime"].get("api_mode"), - acp_command=turn_route["runtime"].get("command"), - acp_args=turn_route["runtime"].get("args"), + model=model, + api_key=runtime.get("api_key"), + base_url=runtime.get("base_url"), + provider=runtime.get("provider"), + api_mode=runtime.get("api_mode"), + acp_command=runtime.get("command"), + acp_args=runtime.get("args"), max_iterations=max_iterations, reasoning_config=reasoning_config, 
prefill_messages=prefill_messages, @@ -768,7 +898,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None _POLL_INTERVAL = 5.0 _cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1) - _cron_future = _cron_pool.submit(agent.run_conversation, prompt) + # Preserve scheduler-scoped ContextVar state (for example skill-declared + # env passthrough registrations) when the cron run hops into the worker + # thread used for inactivity timeout monitoring. + _cron_context = contextvars.copy_context() + _cron_future = _cron_pool.submit(_cron_context.run, agent.run_conversation, prompt) _inactivity_timeout = False try: if _cron_inactivity_limit is None: @@ -830,6 +964,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: ) final_response = result.get("final_response", "") or "" + # Strip leaked placeholder text that upstream may inject on empty completions. + if final_response.strip() == "(No response generated)": + final_response = "" # Use a separate variable for log display; keep final_response clean # for delivery logic (empty response = no delivery). logged_response = final_response if final_response else "(No response generated)" @@ -969,6 +1106,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: delivery_error = str(de) logger.error("Delivery failed for job %s: %s", job["id"], de) + # Treat empty final_response as a soft failure so last_status + # is not "ok" — the agent ran but produced nothing useful. 
+ # (issue #8585) + if success and not final_response: + success = False + error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)" + mark_job_run(job["id"], success, error, delivery_error=delivery_error) executed += 1 diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh old mode 100644 new mode 100755 index dc1edd32c2..c46497dcc8 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -1,13 +1,14 @@ #!/bin/bash -# Docker entrypoint: bootstrap config files into the mounted volume, then run hermes. +# Docker/Podman entrypoint: bootstrap config files into the mounted volume, then run hermes. set -e -HERMES_HOME="/opt/data" +HERMES_HOME="${HERMES_HOME:-/opt/data}" INSTALL_DIR="/opt/hermes" # --- Privilege dropping via gosu --- -# When started as root (the default), optionally remap the hermes user/group -# to match host-side ownership, fix volume permissions, then re-exec as hermes. +# When started as root (the default for Docker, or fakeroot in rootless Podman), +# optionally remap the hermes user/group to match host-side ownership, fix volume +# permissions, then re-exec as hermes. if [ "$(id -u)" = "0" ]; then if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then echo "Changing hermes UID to $HERMES_UID" @@ -16,13 +17,19 @@ if [ "$(id -u)" = "0" ]; then if [ -n "$HERMES_GID" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then echo "Changing hermes GID to $HERMES_GID" - groupmod -g "$HERMES_GID" hermes + # -o allows non-unique GID (e.g. macOS GID 20 "staff" may already exist + # as "dialout" in the Debian-based container image) + groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true fi actual_hermes_uid=$(id -u hermes) if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing" - chown -R hermes:hermes "$HERMES_HOME" + # In rootless Podman the container's "root" is mapped to an unprivileged + # host UID — chown will fail. 
That's fine: the volume is already owned + # by the mapped user on the host side. + chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \ + echo "Warning: chown failed (rootless container?) — continuing anyway" fi echo "Dropping root privileges" diff --git a/docs/acp-setup.md b/docs/acp-setup.md deleted file mode 100644 index 8da4e2a215..0000000000 --- a/docs/acp-setup.md +++ /dev/null @@ -1,228 +0,0 @@ -# Hermes Agent — ACP (Agent Client Protocol) Setup Guide - -Hermes Agent supports the **Agent Client Protocol (ACP)**, allowing it to run as -a coding agent inside your editor. ACP lets your IDE send tasks to Hermes, and -Hermes responds with file edits, terminal commands, and explanations — all shown -natively in the editor UI. - ---- - -## Prerequisites - -- Hermes Agent installed and configured (`hermes setup` completed) -- An API key / provider set up in `~/.hermes/.env` or via `hermes login` -- Python 3.11+ - -Install the ACP extra: - -```bash -pip install -e ".[acp]" -``` - ---- - -## VS Code Setup - -### 1. Install the ACP Client extension - -Open VS Code and install **ACP Client** from the marketplace: - -- Press `Ctrl+Shift+X` (or `Cmd+Shift+X` on macOS) -- Search for **"ACP Client"** -- Click **Install** - -Or install from the command line: - -```bash -code --install-extension anysphere.acp-client -``` - -### 2. Configure settings.json - -Open your VS Code settings (`Ctrl+,` → click the `{}` icon for JSON) and add: - -```json -{ - "acpClient.agents": [ - { - "name": "hermes-agent", - "registryDir": "/path/to/hermes-agent/acp_registry" - } - ] -} -``` - -Replace `/path/to/hermes-agent` with the actual path to your Hermes Agent -installation (e.g. `~/.hermes/hermes-agent`). - -Alternatively, if `hermes` is on your PATH, the ACP Client can discover it -automatically via the registry directory. - -### 3. Restart VS Code - -After configuring, restart VS Code. You should see **Hermes Agent** appear in -the ACP agent picker in the chat/agent panel. 
- ---- - -## Zed Setup - -Zed has built-in ACP support. - -### 1. Configure Zed settings - -Open Zed settings (`Cmd+,` on macOS or `Ctrl+,` on Linux) and add to your -`settings.json`: - -```json -{ - "agent_servers": { - "hermes-agent": { - "type": "custom", - "command": "hermes", - "args": ["acp"], - }, - }, -} -``` - -### 2. Restart Zed - -Hermes Agent will appear in the agent panel. Select it and start a conversation. - ---- - -## JetBrains Setup (IntelliJ, PyCharm, WebStorm, etc.) - -### 1. Install the ACP plugin - -- Open **Settings** → **Plugins** → **Marketplace** -- Search for **"ACP"** or **"Agent Client Protocol"** -- Install and restart the IDE - -### 2. Configure the agent - -- Open **Settings** → **Tools** → **ACP Agents** -- Click **+** to add a new agent -- Set the registry directory to your `acp_registry/` folder: - `/path/to/hermes-agent/acp_registry` -- Click **OK** - -### 3. Use the agent - -Open the ACP panel (usually in the right sidebar) and select **Hermes Agent**. - ---- - -## What You Will See - -Once connected, your editor provides a native interface to Hermes Agent: - -### Chat Panel -A conversational interface where you can describe tasks, ask questions, and -give instructions. Hermes responds with explanations and actions. - -### File Diffs -When Hermes edits files, you see standard diffs in the editor. You can: -- **Accept** individual changes -- **Reject** changes you don't want -- **Review** the full diff before applying - -### Terminal Commands -When Hermes needs to run shell commands (builds, tests, installs), the editor -shows them in an integrated terminal. Depending on your settings: -- Commands may run automatically -- Or you may be prompted to **approve** each command - -### Approval Flow -For potentially destructive operations, the editor will prompt you for -approval before Hermes proceeds. 
This includes: -- File deletions -- Shell commands -- Git operations - ---- - -## Configuration - -Hermes Agent under ACP uses the **same configuration** as the CLI: - -- **API keys / providers**: `~/.hermes/.env` -- **Agent config**: `~/.hermes/config.yaml` -- **Skills**: `~/.hermes/skills/` -- **Sessions**: `~/.hermes/state.db` - -You can run `hermes setup` to configure providers, or edit `~/.hermes/.env` -directly. - -### Changing the model - -Edit `~/.hermes/config.yaml`: - -```yaml -model: openrouter/nous/hermes-3-llama-3.1-70b -``` - -Or set the `HERMES_MODEL` environment variable. - -### Toolsets - -ACP sessions use the curated `hermes-acp` toolset by default. It is designed for editor workflows and intentionally excludes things like messaging delivery, cronjob management, and audio-first UX features. - ---- - -## Troubleshooting - -### Agent doesn't appear in the editor - -1. **Check the registry path** — make sure the `acp_registry/` directory path - in your editor settings is correct and contains `agent.json`. -2. **Check `hermes` is on PATH** — run `which hermes` in a terminal. If not - found, you may need to activate your virtualenv or add it to PATH. -3. **Restart the editor** after changing settings. - -### Agent starts but errors immediately - -1. Run `hermes doctor` to check your configuration. -2. Check that you have a valid API key: `hermes status` -3. Try running `hermes acp` directly in a terminal to see error output. - -### "Module not found" errors - -Make sure you installed the ACP extra: - -```bash -pip install -e ".[acp]" -``` - -### Slow responses - -- ACP streams responses, so you should see incremental output. If the agent - appears stuck, check your network connection and API provider status. -- Some providers have rate limits. Try switching to a different model/provider. 
- -### Permission denied for terminal commands - -If the editor blocks terminal commands, check your ACP Client extension -settings for auto-approval or manual-approval preferences. - -### Logs - -Hermes logs are written to stderr when running in ACP mode. Check: -- VS Code: **Output** panel → select **ACP Client** or **Hermes Agent** -- Zed: **View** → **Toggle Terminal** and check the process output -- JetBrains: **Event Log** or the ACP tool window - -You can also enable verbose logging: - -```bash -HERMES_LOG_LEVEL=DEBUG hermes acp -``` - ---- - -## Further Reading - -- [ACP Specification](https://github.com/anysphere/acp) -- [Hermes Agent Documentation](https://github.com/NousResearch/hermes-agent) -- Run `hermes --help` for all CLI options diff --git a/docs/honcho-integration-spec.html b/docs/honcho-integration-spec.html deleted file mode 100644 index 455fb84f23..0000000000 --- a/docs/honcho-integration-spec.html +++ /dev/null @@ -1,698 +0,0 @@ - - - - - -honcho-integration-spec - - - - - - - -
- -
- -
-

honcho-integration-spec

-

Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations.

-
- hermes-agent / openclaw-honcho - Python + TypeScript - 2026-03-09 -
-
- - - - -
-

Overview

- -

Two independent Honcho integrations have been built for two different agent runtimes: Hermes Agent (Python, baked into the runner) and openclaw-honcho (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, session.context(), peer.chat() — but they made different tradeoffs at every layer.

- -

This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language.

- -
- Scope Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive. -
-
- - -
-

Architecture comparison

- -

Hermes: baked-in runner

-

Honcho is initialised directly inside AIAgent.__init__. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into _cached_system_prompt) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider.

- -
-%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%% -flowchart TD - U["user message"] --> P["_honcho_prefetch()
(reads cache — no HTTP)"] - P --> SP["_build_system_prompt()
(first turn only, cached)"] - SP --> LLM["LLM call"] - LLM --> R["response"] - R --> FP["_honcho_fire_prefetch()
(daemon threads, turn end)"] - FP --> C1["prefetch_context() thread"] - FP --> C2["prefetch_dialectic() thread"] - C1 --> CACHE["_context_cache / _dialectic_cache"] - C2 --> CACHE - - style U fill:#162030,stroke:#3d6ea5,color:#c9d1d9 - style P fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9 - style SP fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9 - style LLM fill:#162030,stroke:#3d6ea5,color:#c9d1d9 - style R fill:#162030,stroke:#3d6ea5,color:#c9d1d9 - style FP fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9 - style C1 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9 - style C2 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9 - style CACHE fill:#11151c,stroke:#484f58,color:#6e7681 -
- -

openclaw-honcho: hook-based plugin

-

The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside before_prompt_build on every turn. Message capture happens in agent_end. The multi-agent hierarchy is tracked via subagent_spawned. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin.

- -
-%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%% -flowchart TD - U2["user message"] --> BPB["before_prompt_build
(BLOCKING HTTP — every turn)"] - BPB --> CTX["session.context()"] - CTX --> SP2["system prompt assembled"] - SP2 --> LLM2["LLM call"] - LLM2 --> R2["response"] - R2 --> AE["agent_end hook"] - AE --> SAVE["session.addMessages()
session.setMetadata()"] - - style U2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9 - style BPB fill:#3a1515,stroke:#f47067,color:#c9d1d9 - style CTX fill:#3a1515,stroke:#f47067,color:#c9d1d9 - style SP2 fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9 - style LLM2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9 - style R2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9 - style AE fill:#162030,stroke:#3d6ea5,color:#c9d1d9 - style SAVE fill:#11151c,stroke:#484f58,color:#6e7681 -
-
- - -
-

Diff table

- -
-
A real terminal interfaceFull TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
DimensionHermes Agentopenclaw-honcho
Context injection timingOnce per session (cached). Zero HTTP on response path after turn 1.Every turn, blocking. Fresh context per turn but adds latency.
Prefetch strategyDaemon threads fire at turn end; consumed next turn from cache.None. Blocking call at prompt-build time.
Dialectic (peer.chat)Prefetched async; result injected into system prompt next turn.On-demand via honcho_recall / honcho_analyze tools.
Reasoning levelDynamic: scales with message length. Floor = config default. Cap = "high".Fixed per tool: recall=minimal, analyze=medium.
Memory modesuser_memory_mode / agent_memory_mode: hybrid / honcho / local.None. Always writes to Honcho.
Write frequencyasync (background queue), turn, session, N turns.After every agent_end (no control).
AI peer identityobserve_me=True, seed_ai_identity(), get_ai_representation(), SOUL.md → AI peer.Agent files uploaded to agent peer at setup. No ongoing self-observation seeding.
Context scopeUser peer + AI peer representation, both injected.User peer (owner) representation + conversation summary. peerPerspective on context call.
Session namingper-directory / global / manual map / title-based.Derived from platform session key.
Multi-agentSingle-agent only.Parent observer hierarchy via subagent_spawned.
Tool surfaceSingle query_user_context tool (on-demand dialectic).6 tools: session, profile, search, context (fast) + recall, analyze (LLM).
Platform metadataNot stripped.Explicitly stripped before Honcho storage.
Message dedupNone (sends on every save cycle).lastSavedIndex in session metadata prevents re-sending.
CLI surface in promptManagement commands injected into system prompt. Agent knows its own CLI.Not injected.
AI peer name in identityReplaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured.Not implemented.
QMD / local file searchNot implemented.Passthrough tools when QMD backend configured.
Workspace metadataNot implemented.agentPeerMap in workspace metadata tracks agent→peer ID.
- - - - -
-

Hermes patterns to port

- -

Six patterns from Hermes are worth adopting in any Honcho integration. They are described below as integration-agnostic interfaces — the implementation will differ per runtime, but the contract is the same.

- -
-
-

Patterns Hermes contributes

-
    -
  • Async prefetch (zero-latency)
  • -
  • Dynamic reasoning level
  • -
  • Per-peer memory modes
  • -
  • AI peer identity formation
  • -
  • Session naming strategies
  • -
  • CLI surface injection
  • -
-
-
-

Patterns openclaw contributes back

-
    -
  • lastSavedIndex dedup
  • -
  • Platform metadata stripping
  • -
  • Multi-agent observer hierarchy
  • -
  • peerPerspective on context()
  • -
  • Tiered tool surface (fast/LLM)
  • -
  • Workspace agentPeerMap
  • -
-
-
-
- - -
-

Spec: async prefetch

- -

Problem

-

Calling session.context() and peer.chat() synchronously before each LLM call adds 200–800ms of Honcho round-trip latency to every turn. Users experience this as the agent "thinking slowly."

- -

Pattern

-

Fire both calls as non-blocking background work at the end of each turn. Store results in a per-session cache keyed by session ID. At the start of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path.

- -

Interface contract

-
// TypeScript (openclaw / nanobot plugin shape)
-
-interface AsyncPrefetch {
-  // Fire context + dialectic fetches at turn end. Non-blocking.
-  firePrefetch(sessionId: string, userMessage: string): void;
-
-  // Pop cached results at turn start. Returns empty if cache is cold.
-  popContextResult(sessionId: string): ContextResult | null;
-  popDialecticResult(sessionId: string): string | null;
-}
-
-type ContextResult = {
-  representation: string;
-  card: string[];
-  aiRepresentation?: string;  // AI peer context if enabled
-  summary?: string;            // conversation summary if fetched
-};
- -

Implementation notes

-
    -
  • Python: threading.Thread(daemon=True). Write to dict[session_id, result] — GIL makes this safe for simple writes.
  • -
  • TypeScript: Promise stored in Map<string, Promise<ContextResult>>. Await at pop time. If not resolved yet, skip (return null) — do not block.
  • -
  • The pop is destructive: clears the cache entry after reading so stale data never accumulates.
  • -
  • Prefetch should also fire on first turn (even though it won't be consumed until turn 2) — this ensures turn 2 is never cold.
  • -
- -

openclaw-honcho adoption

-

Move session.context() from before_prompt_build to a post-agent_end background task. Store result in state.contextCache. In before_prompt_build, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn.

-
- - -
-

Spec: dynamic reasoning level

- -

Problem

-

Honcho's dialectic endpoint supports reasoning levels from minimal to max. A fixed level per tool wastes budget on simple queries and under-serves complex ones.

- -

Pattern

-

Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at high — never select max automatically.

- -

Interface contract

-
// Shared helper — identical logic in any language
-
-const LEVELS = ["minimal", "low", "medium", "high", "max"];
-
-function dynamicReasoningLevel(
-  query: string,
-  configDefault: string = "low"
-): string {
-  const baseIdx = Math.max(0, LEVELS.indexOf(configDefault));
-  const n = query.length;
-  const bump = n < 120 ? 0 : n < 400 ? 1 : 2;
-  return LEVELS[Math.min(baseIdx + bump, 3)]; // cap at "high" (idx 3)
-}
- -

Config key

-

Add a dialecticReasoningLevel config field (string, default "low"). This sets the floor. Users can raise or lower it. The dynamic bump always applies on top.

- -

openclaw-honcho adoption

-

Apply in honcho_recall and honcho_analyze: replace the fixed reasoningLevel with the dynamic selector. honcho_recall should use floor "minimal" and honcho_analyze floor "medium" — both still bump with message length.

-
- - -
-

Spec: per-peer memory modes

- -

Problem

-

Users want independent control over whether user context and agent context are written locally, to Honcho, or both. A single memoryMode shorthand is not granular enough.

- -

Pattern

-

Three modes per peer: hybrid (write both local + Honcho), honcho (Honcho only, disable local files), local (local files only, skip Honcho sync for this peer). Two orthogonal axes: user peer and agent peer.

- -

Config schema

-
// ~/.openclaw/openclaw.json  (or ~/.nanobot/config.json)
-{
-  "plugins": {
-    "openclaw-honcho": {
-      "config": {
-        "apiKey": "...",
-        "memoryMode": "hybrid",          // shorthand: both peers
-        "userMemoryMode": "honcho",       // override for user peer
-        "agentMemoryMode": "hybrid"       // override for agent peer
-      }
-    }
-  }
-}
- -

Resolution order

-
    -
  1. Per-peer field (userMemoryMode / agentMemoryMode) — wins if present.
  2. -
  3. Shorthand memoryMode — applies to both peers as default.
  4. -
  5. Hardcoded default: "hybrid".
  6. -
- -

Effect on Honcho sync

-
    -
  • userMemoryMode=local: skip adding user peer messages to Honcho.
  • -
  • agentMemoryMode=local: skip adding assistant peer messages to Honcho.
  • -
  • Both local: skip session.addMessages() entirely.
  • -
  • userMemoryMode=honcho: disable local USER.md writes.
  • -
  • agentMemoryMode=honcho: disable local MEMORY.md / SOUL.md writes.
  • -
-
- - -
-

Spec: AI peer identity formation

- -

Problem

-

Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if observe_me=True is set for the agent peer. Without it, the agent peer accumulates nothing and Honcho's AI-side model never forms.

- -

Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation, rather than waiting for it to emerge from scratch.

- -

Part A: observe_me=True for agent peer

-
// TypeScript — in session.addPeers() call
-await session.addPeers([
-  [ownerPeer.id, { observeMe: true,  observeOthers: false }],
-  [agentPeer.id, { observeMe: true,  observeOthers: true  }], // was false
-]);
- -

This is a one-line change but foundational. Without it, Honcho's AI peer representation stays empty regardless of what the agent says.

- -

Part B: seedAiIdentity()

-
async function seedAiIdentity(
-  session: HonchoSession,
-  agentPeer: Peer,
-  content: string,
-  source: string
-): Promise<boolean> {
-  const wrapped = [
-    `<ai_identity_seed>`,
-    `<source>${source}</source>`,
-    ``,
-    content.trim(),
-    `</ai_identity_seed>`,
-  ].join("\n");
-
-  await agentPeer.addMessage("assistant", wrapped);
-  return true;
-}
- -

Part C: migrate agent files at setup

-

During openclaw honcho setup, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md, BOOTSTRAP.md) to the agent peer using seedAiIdentity() instead of session.uploadFile(). This routes the content through Honcho's observation pipeline rather than the file store.

- -

Part D: AI peer name in identity

-

When the agent has a configured name (non-default), inject it into the agent's self-identity prefix. In OpenClaw this means adding to the injected system prompt section:

-
// In context hook return value
-return {
-  systemPrompt: [
-    agentName ? `You are ${agentName}.` : "",
-    "## User Memory Context",
-    ...sections,
-  ].filter(Boolean).join("\n\n")
-};
- -

CLI surface: honcho identity subcommand

-
openclaw honcho identity <file>    # seed from file
-openclaw honcho identity --show    # show current AI peer representation
-
- - -
-

Spec: session naming strategies

- -

Problem

-

When Honcho is used across multiple projects or directories, a single global session means every project shares the same context. Per-directory sessions provide isolation without requiring users to name sessions manually.

- -

Strategies

-
- - - - - - - - -
StrategySession keyWhen to use
per-directorybasename of CWDDefault. Each project gets its own session.
globalfixed string "global"Single cross-project session.
manual mapuser-configured per pathsessions config map overrides directory basename.
title-basedsanitized session titleWhen agent supports named sessions; title set mid-conversation.
-
- -

Config schema

-
{
-  "sessionStrategy": "per-directory",   // "per-directory" | "global"
-  "sessionPeerPrefix": false,            // prepend peer name to session key
-  "sessions": {                            // manual overrides
-    "/home/user/projects/foo": "foo-project"
-  }
-}
- -

CLI surface

-
openclaw honcho sessions              # list all mappings
-openclaw honcho map <name>           # map cwd to session name
-openclaw honcho map                   # no-arg = list mappings
- -

Resolution order: manual map wins → session title → directory basename → platform key.

-
- - -
-

Spec: CLI surface injection

- -

Problem

-

When a user asks "how do I change my memory settings?" or "what Honcho commands are available?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface.

- -

Pattern

-

When Honcho is active, append a compact command reference to the system prompt. The agent can cite these commands directly instead of guessing.

- -
// In context hook, append to systemPrompt
-const honchoSection = [
-  "# Honcho memory integration",
-  `Active. Session: ${sessionKey}. Mode: ${mode}.`,
-  "Management commands:",
-  "  openclaw honcho status                    — show config + connection",
-  "  openclaw honcho mode [hybrid|honcho|local] — show or set memory mode",
-  "  openclaw honcho sessions                  — list session mappings",
-  "  openclaw honcho map <name>                — map directory to session",
-  "  openclaw honcho identity [file] [--show]  — seed or show AI identity",
-  "  openclaw honcho setup                     — full interactive wizard",
-].join("\n");
- -
- Keep it compact. This section is injected every turn. Keep it under 300 chars of context. List commands, not explanations — the agent can explain them on request. -
-
- - -
-

openclaw-honcho checklist

- -

Ordered by impact. Each item maps to a spec section above.

- -
    -
  • Async prefetch — move session.context() out of before_prompt_build into post-agent_end background Promise. Pop from cache at prompt build. (spec)
  • -
  • observe_me=True for agent peer — one-line change in session.addPeers() config for agent peer. (spec)
  • -
  • Dynamic reasoning level — add dynamicReasoningLevel() helper; apply in honcho_recall and honcho_analyze. Add dialecticReasoningLevel to config schema. (spec)
  • -
  • Per-peer memory modes — add userMemoryMode / agentMemoryMode to config; gate Honcho sync and local writes accordingly. (spec)
  • -
  • seedAiIdentity() — add helper; apply during setup migration for SOUL.md / IDENTITY.md instead of session.uploadFile(). (spec)
  • -
  • Session naming strategies — add sessionStrategy, sessions map, sessionPeerPrefix to config; implement resolution function. (spec)
  • -
  • CLI surface injection — append command reference to before_prompt_build return value when Honcho is active. (spec)
  • -
  • honcho identity subcommand — add openclaw honcho identity CLI command. (spec)
  • -
  • AI peer name injection — if aiPeer name configured, prepend to injected system prompt. (spec)
  • -
  • honcho mode / honcho sessions / honcho map — CLI parity with Hermes. (spec)
  • -
- -
- Already done in openclaw-honcho (do not re-implement): lastSavedIndex dedup, platform metadata stripping, multi-agent parent observer hierarchy, peerPerspective on context(), tiered tool surface (fast/LLM), workspace agentPeerMap, QMD passthrough, self-hosted Honcho support. -
-
- - -
-

nanobot-honcho checklist

- -

nanobot-honcho is a greenfield integration. Start from openclaw-honcho's architecture (hook-based, dual peer) and apply all Hermes patterns from day one rather than retrofitting. Priority order:

- -

Phase 1 — core correctness

-
    -
  • Dual peer model (owner + agent peer), both with observe_me=True
  • -
  • Message capture at turn end with lastSavedIndex dedup
  • -
  • Platform metadata stripping before Honcho storage
  • -
  • Async prefetch from day one — do not implement blocking context injection
  • -
  • Legacy file migration at first activation (USER.md → owner peer, SOUL.md → seedAiIdentity())
  • -
- -

Phase 2 — configuration

-
    -
  • Config schema: apiKey, workspaceId, baseUrl, memoryMode, userMemoryMode, agentMemoryMode, dialecticReasoningLevel, sessionStrategy, sessions
  • -
  • Per-peer memory mode gating
  • -
  • Dynamic reasoning level
  • -
  • Session naming strategies
  • -
- -

Phase 3 — tools and CLI

-
    -
  • Tool surface: honcho_profile, honcho_recall, honcho_analyze, honcho_search, honcho_context
  • -
  • CLI: setup, status, sessions, map, mode, identity
  • -
  • CLI surface injection into system prompt
  • -
  • AI peer name wired into agent identity
  • -
-
- - - - - - - diff --git a/docs/honcho-integration-spec.md b/docs/honcho-integration-spec.md deleted file mode 100644 index 7731a262d9..0000000000 --- a/docs/honcho-integration-spec.md +++ /dev/null @@ -1,377 +0,0 @@ -# honcho-integration-spec - -Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations. - ---- - -## Overview - -Two independent Honcho integrations have been built for two different agent runtimes: **Hermes Agent** (Python, baked into the runner) and **openclaw-honcho** (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, `session.context()`, `peer.chat()` — but they made different tradeoffs at every layer. - -This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language. - -> **Scope** Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive. - ---- - -## Architecture comparison - -### Hermes: baked-in runner - -Honcho is initialised directly inside `AIAgent.__init__`. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into `_cached_system_prompt`) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider. 
- -Turn flow: - -``` -user message - → _honcho_prefetch() (reads cache — no HTTP) - → _build_system_prompt() (first turn only, cached) - → LLM call - → response - → _honcho_fire_prefetch() (daemon threads, turn end) - → prefetch_context() thread ──┐ - → prefetch_dialectic() thread ─┴→ _context_cache / _dialectic_cache -``` - -### openclaw-honcho: hook-based plugin - -The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside `before_prompt_build` on every turn. Message capture happens in `agent_end`. The multi-agent hierarchy is tracked via `subagent_spawned`. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin. - -Turn flow: - -``` -user message - → before_prompt_build (BLOCKING HTTP — every turn) - → session.context() - → system prompt assembled - → LLM call - → response - → agent_end hook - → session.addMessages() - → session.setMetadata() -``` - ---- - -## Diff table - -| Dimension | Hermes Agent | openclaw-honcho | -|---|---|---| -| **Context injection timing** | Once per session (cached). Zero HTTP on response path after turn 1. | Every turn, blocking. Fresh context per turn but adds latency. | -| **Prefetch strategy** | Daemon threads fire at turn end; consumed next turn from cache. | None. Blocking call at prompt-build time. | -| **Dialectic (peer.chat)** | Prefetched async; result injected into system prompt next turn. | On-demand via `honcho_recall` / `honcho_analyze` tools. | -| **Reasoning level** | Dynamic: scales with message length. Floor = config default. Cap = "high". | Fixed per tool: recall=minimal, analyze=medium. | -| **Memory modes** | `user_memory_mode` / `agent_memory_mode`: hybrid / honcho / local. | None. Always writes to Honcho. | -| **Write frequency** | async (background queue), turn, session, N turns. | After every agent_end (no control). 
| -| **AI peer identity** | `observe_me=True`, `seed_ai_identity()`, `get_ai_representation()`, SOUL.md → AI peer. | Agent files uploaded to agent peer at setup. No ongoing self-observation. | -| **Context scope** | User peer + AI peer representation, both injected. | User peer (owner) representation + conversation summary. `peerPerspective` on context call. | -| **Session naming** | per-directory / global / manual map / title-based. | Derived from platform session key. | -| **Multi-agent** | Single-agent only. | Parent observer hierarchy via `subagent_spawned`. | -| **Tool surface** | Single `query_user_context` tool (on-demand dialectic). | 6 tools: session, profile, search, context (fast) + recall, analyze (LLM). | -| **Platform metadata** | Not stripped. | Explicitly stripped before Honcho storage. | -| **Message dedup** | None. | `lastSavedIndex` in session metadata prevents re-sending. | -| **CLI surface in prompt** | Management commands injected into system prompt. Agent knows its own CLI. | Not injected. | -| **AI peer name in identity** | Replaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured. | Not implemented. | -| **QMD / local file search** | Not implemented. | Passthrough tools when QMD backend configured. | -| **Workspace metadata** | Not implemented. | `agentPeerMap` in workspace metadata tracks agent→peer ID. | - ---- - -## Patterns - -Six patterns from Hermes are worth adopting in any Honcho integration. Each is described as an integration-agnostic interface. 
- -**Hermes contributes:** -- Async prefetch (zero-latency) -- Dynamic reasoning level -- Per-peer memory modes -- AI peer identity formation -- Session naming strategies -- CLI surface injection - -**openclaw-honcho contributes back (Hermes should adopt):** -- `lastSavedIndex` dedup -- Platform metadata stripping -- Multi-agent observer hierarchy -- `peerPerspective` on `context()` -- Tiered tool surface (fast/LLM) -- Workspace `agentPeerMap` - ---- - -## Spec: async prefetch - -### Problem - -Calling `session.context()` and `peer.chat()` synchronously before each LLM call adds 200–800ms of Honcho round-trip latency to every turn. - -### Pattern - -Fire both calls as non-blocking background work at the **end** of each turn. Store results in a per-session cache keyed by session ID. At the **start** of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path. - -### Interface contract - -```typescript -interface AsyncPrefetch { - // Fire context + dialectic fetches at turn end. Non-blocking. - firePrefetch(sessionId: string, userMessage: string): void; - - // Pop cached results at turn start. Returns empty if cache is cold. - popContextResult(sessionId: string): ContextResult | null; - popDialecticResult(sessionId: string): string | null; -} - -type ContextResult = { - representation: string; - card: string[]; - aiRepresentation?: string; // AI peer context if enabled - summary?: string; // conversation summary if fetched -}; -``` - -### Implementation notes - -- **Python:** `threading.Thread(daemon=True)`. Write to `dict[session_id, result]` — GIL makes this safe for simple writes. -- **TypeScript:** `Promise` stored in `Map>`. Await at pop time. If not resolved yet, return null — do not block. -- The pop is destructive: clears the cache entry after reading so stale data never accumulates. 
-- Prefetch should also fire on first turn (even though it won't be consumed until turn 2). - -### openclaw-honcho adoption - -Move `session.context()` from `before_prompt_build` to a post-`agent_end` background task. Store result in `state.contextCache`. In `before_prompt_build`, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn. - ---- - -## Spec: dynamic reasoning level - -### Problem - -Honcho's dialectic endpoint supports reasoning levels from `minimal` to `max`. A fixed level per tool wastes budget on simple queries and under-serves complex ones. - -### Pattern - -Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at `high` — never select `max` automatically. - -### Logic - -``` -< 120 chars → default (typically "low") -120–400 chars → one level above default (cap at "high") -> 400 chars → two levels above default (cap at "high") -``` - -### Config key - -Add `dialecticReasoningLevel` (string, default `"low"`). This sets the floor. The dynamic bump always applies on top. - -### openclaw-honcho adoption - -Apply in `honcho_recall` and `honcho_analyze`: replace fixed `reasoningLevel` with the dynamic selector. `honcho_recall` uses floor `"minimal"`, `honcho_analyze` uses floor `"medium"` — both still bump with message length. - ---- - -## Spec: per-peer memory modes - -### Problem - -Users want independent control over whether user context and agent context are written locally, to Honcho, or both. 
- -### Modes - -| Mode | Effect | -|---|---| -| `hybrid` | Write to both local files and Honcho (default) | -| `honcho` | Honcho only — disable corresponding local file writes | -| `local` | Local files only — skip Honcho sync for this peer | - -### Config schema - -```json -{ - "memoryMode": "hybrid", - "userMemoryMode": "honcho", - "agentMemoryMode": "hybrid" -} -``` - -Resolution order: per-peer field wins → shorthand `memoryMode` → default `"hybrid"`. - -### Effect on Honcho sync - -- `userMemoryMode=local`: skip adding user peer messages to Honcho -- `agentMemoryMode=local`: skip adding assistant peer messages to Honcho -- Both local: skip `session.addMessages()` entirely -- `userMemoryMode=honcho`: disable local USER.md writes -- `agentMemoryMode=honcho`: disable local MEMORY.md / SOUL.md writes - ---- - -## Spec: AI peer identity formation - -### Problem - -Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if `observe_me=True` is set for the agent peer. Without it, the agent peer accumulates nothing. - -Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation. - -### Part A: observe_me=True for agent peer - -```typescript -await session.addPeers([ - [ownerPeer.id, { observeMe: true, observeOthers: false }], - [agentPeer.id, { observeMe: true, observeOthers: true }], // was false -]); -``` - -One-line change. Foundational. Without it, the AI peer representation stays empty regardless of what the agent says. 
- -### Part B: seedAiIdentity() - -```typescript -async function seedAiIdentity( - agentPeer: Peer, - content: string, - source: string -): Promise { - const wrapped = [ - ``, - `${source}`, - ``, - content.trim(), - ``, - ].join("\n"); - - await agentPeer.addMessage("assistant", wrapped); - return true; -} -``` - -### Part C: migrate agent files at setup - -During `honcho setup`, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md) to the agent peer via `seedAiIdentity()` instead of `session.uploadFile()`. This routes content through Honcho's observation pipeline. - -### Part D: AI peer name in identity - -When the agent has a configured name, prepend it to the injected system prompt: - -```typescript -const namePrefix = agentName ? `You are ${agentName}.\n\n` : ""; -return { systemPrompt: namePrefix + "## User Memory Context\n\n" + sections }; -``` - -### CLI surface - -``` -honcho identity # seed from file -honcho identity --show # show current AI peer representation -``` - ---- - -## Spec: session naming strategies - -### Problem - -A single global session means every project shares the same Honcho context. Per-directory sessions provide isolation without requiring users to name sessions manually. - -### Strategies - -| Strategy | Session key | When to use | -|---|---|---| -| `per-directory` | basename of CWD | Default. Each project gets its own session. | -| `global` | fixed string `"global"` | Single cross-project session. | -| manual map | user-configured per path | `sessions` config map overrides directory basename. | -| title-based | sanitized session title | When agent supports named sessions set mid-conversation. 
| - -### Config schema - -```json -{ - "sessionStrategy": "per-directory", - "sessionPeerPrefix": false, - "sessions": { - "/home/user/projects/foo": "foo-project" - } -} -``` - -### CLI surface - -``` -honcho sessions # list all mappings -honcho map # map cwd to session name -honcho map # no-arg = list mappings -``` - -Resolution order: manual map → session title → directory basename → platform key. - ---- - -## Spec: CLI surface injection - -### Problem - -When a user asks "how do I change my memory settings?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface. - -### Pattern - -When Honcho is active, append a compact command reference to the system prompt. Keep it under 300 chars. - -``` -# Honcho memory integration -Active. Session: {sessionKey}. Mode: {mode}. -Management commands: - honcho status — show config + connection - honcho mode [hybrid|honcho|local] — show or set memory mode - honcho sessions — list session mappings - honcho map — map directory to session - honcho identity [file] [--show] — seed or show AI identity - honcho setup — full interactive wizard -``` - ---- - -## openclaw-honcho checklist - -Ordered by impact: - -- [ ] **Async prefetch** — move `session.context()` out of `before_prompt_build` into post-`agent_end` background Promise -- [ ] **observe_me=True for agent peer** — one-line change in `session.addPeers()` -- [ ] **Dynamic reasoning level** — add helper; apply in `honcho_recall` and `honcho_analyze`; add `dialecticReasoningLevel` to config -- [ ] **Per-peer memory modes** — add `userMemoryMode` / `agentMemoryMode` to config; gate Honcho sync and local writes -- [ ] **seedAiIdentity()** — add helper; use during setup migration for SOUL.md / IDENTITY.md -- [ ] **Session naming strategies** — add `sessionStrategy`, `sessions` map, `sessionPeerPrefix` -- [ ] **CLI surface injection** — append command reference to `before_prompt_build` return value -- [ ] **honcho identity 
subcommand** — seed from file or `--show` current representation -- [ ] **AI peer name injection** — if `aiPeer` name configured, prepend to injected system prompt -- [ ] **honcho mode / sessions / map** — CLI parity with Hermes - -Already done in openclaw-honcho (do not re-implement): `lastSavedIndex` dedup, platform metadata stripping, multi-agent parent observer, `peerPerspective` on `context()`, tiered tool surface, workspace `agentPeerMap`, QMD passthrough, self-hosted Honcho. - ---- - -## nanobot-honcho checklist - -Greenfield integration. Start from openclaw-honcho's architecture and apply all Hermes patterns from day one. - -### Phase 1 — core correctness - -- [ ] Dual peer model (owner + agent peer), both with `observe_me=True` -- [ ] Message capture at turn end with `lastSavedIndex` dedup -- [ ] Platform metadata stripping before Honcho storage -- [ ] Async prefetch from day one — do not implement blocking context injection -- [ ] Legacy file migration at first activation (USER.md → owner peer, SOUL.md → `seedAiIdentity()`) - -### Phase 2 — configuration - -- [ ] Config schema: `apiKey`, `workspaceId`, `baseUrl`, `memoryMode`, `userMemoryMode`, `agentMemoryMode`, `dialecticReasoningLevel`, `sessionStrategy`, `sessions` -- [ ] Per-peer memory mode gating -- [ ] Dynamic reasoning level -- [ ] Session naming strategies - -### Phase 3 — tools and CLI - -- [ ] Tool surface: `honcho_profile`, `honcho_recall`, `honcho_analyze`, `honcho_search`, `honcho_context` -- [ ] CLI: `setup`, `status`, `sessions`, `map`, `mode`, `identity` -- [ ] CLI surface injection into system prompt -- [ ] AI peer name wired into agent identity diff --git a/docs/migration/openclaw.md b/docs/migration/openclaw.md deleted file mode 100644 index 30f2f97e4d..0000000000 --- a/docs/migration/openclaw.md +++ /dev/null @@ -1,142 +0,0 @@ -# Migrating from OpenClaw to Hermes Agent - -This guide covers how to import your OpenClaw settings, memories, skills, and API keys into Hermes Agent. 
- -## Three Ways to Migrate - -### 1. Automatic (during first-time setup) - -When you run `hermes setup` for the first time and Hermes detects `~/.openclaw`, it automatically offers to import your OpenClaw data before configuration begins. Just accept the prompt and everything is handled for you. - -### 2. CLI Command (quick, scriptable) - -```bash -hermes claw migrate # Preview then migrate (always shows preview first) -hermes claw migrate --dry-run # Preview only, no changes -hermes claw migrate --preset user-data # Migrate without API keys/secrets -hermes claw migrate --yes # Skip confirmation prompt -``` - -The migration always shows a full preview of what will be imported before making any changes. You review the preview and confirm before anything is written. - -**All options:** - -| Flag | Description | -|------|-------------| -| `--source PATH` | Path to OpenClaw directory (default: `~/.openclaw`) | -| `--dry-run` | Preview only — no files are modified | -| `--preset {user-data,full}` | Migration preset (default: `full`). `user-data` excludes secrets | -| `--overwrite` | Overwrite existing files (default: skip conflicts) | -| `--migrate-secrets` | Include allowlisted secrets (auto-enabled with `full` preset) | -| `--workspace-target PATH` | Copy workspace instructions (AGENTS.md) to this absolute path | -| `--skill-conflict {skip,overwrite,rename}` | How to handle skill name conflicts (default: `skip`) | -| `--yes`, `-y` | Skip confirmation prompts | - -### 3. Agent-Guided (interactive, with previews) - -Ask the agent to run the migration for you: - -``` -> Migrate my OpenClaw setup to Hermes -``` - -The agent will use the `openclaw-migration` skill to: -1. Run a preview first to show what would change -2. Ask about conflict resolution (SOUL.md, skills, etc.) -3. Let you choose between `user-data` and `full` presets -4. Execute the migration with your choices -5. 
Print a detailed summary of what was migrated - -## What Gets Migrated - -### `user-data` preset -| Item | Source | Destination | -|------|--------|-------------| -| SOUL.md | `~/.openclaw/workspace/SOUL.md` | `~/.hermes/SOUL.md` | -| Memory entries | `~/.openclaw/workspace/MEMORY.md` | `~/.hermes/memories/MEMORY.md` | -| User profile | `~/.openclaw/workspace/USER.md` | `~/.hermes/memories/USER.md` | -| Skills | `~/.openclaw/workspace/skills/` | `~/.hermes/skills/openclaw-imports/` | -| Command allowlist | `~/.openclaw/workspace/exec_approval_patterns.yaml` | Merged into `~/.hermes/config.yaml` | -| Messaging settings | `~/.openclaw/config.yaml` (TELEGRAM_ALLOWED_USERS, MESSAGING_CWD) | `~/.hermes/.env` | -| TTS assets | `~/.openclaw/workspace/tts/` | `~/.hermes/tts/` | - -Workspace files are also checked at `workspace.default/` and `workspace-main/` as fallback paths (OpenClaw renamed `workspace/` to `workspace-main/` in recent versions). - -### `full` preset (adds to `user-data`) -| Item | Source | Destination | -|------|--------|-------------| -| Telegram bot token | `openclaw.json` channels config | `~/.hermes/.env` | -| OpenRouter API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | -| OpenAI API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | -| Anthropic API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | -| ElevenLabs API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | - -API keys are searched across four sources: inline config values, `~/.openclaw/.env`, the `openclaw.json` `"env"` sub-object, and per-agent auth profiles. - -Only allowlisted secrets are ever imported. Other credentials are skipped and reported. 
- -## OpenClaw Schema Compatibility - -The migration handles both old and current OpenClaw config layouts: - -- **Channel tokens**: Reads from flat paths (`channels.telegram.botToken`) and the newer `accounts.default` layout (`channels.telegram.accounts.default.botToken`) -- **TTS provider**: OpenClaw renamed "edge" to "microsoft" — both are recognized and mapped to Hermes' "edge" -- **Provider API types**: Both short (`openai`, `anthropic`) and hyphenated (`openai-completions`, `anthropic-messages`, `google-generative-ai`) values are mapped correctly -- **thinkingDefault**: All enum values are handled including newer ones (`minimal`, `xhigh`, `adaptive`) -- **Matrix**: Uses `accessToken` field (not `botToken`) -- **SecretRef formats**: Plain strings, env templates (`${VAR}`), and `source: "env"` SecretRefs are resolved. `source: "file"` and `source: "exec"` SecretRefs produce a warning — add those keys manually after migration. - -## Conflict Handling - -By default, the migration **will not overwrite** existing Hermes data: - -- **SOUL.md** — skipped if one already exists in `~/.hermes/` -- **Memory entries** — skipped if memories already exist (to avoid duplicates) -- **Skills** — skipped if a skill with the same name already exists -- **API keys** — skipped if the key is already set in `~/.hermes/.env` - -To overwrite conflicts, use `--overwrite`. The migration creates backups before overwriting. - -For skills, you can also use `--skill-conflict rename` to import conflicting skills under a new name (e.g., `skill-name-imported`). - -## Migration Report - -Every migration produces a report showing: -- **Migrated items** — what was successfully imported -- **Conflicts** — items skipped because they already exist -- **Skipped items** — items not found in the source -- **Errors** — items that failed to import - -For executed migrations, the full report is saved to `~/.hermes/migration/openclaw/<timestamp>/`.
- -## Post-Migration Notes - -- **Skills require a new session** — imported skills take effect after restarting your agent or starting a new chat. -- **WhatsApp requires re-pairing** — WhatsApp uses QR-code pairing, not token-based auth. Run `hermes whatsapp` to pair. -- **Archive cleanup** — after migration, you'll be offered to rename `~/.openclaw/` to `.openclaw.pre-migration/` to prevent state confusion. You can also run `hermes claw cleanup` later. - -## Troubleshooting - -### "OpenClaw directory not found" -The migration looks for `~/.openclaw` by default, then tries `~/.clawdbot` and `~/.moltbot`. If your OpenClaw is installed elsewhere, use `--source`: -```bash -hermes claw migrate --source /path/to/.openclaw -``` - -### "Migration script not found" -The migration script ships with Hermes Agent. If you installed via pip (not git clone), the `optional-skills/` directory may not be present. Install the skill from the Skills Hub: -```bash -hermes skills install openclaw-migration -``` - -### Memory overflow -If your OpenClaw MEMORY.md or USER.md exceeds Hermes' character limits, excess entries are exported to an overflow file in the migration report directory. You can manually review and add the most important ones. - -### API keys not found -Keys might be stored in different places depending on your OpenClaw setup: -- `~/.openclaw/.env` file -- Inline in `openclaw.json` under `models.providers.*.apiKey` -- In `openclaw.json` under the `"env"` or `"env.vars"` sub-objects -- In `~/.openclaw/agents/main/agent/auth-profiles.json` - -The migration checks all four. If keys use `source: "file"` or `source: "exec"` SecretRefs, they can't be resolved automatically — add them via `hermes config set`. 
diff --git a/docs/plans/2026-03-16-pricing-accuracy-architecture-design.md b/docs/plans/2026-03-16-pricing-accuracy-architecture-design.md deleted file mode 100644 index a75f14ff5a..0000000000 --- a/docs/plans/2026-03-16-pricing-accuracy-architecture-design.md +++ /dev/null @@ -1,608 +0,0 @@ -# Pricing Accuracy Architecture - -Date: 2026-03-16 - -## Goal - -Hermes should only show dollar costs when they are backed by an official source for the user's actual billing path. - -This design replaces the current static, heuristic pricing flow in: - -- `run_agent.py` -- `agent/usage_pricing.py` -- `agent/insights.py` -- `cli.py` - -with a provider-aware pricing system that: - -- handles cache billing correctly -- distinguishes `actual` vs `estimated` vs `included` vs `unknown` -- reconciles post-hoc costs when providers expose authoritative billing data -- supports direct providers, OpenRouter, subscriptions, enterprise pricing, and custom endpoints - -## Problems In The Current Design - -Current Hermes behavior has four structural issues: - -1. It stores only `prompt_tokens` and `completion_tokens`, which is insufficient for providers that bill cache reads and cache writes separately. -2. It uses a static model price table and fuzzy heuristics, which can drift from current official pricing. -3. It assumes public API list pricing matches the user's real billing path. -4. It has no distinction between live estimates and reconciled billed cost. - -## Design Principles - -1. Normalize usage before pricing. -2. Never fold cached tokens into plain input cost. -3. Track certainty explicitly. -4. Treat the billing path as part of the model identity. -5. Prefer official machine-readable sources over scraped docs. -6. Use post-hoc provider cost APIs when available. -7. Show `n/a` rather than inventing precision. - -## High-Level Architecture - -The new system has four layers: - -1. `usage_normalization` - Converts raw provider usage into a canonical usage record. -2. 
`pricing_source_resolution` - Determines the billing path, source of truth, and applicable pricing source. -3. `cost_estimation_and_reconciliation` - Produces an immediate estimate when possible, then replaces or annotates it with actual billed cost later. -4. `presentation` - `/usage`, `/insights`, and the status bar display cost with certainty metadata. - -## Canonical Usage Record - -Add a canonical usage model that every provider path maps into before any pricing math happens. - -Suggested structure: - -```python -@dataclass -class CanonicalUsage: - provider: str - billing_provider: str - model: str - billing_route: str - - input_tokens: int = 0 - output_tokens: int = 0 - cache_read_tokens: int = 0 - cache_write_tokens: int = 0 - reasoning_tokens: int = 0 - request_count: int = 1 - - raw_usage: dict[str, Any] | None = None - raw_usage_fields: dict[str, str] | None = None - computed_fields: set[str] | None = None - - provider_request_id: str | None = None - provider_generation_id: str | None = None - provider_response_id: str | None = None -``` - -Rules: - -- `input_tokens` means non-cached input only. -- `cache_read_tokens` and `cache_write_tokens` are never merged into `input_tokens`. -- `output_tokens` excludes cache metrics. -- `reasoning_tokens` is telemetry unless a provider officially bills it separately. - -This is the same normalization pattern used by `opencode`, extended with provenance and reconciliation ids. 
- -## Provider Normalization Rules - -### OpenAI Direct - -Source usage fields: - -- `prompt_tokens` -- `completion_tokens` -- `prompt_tokens_details.cached_tokens` - -Normalization: - -- `cache_read_tokens = cached_tokens` -- `input_tokens = prompt_tokens - cached_tokens` -- `cache_write_tokens = 0` unless OpenAI exposes it in the relevant route -- `output_tokens = completion_tokens` - -### Anthropic Direct - -Source usage fields: - -- `input_tokens` -- `output_tokens` -- `cache_read_input_tokens` -- `cache_creation_input_tokens` - -Normalization: - -- `input_tokens = input_tokens` -- `output_tokens = output_tokens` -- `cache_read_tokens = cache_read_input_tokens` -- `cache_write_tokens = cache_creation_input_tokens` - -### OpenRouter - -Estimate-time usage normalization should use the response usage payload with the same rules as the underlying provider when possible. - -Reconciliation-time records should also store: - -- OpenRouter generation id -- native token fields when available -- `total_cost` -- `cache_discount` -- `upstream_inference_cost` -- `is_byok` - -### Gemini / Vertex - -Use official Gemini or Vertex usage fields where available. - -If cached content tokens are exposed: - -- map them to `cache_read_tokens` - -If a route exposes no cache creation metric: - -- store `cache_write_tokens = 0` -- preserve the raw usage payload for later extension - -### DeepSeek And Other Direct Providers - -Normalize only the fields that are officially exposed. - -If a provider does not expose cache buckets: - -- do not infer them unless the provider explicitly documents how to derive them - -### Subscription / Included-Cost Routes - -These still use the canonical usage model. - -Tokens are tracked normally. Cost depends on billing mode, not on whether usage exists. - -## Billing Route Model - -Hermes must stop keying pricing solely by `model`. 
- -Introduce a billing route descriptor: - -```python -@dataclass -class BillingRoute: - provider: str - base_url: str | None - model: str - billing_mode: str - organization_hint: str | None = None -``` - -`billing_mode` values: - -- `official_cost_api` -- `official_generation_api` -- `official_models_api` -- `official_docs_snapshot` -- `subscription_included` -- `user_override` -- `custom_contract` -- `unknown` - -Examples: - -- OpenAI direct API with Costs API access: `official_cost_api` -- Anthropic direct API with Usage & Cost API access: `official_cost_api` -- OpenRouter request before reconciliation: `official_models_api` -- OpenRouter request after generation lookup: `official_generation_api` -- GitHub Copilot style subscription route: `subscription_included` -- local OpenAI-compatible server: `unknown` -- enterprise contract with configured rates: `custom_contract` - -## Cost Status Model - -Every displayed cost should have: - -```python -@dataclass -class CostResult: - amount_usd: Decimal | None - status: Literal["actual", "estimated", "included", "unknown"] - source: Literal[ - "provider_cost_api", - "provider_generation_api", - "provider_models_api", - "official_docs_snapshot", - "user_override", - "custom_contract", - "none", - ] - label: str - fetched_at: datetime | None - pricing_version: str | None - notes: list[str] -``` - -Presentation rules: - -- `actual`: show dollar amount as final -- `estimated`: show dollar amount with estimate labeling -- `included`: show `included` or `$0.00 (included)` depending on UX choice -- `unknown`: show `n/a` - -## Official Source Hierarchy - -Resolve cost using this order: - -1. Request-level or account-level official billed cost -2. Official machine-readable model pricing -3. Official docs snapshot -4. User override or custom contract -5. Unknown - -The system must never skip to a lower level if a higher-confidence source exists for the current billing route. 
- -## Provider-Specific Truth Rules - -### OpenAI Direct - -Preferred truth: - -1. Costs API for reconciled spend -2. Official pricing page for live estimate - -### Anthropic Direct - -Preferred truth: - -1. Usage & Cost API for reconciled spend -2. Official pricing docs for live estimate - -### OpenRouter - -Preferred truth: - -1. `GET /api/v1/generation` for reconciled `total_cost` -2. `GET /api/v1/models` pricing for live estimate - -Do not use underlying provider public pricing as the source of truth for OpenRouter billing. - -### Gemini / Vertex - -Preferred truth: - -1. official billing export or billing API for reconciled spend when available for the route -2. official pricing docs for estimate - -### DeepSeek - -Preferred truth: - -1. official machine-readable cost source if available in the future -2. official pricing docs snapshot today - -### Subscription-Included Routes - -Preferred truth: - -1. explicit route config marking the model as included in subscription - -These should display `included`, not an API list-price estimate. - -### Custom Endpoint / Local Model - -Preferred truth: - -1. user override -2. custom contract config -3. unknown - -These should default to `unknown`. - -## Pricing Catalog - -Replace the current `MODEL_PRICING` dict with a richer pricing catalog. 
- -Suggested record: - -```python -@dataclass -class PricingEntry: - provider: str - route_pattern: str - model_pattern: str - - input_cost_per_million: Decimal | None = None - output_cost_per_million: Decimal | None = None - cache_read_cost_per_million: Decimal | None = None - cache_write_cost_per_million: Decimal | None = None - request_cost: Decimal | None = None - image_cost: Decimal | None = None - - source: str = "official_docs_snapshot" - source_url: str | None = None - fetched_at: datetime | None = None - pricing_version: str | None = None -``` - -The catalog should be route-aware: - -- `openai:gpt-5` -- `anthropic:claude-opus-4-6` -- `openrouter:anthropic/claude-opus-4.6` -- `copilot:gpt-4o` - -This avoids conflating direct-provider billing with aggregator billing. - -## Pricing Sync Architecture - -Introduce a pricing sync subsystem instead of manually maintaining a single hardcoded table. - -Suggested modules: - -- `agent/pricing/catalog.py` -- `agent/pricing/sources.py` -- `agent/pricing/sync.py` -- `agent/pricing/reconcile.py` -- `agent/pricing/types.py` - -### Sync Sources - -- OpenRouter models API -- official provider docs snapshots where no API exists -- user overrides from config - -### Sync Output - -Cache pricing entries locally with: - -- source URL -- fetch timestamp -- version/hash -- confidence/source type - -### Sync Frequency - -- startup warm cache -- background refresh every 6 to 24 hours depending on source -- manual `hermes pricing sync` - -## Reconciliation Architecture - -Live requests may produce only an estimate initially. Hermes should reconcile them later when a provider exposes actual billed cost. - -Suggested flow: - -1. Agent call completes. -2. Hermes stores canonical usage plus reconciliation ids. -3. Hermes computes an immediate estimate if a pricing source exists. -4. A reconciliation worker fetches actual cost when supported. -5. Session and message records are updated with `actual` cost. 
- -This can run: - -- inline for cheap lookups -- asynchronously for delayed provider accounting - -## Persistence Changes - -Session storage should stop storing only aggregate prompt/completion totals. - -Add fields for both usage and cost certainty: - -- `input_tokens` -- `output_tokens` -- `cache_read_tokens` -- `cache_write_tokens` -- `reasoning_tokens` -- `estimated_cost_usd` -- `actual_cost_usd` -- `cost_status` -- `cost_source` -- `pricing_version` -- `billing_provider` -- `billing_mode` - -If schema expansion is too large for one PR, add a new pricing events table: - -```text -session_cost_events - id - session_id - request_id - provider - model - billing_mode - input_tokens - output_tokens - cache_read_tokens - cache_write_tokens - estimated_cost_usd - actual_cost_usd - cost_status - cost_source - pricing_version - created_at - updated_at -``` - -## Hermes Touchpoints - -### `run_agent.py` - -Current responsibility: - -- parse raw provider usage -- update session token counters - -New responsibility: - -- build `CanonicalUsage` -- update canonical counters -- store reconciliation ids -- emit usage event to pricing subsystem - -### `agent/usage_pricing.py` - -Current responsibility: - -- static lookup table -- direct cost arithmetic - -New responsibility: - -- move or replace with pricing catalog facade -- no fuzzy model-family heuristics -- no direct pricing without billing-route context - -### `cli.py` - -Current responsibility: - -- compute session cost directly from prompt/completion totals - -New responsibility: - -- display `CostResult` -- show status badges: - - `actual` - - `estimated` - - `included` - - `n/a` - -### `agent/insights.py` - -Current responsibility: - -- recompute historical estimates from static pricing - -New responsibility: - -- aggregate stored pricing events -- prefer actual cost over estimate -- surface estimates only when reconciliation is unavailable - -## UX Rules - -### Status Bar - -Show one of: - -- `$1.42` -- `~$1.42` -- 
`included` -- `cost n/a` - -Where: - -- `$1.42` means `actual` -- `~$1.42` means `estimated` -- `included` means subscription-backed or explicitly zero-cost route -- `cost n/a` means unknown - -### `/usage` - -Show: - -- token buckets -- estimated cost -- actual cost if available -- cost status -- pricing source - -### `/insights` - -Aggregate: - -- actual cost totals -- estimated-only totals -- unknown-cost sessions count -- included-cost sessions count - -## Config And Overrides - -Add user-configurable pricing overrides in config: - -```yaml -pricing: - mode: hybrid - sync_on_startup: true - sync_interval_hours: 12 - overrides: - - provider: openrouter - model: anthropic/claude-opus-4.6 - billing_mode: custom_contract - input_cost_per_million: 4.25 - output_cost_per_million: 22.0 - cache_read_cost_per_million: 0.5 - cache_write_cost_per_million: 6.0 - included_routes: - - provider: copilot - model: "*" - - provider: codex-subscription - model: "*" -``` - -Overrides must win over catalog defaults for the matching billing route. 
- -## Rollout Plan - -### Phase 1 - -- add canonical usage model -- split cache token buckets in `run_agent.py` -- stop pricing cache-inflated prompt totals -- preserve current UI with improved backend math - -### Phase 2 - -- add route-aware pricing catalog -- integrate OpenRouter models API sync -- add `estimated` vs `included` vs `unknown` - -### Phase 3 - -- add reconciliation for OpenRouter generation cost -- add actual cost persistence -- update `/insights` to prefer actual cost - -### Phase 4 - -- add direct OpenAI and Anthropic reconciliation paths -- add user overrides and contract pricing -- add pricing sync CLI command - -## Testing Strategy - -Add tests for: - -- OpenAI cached token subtraction -- Anthropic cache read/write separation -- OpenRouter estimated vs actual reconciliation -- subscription-backed models showing `included` -- custom endpoints showing `n/a` -- override precedence -- stale catalog fallback behavior - -Current tests that assume heuristic pricing should be replaced with route-aware expectations. - -## Non-Goals - -- exact enterprise billing reconstruction without an official source or user override -- backfilling perfect historical cost for old sessions that lack cache bucket data -- scraping arbitrary provider web pages at request time - -## Recommendation - -Do not expand the existing `MODEL_PRICING` dict. - -That path cannot satisfy the product requirement. Hermes should instead migrate to: - -- canonical usage normalization -- route-aware pricing sources -- estimate-then-reconcile cost lifecycle -- explicit certainty states in the UI - -This is the minimum architecture that makes the statement "Hermes pricing is backed by official sources where possible, and otherwise clearly labeled" defensible. 
diff --git a/docs/skins/example-skin.yaml b/docs/skins/example-skin.yaml deleted file mode 100644 index b81ae00f8d..0000000000 --- a/docs/skins/example-skin.yaml +++ /dev/null @@ -1,97 +0,0 @@ -# ============================================================================ -# Hermes Agent — Example Skin Template -# ============================================================================ -# -# Copy this file to ~/.hermes/skins/<name>.yaml to create a custom skin. -# All fields are optional — missing values inherit from the default skin. -# Activate with: /skin <name> or display.skin: <name> in config.yaml -# -# See hermes_cli/skin_engine.py for the full schema reference. -# ============================================================================ - -# Required: unique skin name (used in /skin command and config) -name: example -description: An example custom skin — copy and modify this template - -# ── Colors ────────────────────────────────────────────────────────────────── -# Hex color values for Rich markup. These control the CLI's visual palette. -colors: - # Banner panel (the startup welcome box) - banner_border: "#CD7F32" # Panel border - banner_title: "#FFD700" # Panel title text - banner_accent: "#FFBF00" # Section headers (Available Tools, Skills, etc.)
- banner_dim: "#B8860B" # Dim/muted text (separators, model info) - banner_text: "#FFF8DC" # Body text (tool names, skill names) - - # UI elements - ui_accent: "#FFBF00" # General accent color - ui_label: "#4dd0e1" # Labels - ui_ok: "#4caf50" # Success indicators - ui_error: "#ef5350" # Error indicators - ui_warn: "#ffa726" # Warning indicators - - # Input area - prompt: "#FFF8DC" # Prompt text color - input_rule: "#CD7F32" # Horizontal rule around input - - # Response box - response_border: "#FFD700" # Response box border (ANSI color) - - # Session display - session_label: "#DAA520" # Session label - session_border: "#8B8682" # Session ID dim color - - # TUI surfaces - status_bar_bg: "#1a1a2e" # Status / usage bar background - voice_status_bg: "#1a1a2e" # Voice-mode badge background - completion_menu_bg: "#1a1a2e" # Completion list background - completion_menu_current_bg: "#333355" # Active completion row background - completion_menu_meta_bg: "#1a1a2e" # Completion meta column background - completion_menu_meta_current_bg: "#333355" # Active completion meta background - -# ── Spinner ───────────────────────────────────────────────────────────────── -# Customize the animated spinner shown during API calls and tool execution. -spinner: - # Faces shown while waiting for the API response - waiting_faces: - - "(。◕‿◕。)" - - "(◕‿◕✿)" - - "٩(◕‿◕。)۶" - - # Faces shown during extended thinking/reasoning - thinking_faces: - - "(。•́︿•̀。)" - - "(◔_◔)" - - "(¬‿¬)" - - # Verbs used in spinner messages (e.g., "pondering your request...") - thinking_verbs: - - "pondering" - - "contemplating" - - "musing" - - "ruminating" - - # Optional: left/right decorations around the spinner - # Each entry is a [left, right] pair. Omit entirely for no wings. - # wings: - # - ["⟪⚔", "⚔⟫"] - # - ["⟪▲", "▲⟫"] - -# ── Branding ──────────────────────────────────────────────────────────────── -# Text strings used throughout the CLI interface. 
-branding: - agent_name: "Hermes Agent" # Banner title, about display - welcome: "Welcome! Type your message or /help for commands." - goodbye: "Goodbye! ⚕" # Exit message - response_label: " ⚕ Hermes " # Response box header label - prompt_symbol: "❯ " # Input prompt symbol - help_header: "(^_^)? Available Commands" # /help header text - -# ── Tool Output ───────────────────────────────────────────────────────────── -# Character used as the prefix for tool output lines. -# Default is "┊" (thin dotted vertical line). Some alternatives: -# "╎" (light triple dash vertical) -# "▏" (left one-eighth block) -# "│" (box drawing light vertical) -# "┃" (box drawing heavy vertical) -tool_prefix: "┊" diff --git a/docs/specs/container-cli-review-fixes.md b/docs/specs/container-cli-review-fixes.md deleted file mode 100644 index 0eb9070dbf..0000000000 --- a/docs/specs/container-cli-review-fixes.md +++ /dev/null @@ -1,329 +0,0 @@ -# Container-Aware CLI Review Fixes Spec - -**PR:** NousResearch/hermes-agent#7543 -**Review:** cursor[bot] bugbot review (4094049442) + two prior rounds -**Date:** 2026-04-12 -**Branch:** `feat/container-aware-cli-clean` - -## Review Issues Summary - -Six issues were raised across three bugbot review rounds. Three were fixed in intermediate commits (38277a6a, 726cf90f). This spec addresses remaining design concerns surfaced by those reviews and simplifies the implementation based on interview decisions. 
- -| # | Issue | Severity | Status | -|---|-------|----------|--------| -| 1 | `os.execvp` retry loop unreachable | Medium | Fixed in 79e8cd12 (switched to subprocess.run) | -| 2 | Redundant `shutil.which("sudo")` | Medium | Fixed in 38277a6a (reuses `sudo` var) | -| 3 | Missing `chown -h` on symlink update | Low | Fixed in 38277a6a | -| 4 | Container routing after `parse_args()` | High | Fixed in 726cf90f | -| 5 | Hardcoded `/home/${user}` | Medium | Fixed in 726cf90f | -| 6 | Group membership not gated on `container.enable` | Low | Fixed in 726cf90f | - -The mechanical fixes are in place but the overall design needs revision. The retry loop, error swallowing, and process model have deeper issues than what the bugbot flagged. - ---- - -## Spec: Revised `_exec_in_container` - -### Design Principles - -1. **Let it crash.** No silent fallbacks. If `.container-mode` exists but something goes wrong, the error propagates naturally (Python traceback). The only case where container routing is skipped is when `.container-mode` doesn't exist or `HERMES_DEV=1`. -2. **No retries.** Probe once for sudo, exec once. If it fails, docker/podman's stderr reaches the user verbatim. -3. **Completely transparent.** No error wrapping, no prefixes, no spinners. Docker's output goes straight through. -4. **`os.execvp` on the happy path.** Replace the Python process entirely so there's no idle parent during interactive sessions. Note: `execvp` never returns on success (process is replaced) and raises `OSError` on failure (it does not return a value). The container process's exit code becomes the process exit code by definition — no explicit propagation needed. -5. **One human-readable exception to "let it crash".** `subprocess.TimeoutExpired` from the sudo probe gets a specific catch with a readable message, since a raw traceback for "your Docker daemon is slow" is confusing. All other exceptions propagate naturally. - -### Execution Flow - -``` -1. 
get_container_exec_info() - - HERMES_DEV=1 → return None (skip routing) - - Inside container → return None (skip routing) - - .container-mode doesn't exist → return None (skip routing) - - .container-mode exists → parse and return dict - - .container-mode exists but malformed/unreadable → LET IT CRASH (no try/except) - -2. _exec_in_container(container_info, sys.argv[1:]) - a. shutil.which(backend) → if None, print "{backend} not found on PATH" and sys.exit(1) - b. Sudo probe: subprocess.run([runtime, "inspect", "--format", "ok", container_name], timeout=15) - - If succeeds → needs_sudo = False - - If fails → try subprocess.run([sudo, "-n", runtime, "inspect", ...], timeout=15) - - If succeeds → needs_sudo = True - - If fails → print error with sudoers hint (including why -n is required) and sys.exit(1) - - If TimeoutExpired → catch specifically, print human-readable message about slow daemon - c. Build exec_cmd: [sudo? + runtime, "exec", tty_flags, "-u", exec_user, env_flags, container, hermes_bin, *cli_args] - d. 
os.execvp(exec_cmd[0], exec_cmd) - - On success: process is replaced — Python is gone, container exit code IS the process exit code - - On OSError: let it crash (natural traceback) -``` - -### Changes to `hermes_cli/main.py` - -#### `_exec_in_container` — rewrite - -Remove: -- The entire retry loop (`max_retries`, `for attempt in range(...)`) -- Spinner logic (`"Waiting for container..."`, dots) -- Exit code classification (125/126/127 handling) -- `subprocess.run` for the exec call (keep it only for the sudo probe) -- Special TTY vs non-TTY retry counts -- The `time` import (no longer needed) - -Change: -- Use `os.execvp(exec_cmd[0], exec_cmd)` as the final call -- Keep the `subprocess` import only for the sudo probe -- Keep TTY detection for the `-it` vs `-i` flag -- Keep env var forwarding (TERM, COLORTERM, LANG, LC_ALL) -- Keep the sudo probe as-is (it's the one "smart" part) -- Bump probe `timeout` from 5s to 15s — cold podman on a loaded machine needs headroom -- Catch `subprocess.TimeoutExpired` specifically on both probe calls — print a readable message about the daemon being unresponsive instead of a raw traceback -- Expand the sudoers hint error message to explain *why* `-n` (non-interactive) is required: a password prompt would hang the CLI or break piped commands - -The function becomes roughly: - -```python -def _exec_in_container(container_info: dict, cli_args: list): - """Replace the current process with a command inside the managed container. - - Probes whether sudo is needed (rootful containers), then os.execvp - into the container. If exec fails, the OS error propagates naturally. - """ - import shutil - import subprocess - - backend = container_info["backend"] - container_name = container_info["container_name"] - exec_user = container_info["exec_user"] - hermes_bin = container_info["hermes_bin"] - - runtime = shutil.which(backend) - if not runtime: - print(f"Error: {backend} not found on PATH. 
Cannot route to container.", - file=sys.stderr) - sys.exit(1) - - # Probe whether we need sudo to see the rootful container. - # Timeout is 15s — cold podman on a loaded machine can take a while. - # TimeoutExpired is caught specifically for a human-readable message; - # all other exceptions propagate naturally. - needs_sudo = False - sudo = None - try: - probe = subprocess.run( - [runtime, "inspect", "--format", "ok", container_name], - capture_output=True, text=True, timeout=15, - ) - except subprocess.TimeoutExpired: - print( - f"Error: timed out waiting for {backend} to respond.\n" - f"The {backend} daemon may be unresponsive or starting up.", - file=sys.stderr, - ) - sys.exit(1) - - if probe.returncode != 0: - sudo = shutil.which("sudo") - if sudo: - try: - probe2 = subprocess.run( - [sudo, "-n", runtime, "inspect", "--format", "ok", container_name], - capture_output=True, text=True, timeout=15, - ) - except subprocess.TimeoutExpired: - print( - f"Error: timed out waiting for sudo {backend} to respond.", - file=sys.stderr, - ) - sys.exit(1) - - if probe2.returncode == 0: - needs_sudo = True - else: - print( - f"Error: container '{container_name}' not found via {backend}.\n" - f"\n" - f"The NixOS service runs the container as root. Your user cannot\n" - f"see it because {backend} uses per-user namespaces.\n" - f"\n" - f"Fix: grant passwordless sudo for {backend}. The -n (non-interactive)\n" - f"flag is required because the CLI calls sudo non-interactively —\n" - f"a password prompt would hang or break piped commands:\n" - f"\n" - f' security.sudo.extraRules = [{{\n' - f' users = [ "{os.getenv("USER", "your-user")}" ];\n' - f' commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n' - f' }}];\n' - f"\n" - f"Or run: sudo hermes {' '.join(cli_args)}", - file=sys.stderr, - ) - sys.exit(1) - else: - print( - f"Error: container '{container_name}' not found via {backend}.\n" - f"The container may be running under root. 
Try: sudo hermes {' '.join(cli_args)}", - file=sys.stderr, - ) - sys.exit(1) - - is_tty = sys.stdin.isatty() - tty_flags = ["-it"] if is_tty else ["-i"] - - env_flags = [] - for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"): - val = os.environ.get(var) - if val: - env_flags.extend(["-e", f"{var}={val}"]) - - cmd_prefix = [sudo, "-n", runtime] if needs_sudo else [runtime] - exec_cmd = ( - cmd_prefix + ["exec"] - + tty_flags - + ["-u", exec_user] - + env_flags - + [container_name, hermes_bin] - + cli_args - ) - - # execvp replaces this process entirely — it never returns on success. - # On failure it raises OSError, which propagates naturally. - os.execvp(exec_cmd[0], exec_cmd) -``` - -#### Container routing call site in `main()` — remove try/except - -Current: -```python -try: - from hermes_cli.config import get_container_exec_info - container_info = get_container_exec_info() - if container_info: - _exec_in_container(container_info, sys.argv[1:]) - sys.exit(1) # exec failed if we reach here -except SystemExit: - raise -except Exception: - pass # Container routing unavailable, proceed locally -``` - -Revised: -```python -from hermes_cli.config import get_container_exec_info -container_info = get_container_exec_info() -if container_info: - _exec_in_container(container_info, sys.argv[1:]) - # Unreachable: os.execvp never returns on success (process is replaced) - # and raises OSError on failure (which propagates as a traceback). - # This line exists only as a defensive assertion. - sys.exit(1) -``` - -No try/except. If `.container-mode` doesn't exist, `get_container_exec_info()` returns `None` and we skip routing. If it exists but is broken, the exception propagates with a natural traceback. - -Note: `sys.exit(1)` after `_exec_in_container` is dead code in all paths — `os.execvp` either replaces the process or raises. It's kept as a belt-and-suspenders assertion with a comment marking it unreachable, not as actual error handling. 
- -### Changes to `hermes_cli/config.py` - -#### `get_container_exec_info` — remove inner try/except - -Current code catches `(OSError, IOError)` and returns `None`. This silently hides permission errors, corrupt files, etc. - -Change: Remove the try/except around file reading. Keep the early returns for `HERMES_DEV=1` and `_is_inside_container()`. The `FileNotFoundError` from `open()` when `.container-mode` doesn't exist should still return `None` (this is the "container mode not enabled" case). All other exceptions propagate. - -```python -def get_container_exec_info() -> Optional[dict]: - if os.environ.get("HERMES_DEV") == "1": - return None - if _is_inside_container(): - return None - - container_mode_file = get_hermes_home() / ".container-mode" - - try: - with open(container_mode_file, "r") as f: - # ... parse key=value lines ... - except FileNotFoundError: - return None - # All other exceptions (PermissionError, malformed data, etc.) propagate - - return { ... } -``` - ---- - -## Spec: NixOS Module Changes - -### Symlink creation — simplify to two branches - -Current: 4 branches (symlink exists, directory exists, other file, doesn't exist). - -Revised: 2 branches. - -```bash -if [ -d "${symlinkPath}" ] && [ ! -L "${symlinkPath}" ]; then - # Real directory — back it up, then create symlink - _backup="${symlinkPath}.bak.$(date +%s)" - echo "hermes-agent: backing up existing ${symlinkPath} to $_backup" - mv "${symlinkPath}" "$_backup" -fi -# For everything else (symlink, doesn't exist, etc.) — just force-create -ln -sfn "${target}" "${symlinkPath}" -chown -h ${user}:${cfg.group} "${symlinkPath}" -``` - -`ln -sfn` handles: existing symlink (replaces), doesn't exist (creates), and after the `mv` above (creates). The only case that needs special handling is a real directory, because `ln -sfn` cannot atomically replace a directory. - -Note: there is a theoretical race between the `[ -d ... 
]` check and the `mv` (something could create/remove the directory in between). In practice this is a NixOS activation script running as root during `nixos-rebuild switch` — no other process should be touching `~/.hermes` at that moment. Not worth adding locking for. - -### Sudoers — document, don't auto-configure - -Do NOT add `security.sudo.extraRules` to the module. Document the sudoers requirement in the module's description/comments and in the error message the CLI prints when sudo probe fails. - -### Group membership gating — keep as-is - -The fix in 726cf90f (`cfg.container.enable && cfg.container.hostUsers != []`) is correct. Leftover group membership when container mode is disabled is harmless. No cleanup needed. - ---- - -## Spec: Test Rewrite - -The existing test file (`tests/hermes_cli/test_container_aware_cli.py`) has 16 tests. With the simplified exec model, several are obsolete. - -### Tests to keep (update as needed) - -- `test_is_inside_container_dockerenv` — unchanged -- `test_is_inside_container_containerenv` — unchanged -- `test_is_inside_container_cgroup_docker` — unchanged -- `test_is_inside_container_false_on_host` — unchanged -- `test_get_container_exec_info_returns_metadata` — unchanged -- `test_get_container_exec_info_none_inside_container` — unchanged -- `test_get_container_exec_info_none_without_file` — unchanged -- `test_get_container_exec_info_skipped_when_hermes_dev` — unchanged -- `test_get_container_exec_info_not_skipped_when_hermes_dev_zero` — unchanged -- `test_get_container_exec_info_defaults` — unchanged -- `test_get_container_exec_info_docker_backend` — unchanged - -### Tests to add - -- `test_get_container_exec_info_crashes_on_permission_error` — verify that `PermissionError` propagates (no silent `None` return) -- `test_exec_in_container_calls_execvp` — verify `os.execvp` is called with correct args (runtime, tty flags, user, env, container, binary, cli args) -- `test_exec_in_container_sudo_probe_sets_prefix` — verify that 
when first probe fails and sudo probe succeeds, `os.execvp` is called with `sudo -n` prefix -- `test_exec_in_container_no_runtime_hard_fails` — keep existing, verify `sys.exit(1)` when `shutil.which` returns None -- `test_exec_in_container_non_tty_uses_i_only` — update to check `os.execvp` args instead of `subprocess.run` args -- `test_exec_in_container_probe_timeout_prints_message` — verify that `subprocess.TimeoutExpired` from the probe produces a human-readable error and `sys.exit(1)`, not a raw traceback -- `test_exec_in_container_container_not_running_no_sudo` — verify the path where runtime exists (`shutil.which` returns a path) but probe returns non-zero and no sudo is available. Should print the "container may be running under root" error. This is distinct from `no_runtime_hard_fails` which covers `shutil.which` returning None. - -### Tests to delete - -- `test_exec_in_container_tty_retries_on_container_failure` — retry loop removed -- `test_exec_in_container_non_tty_retries_silently_exits_126` — retry loop removed -- `test_exec_in_container_propagates_hermes_exit_code` — no subprocess.run to check exit codes; execvp replaces the process. Note: exit code propagation still works correctly — when `os.execvp` succeeds, the container's process *becomes* this process, so its exit code is the process exit code by OS semantics. No application code needed, no test needed. A comment in the function docstring documents this intent for future readers. 
- ---- - -## Out of Scope - -- Auto-configuring sudoers rules in the NixOS module -- Any changes to `get_container_exec_info` parsing logic beyond the try/except narrowing -- Changes to `.container-mode` file format -- Changes to the `HERMES_DEV=1` bypass -- Changes to container detection logic (`_is_inside_container`) diff --git a/flake.lock b/flake.lock index 78ceba92d7..305b79526e 100644 --- a/flake.lock +++ b/flake.lock @@ -36,6 +36,26 @@ "type": "github" } }, + "npm-lockfile-fix": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1775903712, + "narHash": "sha256-2GV79U6iVH4gKAPWYrxUReB0S41ty/Y3dBLquU8AlaA=", + "owner": "jeslie0", + "repo": "npm-lockfile-fix", + "rev": "c6093acb0c0548e0f9b8b3d82918823721930fe8", + "type": "github" + }, + "original": { + "owner": "jeslie0", + "repo": "npm-lockfile-fix", + "type": "github" + } + }, "pyproject-build-systems": { "inputs": { "nixpkgs": [ @@ -124,6 +144,7 @@ "inputs": { "flake-parts": "flake-parts", "nixpkgs": "nixpkgs", + "npm-lockfile-fix": "npm-lockfile-fix", "pyproject-build-systems": "pyproject-build-systems", "pyproject-nix": "pyproject-nix_2", "uv2nix": "uv2nix_2" diff --git a/flake.nix b/flake.nix index 919fa434dc..fcb5eaa619 100644 --- a/flake.nix +++ b/flake.nix @@ -19,11 +19,20 @@ url = "github:pyproject-nix/build-system-pkgs"; inputs.nixpkgs.follows = "nixpkgs"; }; + npm-lockfile-fix = { + url = "github:jeslie0/npm-lockfile-fix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; }; - outputs = inputs: + outputs = + inputs: inputs.flake-parts.lib.mkFlake { inherit inputs; } { - systems = [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" ]; + systems = [ + "x86_64-linux" + "aarch64-linux" + "aarch64-darwin" + ]; imports = [ ./nix/packages.nix diff --git a/gateway/channel_directory.py b/gateway/channel_directory.py index ae2beda9ef..2489b718f8 100644 --- a/gateway/channel_directory.py +++ b/gateway/channel_directory.py @@ -100,7 +100,7 @@ def build_channel_directory(adapters: 
Dict[Any, Any]) -> Dict[str, Any]: def _build_discord(adapter) -> List[Dict[str, str]]: - """Enumerate all text channels the Discord bot can see.""" + """Enumerate all text channels and forum channels the Discord bot can see.""" channels = [] client = getattr(adapter, "_client", None) if not client: @@ -119,6 +119,15 @@ def _build_discord(adapter) -> List[Dict[str, str]]: "guild": guild.name, "type": "channel", }) + # Forum channels (type 15) — creating a message auto-spawns a thread post. + forums = getattr(guild, "forum_channels", None) or [] + for ch in forums: + channels.append({ + "id": str(ch.id), + "name": ch.name, + "guild": guild.name, + "type": "forum", + }) # Also include DM-capable users we've interacted with is not # feasible via guild enumeration; those come from sessions. @@ -191,6 +200,15 @@ def load_directory() -> Dict[str, Any]: return {"updated_at": None, "platforms": {}} +def lookup_channel_type(platform_name: str, chat_id: str) -> Optional[str]: + """Return the channel ``type`` string (e.g. ``"channel"``, ``"forum"``) for *chat_id*, or *None* if unknown.""" + directory = load_directory() + for ch in directory.get("platforms", {}).get(platform_name, []): + if ch.get("id") == chat_id: + return ch.get("type") + return None + + def resolve_channel_name(platform_name: str, name: str) -> Optional[str]: """ Resolve a human-friendly channel name to a numeric ID. diff --git a/gateway/config.py b/gateway/config.py index 7ce105f331..2d74073234 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -258,6 +258,13 @@ class GatewayConfig: # Streaming configuration streaming: StreamingConfig = field(default_factory=StreamingConfig) + # Session store pruning: drop SessionEntry records older than this many + # days from the in-memory dict and sessions.json. Keeps the store from + # growing unbounded in gateways serving many chats/threads/users over + # months. 
Pruning is invisible to users — if they resume, they get a + # fresh session exactly as if the reset policy had fired. 0 = disabled. + session_store_max_age_days: int = 90 + def get_connected_platforms(self) -> List[Platform]: """Return list of platforms that are enabled and configured.""" connected = [] @@ -307,6 +314,14 @@ class GatewayConfig: # QQBot uses extra dict for app credentials elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"): connected.append(platform) + # DingTalk uses client_id/client_secret from config.extra or env vars + elif platform == Platform.DINGTALK and ( + config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID") + ) and ( + config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET") + ): + connected.append(platform) + return connected def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]: @@ -357,6 +372,7 @@ class GatewayConfig: "thread_sessions_per_user": self.thread_sessions_per_user, "unauthorized_dm_behavior": self.unauthorized_dm_behavior, "streaming": self.streaming.to_dict(), + "session_store_max_age_days": self.session_store_max_age_days, } @classmethod @@ -404,6 +420,13 @@ class GatewayConfig: "pair", ) + try: + session_store_max_age_days = int(data.get("session_store_max_age_days", 90)) + if session_store_max_age_days < 0: + session_store_max_age_days = 0 + except (TypeError, ValueError): + session_store_max_age_days = 90 + return cls( platforms=platforms, default_reset_policy=default_policy, @@ -418,6 +441,7 @@ class GatewayConfig: thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False), unauthorized_dm_behavior=unauthorized_dm_behavior, streaming=StreamingConfig.from_dict(data.get("streaming", {})), + session_store_max_age_days=session_store_max_age_days, ) def get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str: @@ -554,6 +578,12 @@ def load_gateway_config() -> GatewayConfig: 
bridged["mention_patterns"] = platform_cfg["mention_patterns"] if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg: bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"] + if "channel_prompts" in platform_cfg: + channel_prompts = platform_cfg["channel_prompts"] + if isinstance(channel_prompts, dict): + bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()} + else: + bridged["channel_prompts"] = channel_prompts if not bridged: continue plat_data = platforms_data.setdefault(plat.value, {}) @@ -611,6 +641,20 @@ def load_gateway_config() -> GatewayConfig: if isinstance(ntc, list): ntc = ",".join(str(v) for v in ntc) os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc) + # allow_mentions: granular control over what the bot can ping. + # Safe defaults (no @everyone/roles) are applied in the adapter; + # these YAML keys only override when set and let users opt back + # into unsafe modes (e.g. roles=true) if they actually want it. + allow_mentions_cfg = discord_cfg.get("allow_mentions") + if isinstance(allow_mentions_cfg, dict): + for yaml_key, env_key in ( + ("everyone", "DISCORD_ALLOW_MENTION_EVERYONE"), + ("roles", "DISCORD_ALLOW_MENTION_ROLES"), + ("users", "DISCORD_ALLOW_MENTION_USERS"), + ("replied_user", "DISCORD_ALLOW_MENTION_REPLIED_USER"), + ): + if yaml_key in allow_mentions_cfg and not os.getenv(env_key): + os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower() # Telegram settings → env vars (env vars take precedence) telegram_cfg = yaml_cfg.get("telegram", {}) @@ -632,6 +676,18 @@ def load_gateway_config() -> GatewayConfig: os.environ["TELEGRAM_IGNORED_THREADS"] = str(ignored_threads) if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"): os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower() + if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"): + os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip() + if 
"disable_link_previews" in telegram_cfg: + plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {}) + if not isinstance(plat_data, dict): + plat_data = {} + platforms_data[Platform.TELEGRAM.value] = plat_data + extra = plat_data.setdefault("extra", {}) + if not isinstance(extra, dict): + extra = {} + plat_data["extra"] = extra + extra["disable_link_previews"] = telegram_cfg["disable_link_previews"] whatsapp_cfg = yaml_cfg.get("whatsapp", {}) if isinstance(whatsapp_cfg, dict): @@ -645,6 +701,24 @@ def load_gateway_config() -> GatewayConfig: frc = ",".join(str(v) for v in frc) os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc) + # DingTalk settings → env vars (env vars take precedence) + dingtalk_cfg = yaml_cfg.get("dingtalk", {}) + if isinstance(dingtalk_cfg, dict): + if "require_mention" in dingtalk_cfg and not os.getenv("DINGTALK_REQUIRE_MENTION"): + os.environ["DINGTALK_REQUIRE_MENTION"] = str(dingtalk_cfg["require_mention"]).lower() + if "mention_patterns" in dingtalk_cfg and not os.getenv("DINGTALK_MENTION_PATTERNS"): + os.environ["DINGTALK_MENTION_PATTERNS"] = json.dumps(dingtalk_cfg["mention_patterns"]) + frc = dingtalk_cfg.get("free_response_chats") + if frc is not None and not os.getenv("DINGTALK_FREE_RESPONSE_CHATS"): + if isinstance(frc, list): + frc = ",".join(str(v) for v in frc) + os.environ["DINGTALK_FREE_RESPONSE_CHATS"] = str(frc) + allowed = dingtalk_cfg.get("allowed_users") + if allowed is not None and not os.getenv("DINGTALK_ALLOWED_USERS"): + if isinstance(allowed, list): + allowed = ",".join(str(v) for v in allowed) + os.environ["DINGTALK_ALLOWED_USERS"] = str(allowed) + # Matrix settings → env vars (env vars take precedence) matrix_cfg = yaml_cfg.get("matrix", {}) if isinstance(matrix_cfg, dict): @@ -988,6 +1062,25 @@ def _apply_env_overrides(config: GatewayConfig) -> None: if webhook_secret: config.platforms[Platform.WEBHOOK].extra["secret"] = webhook_secret + # DingTalk + dingtalk_client_id = os.getenv("DINGTALK_CLIENT_ID") + 
dingtalk_client_secret = os.getenv("DINGTALK_CLIENT_SECRET") + if dingtalk_client_id and dingtalk_client_secret: + if Platform.DINGTALK not in config.platforms: + config.platforms[Platform.DINGTALK] = PlatformConfig() + config.platforms[Platform.DINGTALK].enabled = True + config.platforms[Platform.DINGTALK].extra.update({ + "client_id": dingtalk_client_id, + "client_secret": dingtalk_client_secret, + }) + dingtalk_home = os.getenv("DINGTALK_HOME_CHANNEL") + if dingtalk_home: + config.platforms[Platform.DINGTALK].home_channel = HomeChannel( + platform=Platform.DINGTALK, + chat_id=dingtalk_home, + name=os.getenv("DINGTALK_HOME_CHANNEL_NAME", "Home"), + ) + # Feishu / Lark feishu_app_id = os.getenv("FEISHU_APP_ID") feishu_app_secret = os.getenv("FEISHU_APP_SECRET") @@ -1136,12 +1229,24 @@ def _apply_env_overrides(config: GatewayConfig) -> None: qq_group_allowed = os.getenv("QQ_GROUP_ALLOWED_USERS", "").strip() if qq_group_allowed: extra["group_allow_from"] = qq_group_allowed - qq_home = os.getenv("QQ_HOME_CHANNEL", "").strip() + qq_home = os.getenv("QQBOT_HOME_CHANNEL", "").strip() + qq_home_name_env = "QQBOT_HOME_CHANNEL_NAME" + if not qq_home: + # Back-compat: accept the pre-rename name and log a one-time warning. + legacy_home = os.getenv("QQ_HOME_CHANNEL", "").strip() + if legacy_home: + qq_home = legacy_home + qq_home_name_env = "QQ_HOME_CHANNEL_NAME" + import logging + logging.getLogger(__name__).warning( + "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL " + "in your .env for consistency with the platform key." 
+ ) if qq_home: config.platforms[Platform.QQBOT].home_channel = HomeChannel( platform=Platform.QQBOT, chat_id=qq_home, - name=os.getenv("QQ_HOME_CHANNEL_NAME", "Home"), + name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"), ) # Session settings diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 2077c9c859..7efb756c9c 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -117,6 +117,160 @@ def _normalize_chat_content( return "" +# Content part type aliases used by the OpenAI Chat Completions and Responses +# APIs. We accept both spellings on input and emit a single canonical internal +# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the +# rest of the agent pipeline already understands. +_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"}) +_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"}) +_FILE_PART_TYPES = frozenset({"file", "input_file"}) + + +def _normalize_multimodal_content(content: Any) -> Any: + """Validate and normalize multimodal content for the API server. + + Returns a plain string when the content is text-only, or a list of + ``{"type": "text"|"image_url", ...}`` parts when images are present. + The output shape is the native OpenAI Chat Completions vision format, + which the agent pipeline accepts verbatim (OpenAI-wire providers) or + converts (``_preprocess_anthropic_content`` for Anthropic). + + Raises ``ValueError`` with an OpenAI-style code on invalid input: + * ``unsupported_content_type`` — file/input_file/file_id parts, or + non-image ``data:`` URLs. + * ``invalid_image_url`` — missing URL or unsupported scheme. + * ``invalid_content_part`` — malformed text/image objects. + + Callers translate the ValueError into a 400 response. + """ + # Scalar passthrough mirrors ``_normalize_chat_content``. 
+ if content is None: + return "" + if isinstance(content, str): + return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content + if not isinstance(content, list): + # Mirror the legacy text-normalizer's fallback so callers that + # pre-existed image support still get a string back. + return _normalize_chat_content(content) + + items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content + normalized_parts: List[Dict[str, Any]] = [] + text_accum_len = 0 + + for part in items: + if isinstance(part, str): + if part: + trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH] + normalized_parts.append({"type": "text", "text": trimmed}) + text_accum_len += len(trimmed) + continue + + if not isinstance(part, dict): + # Ignore unknown scalars for forward compatibility with future + # Responses API additions (e.g. ``refusal``). The same policy + # the text normalizer applies. + continue + + raw_type = part.get("type") + part_type = str(raw_type or "").strip().lower() + + if part_type in _TEXT_PART_TYPES: + text = part.get("text") + if text is None: + continue + if not isinstance(text, str): + text = str(text) + if text: + trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH] + normalized_parts.append({"type": "text", "text": trimmed}) + text_accum_len += len(trimmed) + continue + + if part_type in _IMAGE_PART_TYPES: + detail = part.get("detail") + image_ref = part.get("image_url") + # OpenAI Responses sends ``input_image`` with a top-level + # ``image_url`` string; Chat Completions sends ``image_url`` as + # ``{"url": "...", "detail": "..."}``. Support both. 
+ if isinstance(image_ref, dict): + url_value = image_ref.get("url") + detail = image_ref.get("detail", detail) + else: + url_value = image_ref + if not isinstance(url_value, str) or not url_value.strip(): + raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.") + url_value = url_value.strip() + lowered = url_value.lower() + if lowered.startswith("data:"): + if not lowered.startswith("data:image/") or "," not in url_value: + raise ValueError( + "unsupported_content_type:Only image data URLs are supported. " + "Non-image data payloads are not supported." + ) + elif not (lowered.startswith("http://") or lowered.startswith("https://")): + raise ValueError( + "invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs." + ) + image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}} + if detail is not None: + if not isinstance(detail, str) or not detail.strip(): + raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.") + image_part["image_url"]["detail"] = detail.strip() + normalized_parts.append(image_part) + continue + + if part_type in _FILE_PART_TYPES: + raise ValueError( + "unsupported_content_type:Inline image inputs are supported, " + "but uploaded files and document inputs are not supported on this endpoint." + ) + + # Unknown part type — reject explicitly so clients get a clear error + # instead of a silently dropped turn. + raise ValueError( + f"unsupported_content_type:Unsupported content part type {raw_type!r}. " + "Only text and image_url/input_image parts are supported." + ) + + if not normalized_parts: + return "" + + # Text-only: collapse to a plain string so downstream logging/trajectory + # code sees the native shape and prompt caching on text-only turns is + # unaffected. 
+ if all(p.get("type") == "text" for p in normalized_parts): + return "\n".join(p["text"] for p in normalized_parts if p.get("text")) + + return normalized_parts + + +def _content_has_visible_payload(content: Any) -> bool: + """True when content has any text or image attachment. Used to reject empty turns.""" + if isinstance(content, str): + return bool(content.strip()) + if isinstance(content, list): + for part in content: + if isinstance(part, dict): + ptype = str(part.get("type") or "").strip().lower() + if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip(): + return True + if ptype in _IMAGE_PART_TYPES: + return True + return False + + +def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response": + """Translate a ``_normalize_multimodal_content`` ValueError into a 400 response.""" + raw = str(exc) + code, _, message = raw.partition(":") + if not message: + code, message = "invalid_content_part", raw + return web.json_response( + _openai_error(message, code=code, param=param), + status=400, + ) + + def check_api_server_requirements() -> bool: """Check if API server dependencies are available.""" return AIOHTTP_AVAILABLE @@ -515,6 +669,8 @@ class APIServerAdapter(BasePlatformAdapter): session_id: Optional[str] = None, stream_delta_callback=None, tool_progress_callback=None, + tool_start_callback=None, + tool_complete_callback=None, ) -> Any: """ Create an AIAgent instance using the gateway's runtime config. 
@@ -553,6 +709,8 @@ class APIServerAdapter(BasePlatformAdapter): platform="api_server", stream_delta_callback=stream_delta_callback, tool_progress_callback=tool_progress_callback, + tool_start_callback=tool_start_callback, + tool_complete_callback=tool_complete_callback, session_db=self._ensure_session_db(), fallback_model=fallback_model, ) @@ -633,26 +791,32 @@ class APIServerAdapter(BasePlatformAdapter): system_prompt = None conversation_messages: List[Dict[str, str]] = [] - for msg in messages: + for idx, msg in enumerate(messages): role = msg.get("role", "") - content = _normalize_chat_content(msg.get("content", "")) + raw_content = msg.get("content", "") if role == "system": - # Accumulate system messages + # System messages don't support images (Anthropic rejects, OpenAI + # text-model systems don't render them). Flatten to text. + content = _normalize_chat_content(raw_content) if system_prompt is None: system_prompt = content else: system_prompt = system_prompt + "\n" + content elif role in ("user", "assistant"): + try: + content = _normalize_multimodal_content(raw_content) + except ValueError as exc: + return _multimodal_validation_error(exc, param=f"messages[{idx}].content") conversation_messages.append({"role": role, "content": content}) # Extract the last user message as the primary input - user_message = "" + user_message: Any = "" history = [] if conversation_messages: user_message = conversation_messages[-1].get("content", "") history = conversation_messages[:-1] - if not user_message: + if not _content_has_visible_payload(user_message): return web.json_response( {"error": {"message": "No user message found in messages", "type": "invalid_request_error"}}, status=400, @@ -898,7 +1062,7 @@ class APIServerAdapter(BasePlatformAdapter): return time.monotonic() # Stream content chunks as they arrive from the agent - loop = asyncio.get_event_loop() + loop = asyncio.get_running_loop() while True: try: delta = await loop.run_in_executor(None, lambda: 
stream_q.get(timeout=0.5)) @@ -965,6 +1129,427 @@ class APIServerAdapter(BasePlatformAdapter): return response + async def _write_sse_responses( + self, + request: "web.Request", + response_id: str, + model: str, + created_at: int, + stream_q, + agent_task, + agent_ref, + conversation_history: List[Dict[str, str]], + user_message: str, + instructions: Optional[str], + conversation: Optional[str], + store: bool, + session_id: str, + ) -> "web.StreamResponse": + """Write an SSE stream for POST /v1/responses (OpenAI Responses API). + + Emits spec-compliant event types as the agent runs: + + - ``response.created`` — initial envelope (status=in_progress) + - ``response.output_text.delta`` / ``response.output_text.done`` — + streamed assistant text + - ``response.output_item.added`` / ``response.output_item.done`` + with ``item.type == "function_call"`` — when the agent invokes a + tool (both events fire; the ``done`` event carries the finalized + ``arguments`` string) + - ``response.output_item.added`` with + ``item.type == "function_call_output"`` — tool result with + ``{call_id, output, status}`` + - ``response.completed`` — terminal event carrying the full + response object with all output items + usage (same payload + shape as the non-streaming path for parity) + - ``response.failed`` — terminal event on agent error + + If the client disconnects mid-stream, ``agent.interrupt()`` is + called so the agent stops issuing upstream LLM calls, then the + asyncio task is cancelled. When ``store=True`` the full response + is persisted to the ResponseStore in a ``finally`` block so GET + /v1/responses/{id} and ``previous_response_id`` chaining work the + same as the batch path. 
+ """ + import queue as _q + + sse_headers = { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "X-Accel-Buffering": "no", + } + origin = request.headers.get("Origin", "") + cors = self._cors_headers_for_origin(origin) if origin else None + if cors: + sse_headers.update(cors) + if session_id: + sse_headers["X-Hermes-Session-Id"] = session_id + response = web.StreamResponse(status=200, headers=sse_headers) + await response.prepare(request) + + # State accumulated during the stream + final_text_parts: List[str] = [] + # Track open function_call items by name so we can emit a matching + # ``done`` event when the tool completes. Order preserved. + pending_tool_calls: List[Dict[str, Any]] = [] + # Output items we've emitted so far (used to build the terminal + # response.completed payload). Kept in the order they appeared. + emitted_items: List[Dict[str, Any]] = [] + # Monotonic counter for output_index (spec requires it). + output_index = 0 + # Monotonic counter for call_id generation if the agent doesn't + # provide one (it doesn't, from tool_progress_callback). + call_counter = 0 + # Canonical Responses SSE events include a monotonically increasing + # sequence_number. Add it server-side for every emitted event so + # clients that validate the OpenAI event schema can parse our stream. + sequence_number = 0 + # Track the assistant message item id + content index for text + # delta events — the spec ties deltas to a specific item. 
+ message_item_id = f"msg_{uuid.uuid4().hex[:24]}" + message_output_index: Optional[int] = None + message_opened = False + + async def _write_event(event_type: str, data: Dict[str, Any]) -> None: + nonlocal sequence_number + if "sequence_number" not in data: + data["sequence_number"] = sequence_number + sequence_number += 1 + payload = f"event: {event_type}\ndata: {json.dumps(data)}\n\n" + await response.write(payload.encode()) + + def _envelope(status: str) -> Dict[str, Any]: + env: Dict[str, Any] = { + "id": response_id, + "object": "response", + "status": status, + "created_at": created_at, + "model": model, + } + return env + + final_response_text = "" + agent_error: Optional[str] = None + usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} + + try: + # response.created — initial envelope, status=in_progress + created_env = _envelope("in_progress") + created_env["output"] = [] + await _write_event("response.created", { + "type": "response.created", + "response": created_env, + }) + last_activity = time.monotonic() + + async def _open_message_item() -> None: + """Emit response.output_item.added for the assistant message + the first time any text delta arrives.""" + nonlocal message_opened, message_output_index, output_index + if message_opened: + return + message_opened = True + message_output_index = output_index + output_index += 1 + item = { + "id": message_item_id, + "type": "message", + "status": "in_progress", + "role": "assistant", + "content": [], + } + await _write_event("response.output_item.added", { + "type": "response.output_item.added", + "output_index": message_output_index, + "item": item, + }) + + async def _emit_text_delta(delta_text: str) -> None: + await _open_message_item() + final_text_parts.append(delta_text) + await _write_event("response.output_text.delta", { + "type": "response.output_text.delta", + "item_id": message_item_id, + "output_index": message_output_index, + "content_index": 0, + "delta": 
delta_text, + "logprobs": [], + }) + + async def _emit_tool_started(payload: Dict[str, Any]) -> str: + """Emit response.output_item.added for a function_call. + + Returns the call_id so the matching completion event can + reference it. Prefer the real ``tool_call_id`` from the + agent when available; fall back to a generated call id for + safety in tests or older code paths. + """ + nonlocal output_index, call_counter + call_counter += 1 + call_id = payload.get("tool_call_id") or f"call_{response_id[5:]}_{call_counter}" + args = payload.get("arguments", {}) + if isinstance(args, dict): + arguments_str = json.dumps(args) + else: + arguments_str = str(args) + item = { + "id": f"fc_{uuid.uuid4().hex[:24]}", + "type": "function_call", + "status": "in_progress", + "name": payload.get("name", ""), + "call_id": call_id, + "arguments": arguments_str, + } + idx = output_index + output_index += 1 + pending_tool_calls.append({ + "call_id": call_id, + "name": payload.get("name", ""), + "arguments": arguments_str, + "item_id": item["id"], + "output_index": idx, + }) + emitted_items.append({ + "type": "function_call", + "name": payload.get("name", ""), + "arguments": arguments_str, + "call_id": call_id, + }) + await _write_event("response.output_item.added", { + "type": "response.output_item.added", + "output_index": idx, + "item": item, + }) + return call_id + + async def _emit_tool_completed(payload: Dict[str, Any]) -> None: + """Emit response.output_item.done (function_call) followed + by response.output_item.added (function_call_output).""" + nonlocal output_index + call_id = payload.get("tool_call_id") + result = payload.get("result", "") + pending = None + if call_id: + for i, p in enumerate(pending_tool_calls): + if p["call_id"] == call_id: + pending = pending_tool_calls.pop(i) + break + if pending is None: + # Completion without a matching start — skip to avoid + # emitting orphaned done events. 
+ return + + # function_call done + done_item = { + "id": pending["item_id"], + "type": "function_call", + "status": "completed", + "name": pending["name"], + "call_id": pending["call_id"], + "arguments": pending["arguments"], + } + await _write_event("response.output_item.done", { + "type": "response.output_item.done", + "output_index": pending["output_index"], + "item": done_item, + }) + + # function_call_output added (result) + result_str = result if isinstance(result, str) else json.dumps(result) + output_parts = [{"type": "input_text", "text": result_str}] + output_item = { + "id": f"fco_{uuid.uuid4().hex[:24]}", + "type": "function_call_output", + "call_id": pending["call_id"], + "output": output_parts, + "status": "completed", + } + idx = output_index + output_index += 1 + emitted_items.append({ + "type": "function_call_output", + "call_id": pending["call_id"], + "output": output_parts, + }) + await _write_event("response.output_item.added", { + "type": "response.output_item.added", + "output_index": idx, + "item": output_item, + }) + await _write_event("response.output_item.done", { + "type": "response.output_item.done", + "output_index": idx, + "item": output_item, + }) + + # Main drain loop — thread-safe queue fed by agent callbacks. + async def _dispatch(it) -> None: + """Route a queue item to the correct SSE emitter. + + Plain strings are text deltas. Tagged tuples with + ``__tool_started__`` / ``__tool_completed__`` prefixes + are tool lifecycle events. + """ + if isinstance(it, tuple) and len(it) == 2 and isinstance(it[0], str): + tag, payload = it + if tag == "__tool_started__": + await _emit_tool_started(payload) + elif tag == "__tool_completed__": + await _emit_tool_completed(payload) + # Unknown tags are silently ignored (forward-compat). + elif isinstance(it, str): + await _emit_text_delta(it) + # Other types (non-string, non-tuple) are silently dropped. 
+ + loop = asyncio.get_running_loop() + while True: + try: + item = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5)) + except _q.Empty: + if agent_task.done(): + # Drain remaining + while True: + try: + item = stream_q.get_nowait() + if item is None: + break + await _dispatch(item) + last_activity = time.monotonic() + except _q.Empty: + break + break + if time.monotonic() - last_activity >= CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS: + await response.write(b": keepalive\n\n") + last_activity = time.monotonic() + continue + + if item is None: # EOS sentinel + break + + await _dispatch(item) + last_activity = time.monotonic() + + # Pick up agent result + usage from the completed task + try: + result, agent_usage = await agent_task + usage = agent_usage or usage + # If the agent produced a final_response but no text + # deltas were streamed (e.g. some providers only emit + # the full response at the end), emit a single fallback + # delta so Responses clients still receive a live text part. 
+ agent_final = result.get("final_response", "") if isinstance(result, dict) else "" + if agent_final and not final_text_parts: + await _emit_text_delta(agent_final) + if agent_final and not final_response_text: + final_response_text = agent_final + if isinstance(result, dict) and result.get("error") and not final_response_text: + agent_error = result["error"] + except Exception as e: # noqa: BLE001 + logger.error("Error running agent for streaming responses: %s", e, exc_info=True) + agent_error = str(e) + + # Close the message item if it was opened + final_response_text = "".join(final_text_parts) or final_response_text + if message_opened: + await _write_event("response.output_text.done", { + "type": "response.output_text.done", + "item_id": message_item_id, + "output_index": message_output_index, + "content_index": 0, + "text": final_response_text, + "logprobs": [], + }) + msg_done_item = { + "id": message_item_id, + "type": "message", + "status": "completed", + "role": "assistant", + "content": [ + {"type": "output_text", "text": final_response_text} + ], + } + await _write_event("response.output_item.done", { + "type": "response.output_item.done", + "output_index": message_output_index, + "item": msg_done_item, + }) + + # Always append a final message item in the completed + # response envelope so clients that only parse the terminal + # payload still see the assistant text. This mirrors the + # shape produced by _extract_output_items in the batch path. 
+ final_items: List[Dict[str, Any]] = list(emitted_items) + final_items.append({ + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": final_response_text or (agent_error or "")} + ], + }) + + if agent_error: + failed_env = _envelope("failed") + failed_env["output"] = final_items + failed_env["error"] = {"message": agent_error, "type": "server_error"} + failed_env["usage"] = { + "input_tokens": usage.get("input_tokens", 0), + "output_tokens": usage.get("output_tokens", 0), + "total_tokens": usage.get("total_tokens", 0), + } + await _write_event("response.failed", { + "type": "response.failed", + "response": failed_env, + }) + else: + completed_env = _envelope("completed") + completed_env["output"] = final_items + completed_env["usage"] = { + "input_tokens": usage.get("input_tokens", 0), + "output_tokens": usage.get("output_tokens", 0), + "total_tokens": usage.get("total_tokens", 0), + } + await _write_event("response.completed", { + "type": "response.completed", + "response": completed_env, + }) + + # Persist for future chaining / GET retrieval, mirroring + # the batch path behavior. + if store: + full_history = list(conversation_history) + full_history.append({"role": "user", "content": user_message}) + if isinstance(result, dict) and result.get("messages"): + full_history.extend(result["messages"]) + else: + full_history.append({"role": "assistant", "content": final_response_text}) + self._response_store.put(response_id, { + "response": completed_env, + "conversation_history": full_history, + "instructions": instructions, + "session_id": session_id, + }) + if conversation: + self._response_store.set_conversation(conversation, response_id) + + except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError): + # Client disconnected — interrupt the agent so it stops + # making upstream LLM calls, then cancel the task. 
+ agent = agent_ref[0] if agent_ref else None + if agent is not None: + try: + agent.interrupt("SSE client disconnected") + except Exception: + pass + if not agent_task.done(): + agent_task.cancel() + try: + await agent_task + except (asyncio.CancelledError, Exception): + pass + logger.info("SSE client disconnected; interrupted agent task %s", response_id) + + return response + async def _handle_responses(self, request: "web.Request") -> "web.Response": """POST /v1/responses — OpenAI Responses API format.""" auth_err = self._check_auth(request) @@ -999,16 +1584,19 @@ class APIServerAdapter(BasePlatformAdapter): # No error if conversation doesn't exist yet — it's a new conversation # Normalize input to message list - input_messages: List[Dict[str, str]] = [] + input_messages: List[Dict[str, Any]] = [] if isinstance(raw_input, str): input_messages = [{"role": "user", "content": raw_input}] elif isinstance(raw_input, list): - for item in raw_input: + for idx, item in enumerate(raw_input): if isinstance(item, str): input_messages.append({"role": "user", "content": item}) elif isinstance(item, dict): role = item.get("role", "user") - content = _normalize_chat_content(item.get("content", "")) + try: + content = _normalize_multimodal_content(item.get("content", "")) + except ValueError as exc: + return _multimodal_validation_error(exc, param=f"input[{idx}].content") input_messages.append({"role": role, "content": content}) else: return web.json_response(_openai_error("'input' must be a string or array"), status=400) @@ -1017,7 +1605,7 @@ class APIServerAdapter(BasePlatformAdapter): # This lets stateless clients supply their own history instead of # relying on server-side response chaining via previous_response_id. # Precedence: explicit conversation_history > previous_response_id. 
- conversation_history: List[Dict[str, str]] = [] + conversation_history: List[Dict[str, Any]] = [] raw_history = body.get("conversation_history") if raw_history: if not isinstance(raw_history, list): @@ -1031,15 +1619,21 @@ class APIServerAdapter(BasePlatformAdapter): _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"), status=400, ) - conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])}) + try: + entry_content = _normalize_multimodal_content(entry["content"]) + except ValueError as exc: + return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content") + conversation_history.append({"role": str(entry["role"]), "content": entry_content}) if previous_response_id: logger.debug("Both conversation_history and previous_response_id provided; using conversation_history") + stored_session_id = None if not conversation_history and previous_response_id: stored = self._response_store.get(previous_response_id) if stored is None: return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404) conversation_history = list(stored.get("conversation_history", [])) + stored_session_id = stored.get("session_id") # If no instructions provided, carry forward from previous if instructions is None: instructions = stored.get("instructions") @@ -1049,16 +1643,91 @@ class APIServerAdapter(BasePlatformAdapter): conversation_history.append(msg) # Last input message is the user_message - user_message = input_messages[-1].get("content", "") if input_messages else "" - if not user_message: + user_message: Any = input_messages[-1].get("content", "") if input_messages else "" + if not _content_has_visible_payload(user_message): return web.json_response(_openai_error("No user message found in input"), status=400) # Truncation support if body.get("truncation") == "auto" and len(conversation_history) > 100: conversation_history = conversation_history[-100:] - # Run the 
agent (with Idempotency-Key support) - session_id = str(uuid.uuid4()) + # Reuse session from previous_response_id chain so the dashboard + # groups the entire conversation under one session entry. + session_id = stored_session_id or str(uuid.uuid4()) + + stream = bool(body.get("stream", False)) + if stream: + # Streaming branch — emit OpenAI Responses SSE events as the + # agent runs so frontends can render text deltas and tool + # calls in real time. See _write_sse_responses for details. + import queue as _q + _stream_q: _q.Queue = _q.Queue() + + def _on_delta(delta): + # None from the agent is a CLI box-close signal, not EOS. + # Forwarding would kill the SSE stream prematurely; the + # SSE writer detects completion via agent_task.done(). + if delta is not None: + _stream_q.put(delta) + + def _on_tool_progress(event_type, name, preview, args, **kwargs): + """Queue non-start tool progress events if needed in future. + + The structured Responses stream uses ``tool_start_callback`` + and ``tool_complete_callback`` for exact call-id correlation, + so progress events are currently ignored here. 
+ """ + return + + def _on_tool_start(tool_call_id, function_name, function_args): + """Queue a started tool for live function_call streaming.""" + _stream_q.put(("__tool_started__", { + "tool_call_id": tool_call_id, + "name": function_name, + "arguments": function_args or {}, + })) + + def _on_tool_complete(tool_call_id, function_name, function_args, function_result): + """Queue a completed tool result for live function_call_output streaming.""" + _stream_q.put(("__tool_completed__", { + "tool_call_id": tool_call_id, + "name": function_name, + "arguments": function_args or {}, + "result": function_result, + })) + + agent_ref = [None] + agent_task = asyncio.ensure_future(self._run_agent( + user_message=user_message, + conversation_history=conversation_history, + ephemeral_system_prompt=instructions, + session_id=session_id, + stream_delta_callback=_on_delta, + tool_progress_callback=_on_tool_progress, + tool_start_callback=_on_tool_start, + tool_complete_callback=_on_tool_complete, + agent_ref=agent_ref, + )) + + response_id = f"resp_{uuid.uuid4().hex[:28]}" + model_name = body.get("model", self._model_name) + created_at = int(time.time()) + + return await self._write_sse_responses( + request=request, + response_id=response_id, + model=model_name, + created_at=created_at, + stream_q=_stream_q, + agent_task=agent_task, + agent_ref=agent_ref, + conversation_history=conversation_history, + user_message=user_message, + instructions=instructions, + conversation=conversation, + store=store, + session_id=session_id, + ) async def _compute_response(): return await self._run_agent( @@ -1133,6 +1802,7 @@ class APIServerAdapter(BasePlatformAdapter): "response": response_data, "conversation_history": full_history, "instructions": instructions, + "session_id": session_id, }) # Update conversation mapping so the next request with the same # conversation name automatically chains to this response @@ -1486,6 +2156,8 @@ class APIServerAdapter(BasePlatformAdapter): session_id: 
Optional[str] = None, stream_delta_callback=None, tool_progress_callback=None, + tool_start_callback=None, + tool_complete_callback=None, agent_ref: Optional[list] = None, ) -> tuple: """ @@ -1499,7 +2171,7 @@ class APIServerAdapter(BasePlatformAdapter): callers (e.g. the SSE writer) to call ``agent.interrupt()`` from another thread to stop in-progress LLM calls. """ - loop = asyncio.get_event_loop() + loop = asyncio.get_running_loop() def _run(): agent = self._create_agent( @@ -1507,6 +2179,8 @@ class APIServerAdapter(BasePlatformAdapter): session_id=session_id, stream_delta_callback=stream_delta_callback, tool_progress_callback=tool_progress_callback, + tool_start_callback=tool_start_callback, + tool_complete_callback=tool_complete_callback, ) if agent_ref is not None: agent_ref[0] = agent @@ -1643,10 +2317,12 @@ class APIServerAdapter(BasePlatformAdapter): if previous_response_id: logger.debug("Both conversation_history and previous_response_id provided; using conversation_history") + stored_session_id = None if not conversation_history and previous_response_id: stored = self._response_store.get(previous_response_id) if stored: conversation_history = list(stored.get("conversation_history", [])) + stored_session_id = stored.get("session_id") if instructions is None: instructions = stored.get("instructions") @@ -1665,7 +2341,7 @@ class APIServerAdapter(BasePlatformAdapter): ) conversation_history.append({"role": msg["role"], "content": str(content)}) - session_id = body.get("session_id") or run_id + session_id = body.get("session_id") or stored_session_id or run_id ephemeral_system_prompt = instructions async def _run_and_close(): diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index f7943da473..bda137cf3b 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -6,6 +6,7 @@ and implement the required methods. 
""" import asyncio +import inspect import ipaddress import logging import os @@ -551,6 +552,39 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> raise last_exc +# --------------------------------------------------------------------------- +# Video cache utilities +# +# Same pattern as image/audio cache -- videos from platforms are downloaded +# here so the agent can reference them by local file path. +# --------------------------------------------------------------------------- + +VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache") + +SUPPORTED_VIDEO_TYPES = { + ".mp4": "video/mp4", + ".mov": "video/quicktime", + ".webm": "video/webm", + ".mkv": "video/x-matroska", + ".avi": "video/x-msvideo", +} + + +def get_video_cache_dir() -> Path: + """Return the video cache directory, creating it if it doesn't exist.""" + VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True) + return VIDEO_CACHE_DIR + + +def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str: + """Save raw video bytes to the cache and return the absolute file path.""" + cache_dir = get_video_cache_dir() + filename = f"video_{uuid.uuid4().hex[:12]}{ext}" + filepath = cache_dir / filename + filepath.write_bytes(data) + return str(filepath) + + # --------------------------------------------------------------------------- # Document cache utilities # @@ -669,6 +703,15 @@ class MessageEvent: # Original platform data raw_message: Any = None message_id: Optional[str] = None + + # Platform-specific update identifier. For Telegram this is the + # ``update_id`` from the PTB Update wrapper; other platforms currently + # ignore it. Used by ``/restart`` to record the triggering update so the + # new gateway can advance the Telegram offset past it and avoid processing + # the same ``/restart`` twice if PTB's graceful-shutdown ACK times out + # ("Error while calling `get_updates` one more time to mark all fetched + # updates" in gateway.log). 
+ platform_update_id: Optional[int] = None # Media attachments # media_urls: local file paths (for vision tool access) @@ -682,6 +725,10 @@ class MessageEvent: # Auto-loaded skill(s) for topic/channel bindings (e.g., Telegram DM Topics, # Discord channel_skill_bindings). A single name or ordered list. auto_skill: Optional[str | list[str]] = None + + # Per-channel ephemeral system prompt (e.g. Discord channel_prompts). + # Applied at API call time and never persisted to transcript history. + channel_prompt: Optional[str] = None # Internal flag — set for synthetic events (e.g. background process # completion notifications) that must bypass user authorization checks. @@ -730,25 +777,56 @@ def merge_pending_message_event( pending_messages: Dict[str, MessageEvent], session_key: str, event: MessageEvent, + *, + merge_text: bool = False, ) -> None: """Store or merge a pending event for a session. Photo bursts/albums often arrive as multiple near-simultaneous PHOTO events. Merge those into the existing queued event so the next turn sees - the whole burst, while non-photo follow-ups still replace the pending - event normally. + the whole burst. + + When ``merge_text`` is enabled, rapid follow-up TEXT events are appended + instead of replacing the pending turn. This is used for Telegram bursty + follow-ups so a multi-part user thought is not silently truncated to only + the last queued fragment. 
""" existing = pending_messages.get(session_key) - if ( - existing - and getattr(existing, "message_type", None) == MessageType.PHOTO - and event.message_type == MessageType.PHOTO - ): - existing.media_urls.extend(event.media_urls) - existing.media_types.extend(event.media_types) - if event.text: - existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text) - return + if existing: + existing_is_photo = getattr(existing, "message_type", None) == MessageType.PHOTO + incoming_is_photo = event.message_type == MessageType.PHOTO + existing_has_media = bool(existing.media_urls) + incoming_has_media = bool(event.media_urls) + + if existing_is_photo and incoming_is_photo: + existing.media_urls.extend(event.media_urls) + existing.media_types.extend(event.media_types) + if event.text: + existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text) + return + + if existing_has_media or incoming_has_media: + if incoming_has_media: + existing.media_urls.extend(event.media_urls) + existing.media_types.extend(event.media_types) + if event.text: + if existing.text: + existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text) + else: + existing.text = event.text + if existing_is_photo or incoming_is_photo: + existing.message_type = MessageType.PHOTO + return + + if ( + merge_text + and getattr(existing, "message_type", None) == MessageType.TEXT + and event.message_type == MessageType.TEXT + ): + if event.text: + existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text + return + pending_messages[session_key] = event @@ -776,6 +854,36 @@ _RETRYABLE_ERROR_PATTERNS = ( MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]] +def resolve_channel_prompt( + config_extra: dict, + channel_id: str, + parent_id: str | None = None, +) -> str | None: + """Resolve a per-channel ephemeral prompt from platform config. + + Looks up ``channel_prompts`` in the adapter's ``config.extra`` dict. 
+ Prefers an exact match on *channel_id*; falls back to *parent_id* + (useful for forum threads / child channels inheriting a parent prompt). + + Returns the prompt string, or None if no match is found. Blank/whitespace- + only prompts are treated as absent. + """ + prompts = config_extra.get("channel_prompts") or {} + if not isinstance(prompts, dict): + return None + + for key in (channel_id, parent_id): + if not key: + continue + prompt = prompts.get(key) + if prompt is None: + continue + prompt = str(prompt).strip() + if prompt: + return prompt + return None + + class BasePlatformAdapter(ABC): """ Base class for platform adapters. @@ -805,6 +913,12 @@ class BasePlatformAdapter(ABC): # Gateway shutdown cancels these so an old gateway instance doesn't keep # working on a task after --replace or manual restarts. self._background_tasks: set[asyncio.Task] = set() + # One-shot callbacks to fire after the main response is delivered. + # Keyed by session_key. Values are either a bare callback (legacy) or + # a ``(generation, callback)`` tuple so GatewayRunner can make deferred + # deliveries generation-aware and avoid stale runs clearing callbacks + # registered by a fresher run for the same session. + self._post_delivery_callbacks: Dict[str, Any] = {} self._expected_cancelled_tasks: set[asyncio.Task] = set() self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None # Chats where auto-TTS on voice input is disabled (set by /voice off) @@ -975,16 +1089,40 @@ class BasePlatformAdapter(ABC): """ pass + # Default: the adapter treats ``finalize=True`` on edit_message as a + # no-op and is happy to have the stream consumer skip redundant final + # edits. Subclasses that *require* an explicit finalize call to close + # out the message lifecycle (e.g. rich card / AI assistant surfaces + # such as DingTalk AI Cards) override this to True (class attribute or + # property) so the stream consumer knows not to short-circuit. 
+ REQUIRES_EDIT_FINALIZE: bool = False + async def edit_message( self, chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """ Edit a previously sent message. Optional — platforms that don't support editing return success=False and callers fall back to sending a new message. + + ``finalize`` signals that this is the last edit in a streaming + sequence. Most platforms (Telegram, Slack, Discord, Matrix, + etc.) treat it as a no-op because their edit APIs have no notion + of message lifecycle state — an edit is an edit. Platforms that + render streaming updates with a distinct "in progress" state and + require explicit closure (e.g. rich card / AI assistant surfaces + such as DingTalk AI Cards) use it to finalize the message and + transition the UI out of the streaming indicator — those should + also set ``REQUIRES_EDIT_FINALIZE = True`` so callers route a + final edit through even when content is unchanged. Callers + should set ``finalize=True`` on the final edit of a streamed + response (typically when ``got_done`` fires in the stream + consumer) and leave it ``False`` on intermediate edits. """ return SendResult(success=False, error="Not supported") @@ -1221,7 +1359,7 @@ class BasePlatformAdapter(ABC): path = path[1:-1].strip() path = path.lstrip("`\"'").rstrip("`\"',.;:)}]") if path: - media.append((path, has_voice_tag)) + media.append((os.path.expanduser(path), has_voice_tag)) # Remove MEDIA tags from content (including surrounding quote/backtick wrappers) if media: @@ -1298,7 +1436,13 @@ class BasePlatformAdapter(ABC): return paths, cleaned - async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None: + async def _keep_typing( + self, + chat_id: str, + interval: float = 2.0, + metadata=None, + stop_event: asyncio.Event | None = None, + ) -> None: """ Continuously send typing indicator until cancelled. 
@@ -1312,9 +1456,18 @@ class BasePlatformAdapter(ABC): """ try: while True: + if stop_event is not None and stop_event.is_set(): + return if chat_id not in self._typing_paused: await self.send_typing(chat_id, metadata=metadata) - await asyncio.sleep(interval) + if stop_event is None: + await asyncio.sleep(interval) + continue + try: + await asyncio.wait_for(stop_event.wait(), timeout=interval) + except asyncio.TimeoutError: + continue + return except asyncio.CancelledError: pass # Normal cancellation when handler completes finally: @@ -1341,6 +1494,59 @@ class BasePlatformAdapter(ABC): """Resume typing indicator for a chat after approval resolves.""" self._typing_paused.discard(chat_id) + async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None: + """Signal the active session loop to stop and clear typing immediately.""" + if session_key: + interrupt_event = self._active_sessions.get(session_key) + if interrupt_event is not None: + interrupt_event.set() + try: + await self.stop_typing(chat_id) + except Exception: + pass + + def register_post_delivery_callback( + self, + session_key: str, + callback: Callable, + *, + generation: int | None = None, + ) -> None: + """Register a deferred callback to fire after the main response. + + ``generation`` lets callers tie the callback to a specific gateway run + generation so stale runs cannot clear callbacks owned by a fresher run. 
+ """ + if not session_key or not callable(callback): + return + if generation is None: + self._post_delivery_callbacks[session_key] = callback + else: + self._post_delivery_callbacks[session_key] = (int(generation), callback) + + def pop_post_delivery_callback( + self, + session_key: str, + *, + generation: int | None = None, + ) -> Callable | None: + """Pop a deferred callback, optionally requiring generation ownership.""" + if not session_key: + return None + entry = self._post_delivery_callbacks.get(session_key) + if entry is None: + return None + if isinstance(entry, tuple) and len(entry) == 2: + entry_generation, callback = entry + if generation is not None and int(entry_generation) != int(generation): + return None + self._post_delivery_callbacks.pop(session_key, None) + return callback if callable(callback) else None + if generation is not None: + return None + self._post_delivery_callbacks.pop(session_key, None) + return entry if callable(entry) else None + # ── Processing lifecycle hooks ────────────────────────────────────────── # Subclasses override these to react to message processing events # (e.g. Discord adds 👀/✅/❌ reactions). @@ -1509,7 +1715,9 @@ class BasePlatformAdapter(ABC): # session lifecycle and its cleanup races with the running task # (see PR #4926). cmd = event.get_command() - if cmd in ("approve", "deny", "status", "stop", "new", "reset", "background", "restart"): + from hermes_cli.commands import should_bypass_active_session + + if should_bypass_active_session(cmd): logger.debug( "[%s] Command '/%s' bypassing active-session guard for %s", self.name, cmd, session_key, @@ -1609,10 +1817,23 @@ class BasePlatformAdapter(ABC): # Fall back to a new Event only if the entry was removed externally. 
interrupt_event = self._active_sessions.get(session_key) or asyncio.Event() self._active_sessions[session_key] = interrupt_event + callback_generation = getattr(interrupt_event, "_hermes_run_generation", None) # Start continuous typing indicator (refreshes every 2 seconds) _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None - typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata)) + _keep_typing_kwargs = {"metadata": _thread_metadata} + try: + _keep_typing_sig = inspect.signature(self._keep_typing) + except (TypeError, ValueError): + _keep_typing_sig = None + if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters: + _keep_typing_kwargs["stop_event"] = interrupt_event + typing_task = asyncio.create_task( + self._keep_typing( + event.source.chat_id, + **_keep_typing_kwargs, + ) + ) try: await self._run_processing_hook("on_processing_start", event) @@ -1624,6 +1845,21 @@ class BasePlatformAdapter(ABC): # streaming already delivered the text (already_sent=True) or # when the message was queued behind an active agent. Log at # DEBUG to avoid noisy warnings for expected behavior. + # + # Suppress stale response when the session was interrupted by a + # new message that hasn't been consumed yet. The pending message + # is processed by the pending-message handler below (#8221/#2483). 
+ if ( + response + and interrupt_event.is_set() + and session_key in self._pending_messages + ): + logger.info( + "[%s] Suppressing stale response for interrupted session %s", + self.name, + session_key, + ) + response = None if not response: logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id) if response: @@ -1806,9 +2042,18 @@ class BasePlatformAdapter(ABC): if session_key in self._pending_messages: pending_event = self._pending_messages.pop(session_key) logger.debug("[%s] Processing queued message from interrupt", self.name) - # Clean up current session before processing pending - if session_key in self._active_sessions: - del self._active_sessions[session_key] + # Keep the _active_sessions entry live across the turn chain + # and only CLEAR the interrupt Event — do NOT delete the entry. + # If we deleted here, a concurrent inbound message arriving + # during the awaits below would pass the Level-1 guard, spawn + # its own _process_message_background, and run simultaneously + # with the recursive drain below. Two agents on one + # session_key = duplicate responses, duplicate tool calls. + # Clearing the Event keeps the guard live so follow-ups take + # the busy-handler path (queue + interrupt) as intended. + _active = self._active_sessions.get(session_key) + if _active is not None: + _active.clear() typing_task.cancel() try: await typing_task @@ -1845,6 +2090,21 @@ class BasePlatformAdapter(ABC): except Exception: pass # Last resort — don't let error reporting crash the handler finally: + # Fire any one-shot post-delivery callback registered for this + # session (e.g. deferred background-review notifications). 
+ _callback_generation = callback_generation + if hasattr(self, "pop_post_delivery_callback"): + _post_cb = self.pop_post_delivery_callback( + session_key, + generation=_callback_generation, + ) + else: + _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None) + if callable(_post_cb): + try: + _post_cb() + except Exception: + pass # Stop typing indicator typing_task.cancel() try: @@ -1858,9 +2118,37 @@ class BasePlatformAdapter(ABC): await self.stop_typing(event.source.chat_id) except Exception: pass - # Clean up session tracking - if session_key in self._active_sessions: - del self._active_sessions[session_key] + # Late-arrival drain: a message may have arrived during the + # cleanup awaits above (typing_task cancel, stop_typing). Such + # messages passed the Level-1 guard (entry still live, Event + # possibly set) and landed in _pending_messages via the + # busy-handler path. Without this block, we would delete the + # active-session entry and the queued message would be silently + # dropped (user never gets a reply). + late_pending = self._pending_messages.pop(session_key, None) + if late_pending is not None: + logger.debug( + "[%s] Late-arrival pending message during cleanup — spawning drain task", + self.name, + ) + _active = self._active_sessions.get(session_key) + if _active is not None: + _active.clear() + drain_task = asyncio.create_task( + self._process_message_background(late_pending, session_key) + ) + try: + self._background_tasks.add(drain_task) + drain_task.add_done_callback(self._background_tasks.discard) + except TypeError: + # Tests stub create_task() with non-hashable sentinels; tolerate. + pass + # Leave _active_sessions[session_key] populated — the drain + # task's own lifecycle will clean it up. 
+ else: + # Clean up session tracking + if session_key in self._active_sessions: + del self._active_sessions[session_key] async def cancel_background_tasks(self) -> None: """Cancel any in-flight background message-processing tasks. @@ -1868,12 +2156,26 @@ class BasePlatformAdapter(ABC): Used during gateway shutdown/replacement so active sessions from the old process do not keep running after adapters are being torn down. """ - tasks = [task for task in self._background_tasks if not task.done()] - for task in tasks: - self._expected_cancelled_tasks.add(task) - task.cancel() - if tasks: + # Loop until no new tasks appear. Without this, a message + # arriving during the `await asyncio.gather` below would spawn + # a fresh _process_message_background task (added to + # self._background_tasks at line ~1668 via handle_message), + # and the _background_tasks.clear() at the end of this method + # would drop the reference — the task runs untracked against a + # disconnecting adapter, logs send-failures, and may linger + # until it completes on its own. Retrying the drain until the + # task set stabilizes closes the window. + MAX_DRAIN_ROUNDS = 5 + for _ in range(MAX_DRAIN_ROUNDS): + tasks = [task for task in self._background_tasks if not task.done()] + if not tasks: + break + for task in tasks: + self._expected_cancelled_tasks.add(task) + task.cancel() await asyncio.gather(*tasks, return_exceptions=True) + # Loop: late-arrival tasks spawned during the gather above + # will be in self._background_tasks now. Re-check. 
self._background_tasks.clear() self._expected_cancelled_tasks.clear() self._pending_messages.clear() @@ -1898,6 +2200,7 @@ class BasePlatformAdapter(ABC): chat_topic: Optional[str] = None, user_id_alt: Optional[str] = None, chat_id_alt: Optional[str] = None, + is_bot: bool = False, ) -> SessionSource: """Helper to build a SessionSource for this platform.""" # Normalize empty topic to None @@ -1914,6 +2217,7 @@ class BasePlatformAdapter(ABC): chat_topic=chat_topic.strip() if chat_topic else None, user_id_alt=user_id_alt, chat_id_alt=chat_id_alt, + is_bot=is_bot, ) @abstractmethod diff --git a/gateway/platforms/dingtalk.py b/gateway/platforms/dingtalk.py index dfa4f73632..3037e402b2 100644 --- a/gateway/platforms/dingtalk.py +++ b/gateway/platforms/dingtalk.py @@ -1,45 +1,92 @@ """ DingTalk platform adapter using Stream Mode. -Uses dingtalk-stream SDK for real-time message reception without webhooks. +Uses dingtalk-stream SDK (>=0.20) for real-time message reception without webhooks. Responses are sent via DingTalk's session webhook (markdown format). +Supports: text, images, audio, video, rich text, files, and group @mentions. Requires: - pip install dingtalk-stream httpx + pip install "dingtalk-stream>=0.20" httpx DINGTALK_CLIENT_ID and DINGTALK_CLIENT_SECRET env vars Configuration in config.yaml: platforms: dingtalk: enabled: true + # Optional group-chat gating (mirrors Slack/Telegram/Discord): + require_mention: true # or DINGTALK_REQUIRE_MENTION env var + # free_response_chats: # conversations that skip require_mention + # - cidABC== + # mention_patterns: # regex wake-words (e.g. 
Chinese bot names) + # - "^小马" + # allowed_users: # staff_id or sender_id list; "*" = any + # - "manager1234" extra: client_id: "your-app-key" # or DINGTALK_CLIENT_ID env var client_secret: "your-secret" # or DINGTALK_CLIENT_SECRET env var """ import asyncio +import json import logging import os import re +import traceback import uuid from datetime import datetime, timezone -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional, Set try: import dingtalk_stream - from dingtalk_stream import ChatbotHandler, ChatbotMessage + from dingtalk_stream import ChatbotMessage + from dingtalk_stream.frames import CallbackMessage, AckMessage + DINGTALK_STREAM_AVAILABLE = True except ImportError: DINGTALK_STREAM_AVAILABLE = False dingtalk_stream = None # type: ignore[assignment] + ChatbotMessage = None # type: ignore[assignment] + CallbackMessage = None # type: ignore[assignment] + AckMessage = type( + "AckMessage", + (), + { + "STATUS_OK": 200, + "STATUS_SYSTEM_EXCEPTION": 500, + }, + ) # type: ignore[assignment] try: import httpx + HTTPX_AVAILABLE = True except ImportError: HTTPX_AVAILABLE = False httpx = None # type: ignore[assignment] +# Card SDK for AI Cards (following QwenPaw pattern) +try: + from alibabacloud_dingtalk.card_1_0 import ( + client as dingtalk_card_client, + models as dingtalk_card_models, + ) + from alibabacloud_dingtalk.robot_1_0 import ( + client as dingtalk_robot_client, + models as dingtalk_robot_models, + ) + from alibabacloud_tea_openapi import models as open_api_models + from alibabacloud_tea_util import models as tea_util_models + + CARD_SDK_AVAILABLE = True +except ImportError: + CARD_SDK_AVAILABLE = False + dingtalk_card_client = None + dingtalk_card_models = None + dingtalk_robot_client = None + dingtalk_robot_models = None + open_api_models = None + tea_util_models = None + from gateway.config import Platform, PlatformConfig from gateway.platforms.helpers import MessageDeduplicator from gateway.platforms.base import 
( @@ -54,7 +101,13 @@ logger = logging.getLogger(__name__) MAX_MESSAGE_LENGTH = 20000 RECONNECT_BACKOFF = [2, 5, 10, 30, 60] _SESSION_WEBHOOKS_MAX = 500 -_DINGTALK_WEBHOOK_RE = re.compile(r'^https://api\.dingtalk\.com/') +_DINGTALK_WEBHOOK_RE = re.compile(r'^https://(?:api|oapi)\.dingtalk\.com/') + +# DingTalk message type → runtime content type +DINGTALK_TYPE_MAPPING = { + "picture": "image", + "voice": "audio", +} def check_dingtalk_requirements() -> bool: @@ -72,46 +125,136 @@ class DingTalkAdapter(BasePlatformAdapter): The dingtalk-stream SDK maintains a long-lived WebSocket connection. Incoming messages arrive via a ChatbotHandler callback. Replies are sent via the incoming message's session_webhook URL using httpx. + + Features: + - Text messages (plain + rich text) + - Images, audio, video, files (via download codes) + - Group chat @mention detection + - Session webhook caching with expiry tracking + - Markdown formatted replies """ MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH + @property + def SUPPORTS_MESSAGE_EDITING(self) -> bool: # noqa: N802 + """Edits only meaningful when AI Cards are configured. + + The gateway gates streaming cursor + edit behaviour on this flag, + so we must reflect the actual adapter capability at runtime. + """ + return bool(self._card_template_id and self._card_sdk) + + @property + def REQUIRES_EDIT_FINALIZE(self) -> bool: # noqa: N802 + """AI Card lifecycle requires an explicit ``finalize=True`` edit + to close the streaming indicator, even when the final content is + identical to the last streamed update. Enabled only when cards + are configured — webhook-only DingTalk doesn't need it. 
+ """ + return bool(self._card_template_id and self._card_sdk) + def __init__(self, config: PlatformConfig): super().__init__(config, Platform.DINGTALK) extra = config.extra or {} - self._client_id: str = extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID", "") - self._client_secret: str = extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET", "") + self._client_id: str = extra.get("client_id") or os.getenv( + "DINGTALK_CLIENT_ID", "" + ) + self._client_secret: str = extra.get("client_secret") or os.getenv( + "DINGTALK_CLIENT_SECRET", "" + ) + + # Group-chat gating (mirrors Slack/Telegram/Discord/WhatsApp conventions). + # Mention state is the structured ``is_in_at_list`` attribute from the + # dingtalk-stream SDK (set from the callback's ``isInAtList`` flag), + # not text parsing. + self._mention_patterns: List[re.Pattern] = self._compile_mention_patterns() + self._allowed_users: Set[str] = self._load_allowed_users() self._stream_client: Any = None self._stream_task: Optional[asyncio.Task] = None self._http_client: Optional["httpx.AsyncClient"] = None + self._card_sdk: Optional[Any] = None + self._robot_sdk: Optional[Any] = None + self._robot_code: str = extra.get("robot_code") or self._client_id # Message deduplication self._dedup = MessageDeduplicator(max_size=1000) - # Map chat_id -> session_webhook for reply routing - self._session_webhooks: Dict[str, str] = {} + # Map chat_id -> (session_webhook, expired_time_ms) for reply routing + self._session_webhooks: Dict[str, tuple[str, int]] = {} + # Map chat_id -> last inbound ChatbotMessage. Keyed by chat_id instead + # of a single class attribute to avoid cross-message clobbering when + # multiple conversations run concurrently. 
+ self._message_contexts: Dict[str, Any] = {} + self._card_template_id: Optional[str] = extra.get("card_template_id") + + # Chats for which we've already fired the Done reaction — prevents + # double-firing across segment boundaries or parallel flows + # (tool-progress + stream-consumer both finalizing their cards). + # Reset each inbound message. + self._done_emoji_fired: Set[str] = set() + # Cards in streaming state per chat: chat_id -> { out_track_id -> last_content }. + # Every `send()` creates+finalizes a card (closed state). A subsequent + # `edit_message(finalize=False)` re-opens the card (DingTalk's API + # allows streaming_update on a finalized card — it flips back to + # streaming). We track those reopened cards so the next `send()` can + # auto-close them as siblings — otherwise tool-progress cards get + # stuck in streaming state forever. + self._streaming_cards: Dict[str, Dict[str, str]] = {} + # Track fire-and-forget emoji/reaction coroutines so Python's GC + # doesn't drop them mid-flight, and we can cancel them on disconnect. + self._bg_tasks: Set[asyncio.Task] = set() # -- Connection lifecycle ----------------------------------------------- async def connect(self) -> bool: """Connect to DingTalk via Stream Mode.""" if not DINGTALK_STREAM_AVAILABLE: - logger.warning("[%s] dingtalk-stream not installed. Run: pip install dingtalk-stream", self.name) + logger.warning( + "[%s] dingtalk-stream not installed. Run: pip install 'dingtalk-stream>=0.20'", + self.name, + ) return False if not HTTPX_AVAILABLE: - logger.warning("[%s] httpx not installed. Run: pip install httpx", self.name) + logger.warning( + "[%s] httpx not installed. 
Run: pip install httpx", self.name + ) return False if not self._client_id or not self._client_secret: - logger.warning("[%s] DINGTALK_CLIENT_ID and DINGTALK_CLIENT_SECRET required", self.name) + logger.warning( + "[%s] DINGTALK_CLIENT_ID and DINGTALK_CLIENT_SECRET required", self.name + ) return False try: self._http_client = httpx.AsyncClient(timeout=30.0) - credential = dingtalk_stream.Credential(self._client_id, self._client_secret) + credential = dingtalk_stream.Credential( + self._client_id, self._client_secret + ) self._stream_client = dingtalk_stream.DingTalkStreamClient(credential) + # Initialize card SDK if available and configured + if CARD_SDK_AVAILABLE and self._card_template_id: + sdk_config = open_api_models.Config() + sdk_config.protocol = "https" + sdk_config.region_id = "central" + self._card_sdk = dingtalk_card_client.Client(sdk_config) + self._robot_sdk = dingtalk_robot_client.Client(sdk_config) + logger.info( + "[%s] Card SDK initialized with template: %s", + self.name, + self._card_template_id, + ) + elif CARD_SDK_AVAILABLE: + # Initialize robot SDK even without card template (for media download) + sdk_config = open_api_models.Config() + sdk_config.protocol = "https" + sdk_config.region_id = "central" + self._robot_sdk = dingtalk_robot_client.Client(sdk_config) + logger.info("[%s] Robot SDK initialized (media download)", self.name) + # Capture the current event loop for cross-thread dispatch loop = asyncio.get_running_loop() handler = _IncomingHandler(self, loop) @@ -128,12 +271,12 @@ class DingTalkAdapter(BasePlatformAdapter): return False async def _run_stream(self) -> None: - """Run the blocking stream client with auto-reconnection.""" + """Run the async stream client with auto-reconnection.""" backoff_idx = 0 while self._running: try: logger.debug("[%s] Starting stream client...", self.name) - await asyncio.to_thread(self._stream_client.start) + await self._stream_client.start() except asyncio.CancelledError: return except Exception as e: 
@@ -154,37 +297,240 @@ class DingTalkAdapter(BasePlatformAdapter): self._running = False self._mark_disconnected() + # Close the active websocket first so the stream task sees the + # disconnection and exits cleanly, rather than getting stuck + # awaiting frames that will never arrive. + websocket = getattr(self._stream_client, "websocket", None) if self._stream_client else None + if websocket is not None: + try: + await websocket.close() + except Exception as e: + logger.debug("[%s] websocket close during disconnect failed: %s", self.name, e) + if self._stream_task: + # Try graceful close first if SDK supports it. The SDK's close() + # is sync and may block on network I/O, so offload to a thread. + if hasattr(self._stream_client, "close"): + try: + await asyncio.to_thread(self._stream_client.close) + except Exception: + pass + self._stream_task.cancel() try: - await self._stream_task - except asyncio.CancelledError: - pass + await asyncio.wait_for(self._stream_task, timeout=5.0) + except (asyncio.CancelledError, asyncio.TimeoutError): + logger.debug("[%s] stream task did not exit cleanly during disconnect", self.name) self._stream_task = None + # Cancel any in-flight background tasks (emoji reactions, etc.) 
+ if self._bg_tasks: + for task in list(self._bg_tasks): + task.cancel() + await asyncio.gather(*self._bg_tasks, return_exceptions=True) + self._bg_tasks.clear() + if self._http_client: await self._http_client.aclose() self._http_client = None self._stream_client = None self._session_webhooks.clear() + self._message_contexts.clear() + self._streaming_cards.clear() + self._done_emoji_fired.clear() self._dedup.clear() logger.info("[%s] Disconnected", self.name) + # -- Group gating -------------------------------------------------------- + + def _dingtalk_require_mention(self) -> bool: + """Return whether group chats should require an explicit bot trigger.""" + configured = self.config.extra.get("require_mention") + if configured is not None: + if isinstance(configured, str): + return configured.lower() in ("true", "1", "yes", "on") + return bool(configured) + return os.getenv("DINGTALK_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on") + + def _dingtalk_free_response_chats(self) -> Set[str]: + raw = self.config.extra.get("free_response_chats") + if raw is None: + raw = os.getenv("DINGTALK_FREE_RESPONSE_CHATS", "") + if isinstance(raw, list): + return {str(part).strip() for part in raw if str(part).strip()} + return {part.strip() for part in str(raw).split(",") if part.strip()} + + def _compile_mention_patterns(self) -> List[re.Pattern]: + """Compile optional regex wake-word patterns for group triggers.""" + patterns = self.config.extra.get("mention_patterns") if self.config.extra else None + if patterns is None: + raw = os.getenv("DINGTALK_MENTION_PATTERNS", "").strip() + if raw: + try: + loaded = json.loads(raw) + except Exception: + loaded = [part.strip() for part in raw.splitlines() if part.strip()] + if not loaded: + loaded = [part.strip() for part in raw.split(",") if part.strip()] + patterns = loaded + + if patterns is None: + return [] + if isinstance(patterns, str): + patterns = [patterns] + if not isinstance(patterns, list): + logger.warning( + 
"[%s] dingtalk mention_patterns must be a list or string; got %s", + self.name, + type(patterns).__name__, + ) + return [] + + compiled: List[re.Pattern] = [] + for pattern in patterns: + if not isinstance(pattern, str) or not pattern.strip(): + continue + try: + compiled.append(re.compile(pattern, re.IGNORECASE)) + except re.error as exc: + logger.warning("[%s] Invalid DingTalk mention pattern %r: %s", self.name, pattern, exc) + if compiled: + logger.info("[%s] Loaded %d DingTalk mention pattern(s)", self.name, len(compiled)) + return compiled + + def _load_allowed_users(self) -> Set[str]: + """Load allowed-users list from config.extra or env var. + + IDs are matched case-insensitively against the sender's ``staff_id`` and + ``sender_id``. A wildcard ``*`` disables the check. + """ + raw = self.config.extra.get("allowed_users") if self.config.extra else None + if raw is None: + raw = os.getenv("DINGTALK_ALLOWED_USERS", "") + if isinstance(raw, list): + items = [str(part).strip() for part in raw if str(part).strip()] + else: + items = [part.strip() for part in str(raw).split(",") if part.strip()] + return {item.lower() for item in items} + + def _is_user_allowed(self, sender_id: str, sender_staff_id: str) -> bool: + if not self._allowed_users or "*" in self._allowed_users: + return True + candidates = {(sender_id or "").lower(), (sender_staff_id or "").lower()} + candidates.discard("") + return bool(candidates & self._allowed_users) + + def _message_mentions_bot(self, message: "ChatbotMessage") -> bool: + """True if the bot was @-mentioned in a group message. + + dingtalk-stream sets ``is_in_at_list`` on the incoming ChatbotMessage + when the bot is addressed via @-mention. 
+ """ + return bool(getattr(message, "is_in_at_list", False)) + + def _message_matches_mention_patterns(self, text: str) -> bool: + if not text or not self._mention_patterns: + return False + return any(pattern.search(text) for pattern in self._mention_patterns) + + def _should_process_message(self, message: "ChatbotMessage", text: str, is_group: bool, chat_id: str) -> bool: + """Apply DingTalk group trigger rules. + + DMs remain unrestricted (subject to ``allowed_users`` which is enforced + earlier). Group messages are accepted when: + - the chat is explicitly allowlisted in ``free_response_chats`` + - ``require_mention`` is disabled + - the bot is @mentioned (``is_in_at_list``) + - the text matches a configured regex wake-word pattern + """ + if not is_group: + return True + if chat_id and chat_id in self._dingtalk_free_response_chats(): + return True + if not self._dingtalk_require_mention(): + return True + if self._message_mentions_bot(message): + return True + return self._message_matches_mention_patterns(text) + + def _spawn_bg(self, coro) -> None: + """Start a fire-and-forget coroutine and track it for cleanup.""" + task = asyncio.create_task(coro) + self._bg_tasks.add(task) + task.add_done_callback(self._bg_tasks.discard) + + # -- AI Card lifecycle helpers ------------------------------------------ + + async def _close_streaming_siblings(self, chat_id: str) -> None: + """Finalize any previously-open streaming cards for this chat. + + Called at the start of every ``send()`` so lingering tool-progress + cards that were reopened by ``edit_message(finalize=False)`` get + cleanly closed before the next card is created. Without this, + tool-progress cards stay stuck in streaming state after the agent + moves on (there is no explicit "turn end" signal from the gateway). 
+ """ + cards = self._streaming_cards.pop(chat_id, None) + if not cards: + return + token = await self._get_access_token() + if not token: + return + for out_track_id, last_content in list(cards.items()): + try: + await self._stream_card_content( + out_track_id, token, last_content, finalize=True, + ) + logger.debug( + "[%s] AI Card sibling closed: %s", + self.name, out_track_id, + ) + except Exception as e: + logger.debug( + "[%s] Sibling close failed for %s: %s", + self.name, out_track_id, e, + ) + + def _fire_done_reaction(self, chat_id: str) -> None: + """Swap 🤔Thinking → 🥳Done on the original user message. + + Idempotent per chat_id — safe to call from segment-break flushes + and final-done flushes without double-firing. + """ + if chat_id in self._done_emoji_fired: + return + self._done_emoji_fired.add(chat_id) + msg = self._message_contexts.get(chat_id) + if not msg: + return + msg_id = getattr(msg, "message_id", "") or "" + conversation_id = getattr(msg, "conversation_id", "") or "" + if not (msg_id and conversation_id): + return + + async def _swap() -> None: + await self._send_emotion( + msg_id, conversation_id, "🤔Thinking", recall=True, + ) + await self._send_emotion( + msg_id, conversation_id, "🥳Done", recall=False, + ) + + self._spawn_bg(_swap()) + # -- Inbound message processing ----------------------------------------- - async def _on_message(self, message: "ChatbotMessage") -> None: + async def _on_message( + self, + message: "ChatbotMessage", + ) -> None: """Process an incoming DingTalk chatbot message.""" msg_id = getattr(message, "message_id", None) or uuid.uuid4().hex if self._dedup.is_duplicate(msg_id): logger.debug("[%s] Duplicate message %s, skipping", self.name, msg_id) return - text = self._extract_text(message) - if not text: - logger.debug("[%s] Empty message, skipping", self.name) - return - # Chat context conversation_id = getattr(message, "conversation_id", "") or "" conversation_type = getattr(message, "conversation_type", "1") @@ 
-196,16 +542,62 @@ class DingTalkAdapter(BasePlatformAdapter): chat_id = conversation_id or sender_id chat_type = "group" if is_group else "dm" - # Store session webhook for reply routing (validate origin to prevent SSRF) + # Allowed-users gate (applies to both DM and group) + if not self._is_user_allowed(sender_id, sender_staff_id): + logger.debug( + "[%s] Dropping message from non-allowlisted user staff_id=%s sender_id=%s", + self.name, sender_staff_id, sender_id, + ) + return + + # Group mention/pattern gate. DMs pass through unconditionally. + # We need the message text for regex wake-word matching; extract it + # early but don't consume the rest of the pipeline until after the + # gate decides whether to process. + _early_text = self._extract_text(message) or "" + if not self._should_process_message(message, _early_text, is_group, chat_id): + logger.debug( + "[%s] Dropping group message that failed mention gate message_id=%s chat_id=%s", + self.name, msg_id, chat_id, + ) + return + + # Stash the incoming message keyed by chat_id so concurrent + # conversations don't clobber each other's context. Also reset + # the per-chat "Done emoji fired" marker so a new inbound message + # gets its own Thinking→Done cycle. 
+ if chat_id: + self._message_contexts[chat_id] = message + self._done_emoji_fired.discard(chat_id) + + # Store session webhook session_webhook = getattr(message, "session_webhook", None) or "" + session_webhook_expired_time = ( + getattr(message, "session_webhook_expired_time", 0) or 0 + ) if session_webhook and chat_id and _DINGTALK_WEBHOOK_RE.match(session_webhook): if len(self._session_webhooks) >= _SESSION_WEBHOOKS_MAX: - # Evict oldest entry to cap memory growth try: self._session_webhooks.pop(next(iter(self._session_webhooks))) except StopIteration: pass - self._session_webhooks[chat_id] = session_webhook + self._session_webhooks[chat_id] = ( + session_webhook, + session_webhook_expired_time, + ) + + # Resolve media download codes to URLs so vision tools can use them + await self._resolve_media_codes(message) + + # Extract text content + text = self._extract_text(message) + + # Determine message type and build media list + msg_type, media_urls, media_types = self._extract_media(message) + + if not text and not media_urls: + logger.debug("[%s] Empty message, skipping", self.name) + return source = self.build_source( chat_id=chat_id, @@ -219,41 +611,141 @@ class DingTalkAdapter(BasePlatformAdapter): # Parse timestamp create_at = getattr(message, "create_at", None) try: - timestamp = datetime.fromtimestamp(int(create_at) / 1000, tz=timezone.utc) if create_at else datetime.now(tz=timezone.utc) + timestamp = ( + datetime.fromtimestamp(int(create_at) / 1000, tz=timezone.utc) + if create_at + else datetime.now(tz=timezone.utc) + ) except (ValueError, OSError, TypeError): timestamp = datetime.now(tz=timezone.utc) event = MessageEvent( text=text, - message_type=MessageType.TEXT, + message_type=msg_type, source=source, message_id=msg_id, raw_message=message, + media_urls=media_urls, + media_types=media_types, timestamp=timestamp, ) - logger.debug("[%s] Message from %s in %s: %s", - self.name, sender_nick, chat_id[:20] if chat_id else "?", text[:50]) + logger.debug( + 
"[%s] Message from %s in %s: %s", + self.name, + sender_nick, + chat_id[:20] if chat_id else "?", + text[:80] if text else "(media)", + ) await self.handle_message(event) @staticmethod def _extract_text(message: "ChatbotMessage") -> str: - """Extract plain text from a DingTalk chatbot message.""" + """Extract plain text from a DingTalk chatbot message. + + Handles both legacy and current dingtalk-stream SDK payload shapes: + * legacy: ``message.text`` was a dict ``{"content": "..."}`` + * >= 0.20: ``message.text`` is a ``TextContent`` dataclass whose + ``__str__`` returns ``"TextContent(content=...)"`` — never fall + back to ``str(text)`` without extracting ``.content`` first. + * rich text moved from ``message.rich_text`` (list) to + ``message.rich_text_content.rich_text_list`` (list of dicts). + """ text = getattr(message, "text", None) or "" - if isinstance(text, dict): + + # Handle TextContent object (SDK style) + if hasattr(text, "content"): + content = (text.content or "").strip() + elif isinstance(text, dict): content = text.get("content", "").strip() else: content = str(text).strip() - # Fall back to rich text if present if not content: - rich_text = getattr(message, "rich_text", None) - if rich_text and isinstance(rich_text, list): - parts = [item["text"] for item in rich_text - if isinstance(item, dict) and item.get("text")] - content = " ".join(parts).strip() + rich_text = getattr(message, "rich_text_content", None) or getattr( + message, "rich_text", None + ) + if rich_text: + rich_list = getattr(rich_text, "rich_text_list", None) or rich_text + if isinstance(rich_list, list): + parts = [] + for item in rich_list: + if isinstance(item, dict): + t = item.get("text") or item.get("content") or "" + if t: + parts.append(t) + elif hasattr(item, "text") and item.text: + parts.append(item.text) + content = " ".join(parts).strip() + + # Do NOT strip "@bot" from the text. 
The mention is a routing + # signal (delivered structurally via callback `isInAtList`), and + # regex-stripping @handles would collateral-damage e-mails + # (alice@example.com), SSH URLs (git@github.com), and literal + # references the user wrote ("what does @openai think"). Let the + # LLM see the raw text — it handles "@bot hello" cleanly. return content + def _extract_media(self, message: "ChatbotMessage"): + """Extract media info from message. Returns (MessageType, [urls], [mime_types]).""" + msg_type = MessageType.TEXT + media_urls = [] + media_types = [] + + # Check for image/picture + image_content = getattr(message, "image_content", None) + if image_content: + download_code = getattr(image_content, "download_code", None) + if download_code: + media_urls.append(download_code) + media_types.append("image") + msg_type = MessageType.PHOTO + + # Check for rich text with mixed content + rich_text = getattr(message, "rich_text_content", None) or getattr( + message, "rich_text", None + ) + if rich_text: + rich_list = getattr(rich_text, "rich_text_list", None) or rich_text + if isinstance(rich_list, list): + for item in rich_list: + if isinstance(item, dict): + dl_code = ( + item.get("downloadCode") or item.get("download_code") or "" + ) + item_type = item.get("type", "") + if dl_code: + mapped = DINGTALK_TYPE_MAPPING.get(item_type, "file") + media_urls.append(dl_code) + if mapped == "image": + media_types.append("image") + if msg_type == MessageType.TEXT: + msg_type = MessageType.PHOTO + elif mapped == "audio": + media_types.append("audio") + if msg_type == MessageType.TEXT: + msg_type = MessageType.AUDIO + elif mapped == "video": + media_types.append("video") + if msg_type == MessageType.TEXT: + msg_type = MessageType.VIDEO + else: + media_types.append("application/octet-stream") + if msg_type == MessageType.TEXT: + msg_type = MessageType.DOCUMENT + + msg_type_str = getattr(message, "message_type", "") or "" + if msg_type_str == "picture" and not media_urls: + 
msg_type = MessageType.PHOTO + elif msg_type_str == "richText": + msg_type = ( + MessageType.PHOTO + if any("image" in t for t in media_types) + else MessageType.TEXT + ) + + return msg_type, media_urls, media_types + # -- Outbound messaging ------------------------------------------------- async def send( @@ -265,29 +757,101 @@ class DingTalkAdapter(BasePlatformAdapter): ) -> SendResult: """Send a markdown reply via DingTalk session webhook.""" metadata = metadata or {} + logger.debug( + "[%s] send() chat_id=%s card_enabled=%s", + self.name, + chat_id, + bool(self._card_template_id and self._card_sdk), + ) - session_webhook = metadata.get("session_webhook") or self._session_webhooks.get(chat_id) + # Check metadata first (for direct webhook sends) + session_webhook = metadata.get("session_webhook") if not session_webhook: - return SendResult(success=False, - error="No session_webhook available. Reply must follow an incoming message.") + webhook_info = self._get_valid_webhook(chat_id) + if not webhook_info: + logger.warning( + "[%s] No valid session_webhook for chat_id=%s", + self.name, chat_id, + ) + return SendResult( + success=False, + error="No valid session_webhook available. Reply must follow an incoming message.", + ) + session_webhook, _ = webhook_info if not self._http_client: return SendResult(success=False, error="HTTP client not initialized") + # Look up the inbound message for this chat (for AI Card routing) + current_message = self._message_contexts.get(chat_id) + + # ``reply_to`` is the signal that this send is the FINAL response + # to an inbound user message — only `base.py:_send_with_retry` sets + # it. Tool-progress, commentary, and stream-consumer first-sends + # all leave it None. We use it for two orthogonal decisions: + # 1. finalize on create? Yes if final reply, No if intermediate + # (intermediate cards stay in streaming state so edit_message + # updates don't flicker closed→streaming→closed repeatedly). + # 2. fire Done reaction? 
Only when this is the final reply. + is_final_reply = reply_to is not None + + # Try AI Card first (using alibabacloud_dingtalk.card_1_0 SDK). + if self._card_template_id and current_message and self._card_sdk: + # Close any previously-open streaming cards for this chat + # before creating a new one (handles tool-progress → final- + # response handoff; also cleans up lingering commentary cards). + await self._close_streaming_siblings(chat_id) + + result = await self._create_and_stream_card( + chat_id, current_message, content, + finalize=is_final_reply, + ) + if result and result.success: + if is_final_reply: + # Final reply: card closed, swap Thinking → Done. + self._fire_done_reaction(chat_id) + else: + # Intermediate (tool progress / commentary / streaming + # first chunk): keep the card open and track it so the + # next send() auto-closes it as a sibling, or + # edit_message(finalize=True) closes it explicitly. + self._streaming_cards.setdefault(chat_id, {})[ + result.message_id + ] = content + return result + + logger.warning("[%s] AI Card send failed, falling back to webhook", self.name) + + logger.debug("[%s] Sending via webhook", self.name) + # Normalize markdown for DingTalk + normalized = self._normalize_markdown(content[: self.MAX_MESSAGE_LENGTH]) + payload = { "msgtype": "markdown", - "markdown": {"title": "Hermes", "text": content[:self.MAX_MESSAGE_LENGTH]}, + "markdown": {"title": "Hermes", "text": normalized}, } try: - resp = await self._http_client.post(session_webhook, json=payload, timeout=15.0) + resp = await self._http_client.post( + session_webhook, json=payload, timeout=15.0 + ) if resp.status_code < 300: + # Webhook path: fire Done only for final replies, same as + # the card path. 
+ if is_final_reply: + self._fire_done_reaction(chat_id) return SendResult(success=True, message_id=uuid.uuid4().hex[:12]) body = resp.text - logger.warning("[%s] Send failed HTTP %d: %s", self.name, resp.status_code, body[:200]) - return SendResult(success=False, error=f"HTTP {resp.status_code}: {body[:200]}") + logger.warning( + "[%s] Send failed HTTP %d: %s", self.name, resp.status_code, body[:200] + ) + return SendResult( + success=False, error=f"HTTP {resp.status_code}: {body[:200]}" + ) except httpx.TimeoutException: - return SendResult(success=False, error="Timeout sending message to DingTalk") + return SendResult( + success=False, error="Timeout sending message to DingTalk" + ) except Exception as e: logger.error("[%s] Send error: %s", self.name, e) return SendResult(success=False, error=str(e)) @@ -298,36 +862,501 @@ class DingTalkAdapter(BasePlatformAdapter): async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: """Return basic info about a DingTalk conversation.""" - return {"name": chat_id, "type": "group" if "group" in chat_id.lower() else "dm"} + return { + "name": chat_id, + "type": "group" if "group" in chat_id.lower() else "dm", + } + + def _get_valid_webhook(self, chat_id: str) -> Optional[tuple[str, int]]: + """Get a valid (non-expired) session webhook for the given chat_id.""" + info = self._session_webhooks.get(chat_id) + if not info: + return None + webhook, expired_time_ms = info + # Check expiry with 5-minute safety margin + if expired_time_ms and expired_time_ms > 0: + now_ms = int(datetime.now(tz=timezone.utc).timestamp() * 1000) + safety_margin_ms = 5 * 60 * 1000 + if now_ms + safety_margin_ms >= expired_time_ms: + # Expired, remove from cache + self._session_webhooks.pop(chat_id, None) + return None + return info + + async def _create_and_stream_card( + self, + chat_id: str, + message: Any, + content: str, + *, + finalize: bool = True, + ) -> Optional[SendResult]: + """Create an AI Card, deliver it to the conversation, and 
stream initial content. + + Always called with ``finalize=True`` from ``send()`` (closed state). + If the caller later issues ``edit_message(finalize=False)``, the + DingTalk streaming_update API reopens the card into streaming + state, and we track that in ``_streaming_cards`` for sibling + cleanup on the next send. + """ + try: + token = await self._get_access_token() + if not token: + return None + + out_track_id = f"hermes_{uuid.uuid4().hex[:12]}" + + conversation_id = getattr(message, "conversation_id", "") or "" + conversation_type = getattr(message, "conversation_type", "1") + is_group = str(conversation_type) == "2" + sender_staff_id = getattr(message, "sender_staff_id", "") or "" + + runtime = tea_util_models.RuntimeOptions() + + # Step 1: Create card with STREAM callback type + create_request = dingtalk_card_models.CreateCardRequest( + card_template_id=self._card_template_id, + out_track_id=out_track_id, + card_data=dingtalk_card_models.CreateCardRequestCardData( + card_param_map={"content": ""}, + ), + callback_type="STREAM", + im_group_open_space_model=( + dingtalk_card_models.CreateCardRequestImGroupOpenSpaceModel( + support_forward=True, + ) + ), + im_robot_open_space_model=( + dingtalk_card_models.CreateCardRequestImRobotOpenSpaceModel( + support_forward=True, + ) + ), + ) + + create_headers = dingtalk_card_models.CreateCardHeaders( + x_acs_dingtalk_access_token=token, + ) + + await self._card_sdk.create_card_with_options_async( + create_request, create_headers, runtime + ) + + # Step 2: Deliver card to the conversation + if is_group: + open_space_id = f"dtv1.card//IM_GROUP.{conversation_id}" + deliver_request = dingtalk_card_models.DeliverCardRequest( + out_track_id=out_track_id, + user_id_type=1, + open_space_id=open_space_id, + im_group_open_deliver_model=( + dingtalk_card_models.DeliverCardRequestImGroupOpenDeliverModel( + robot_code=self._robot_code, + ) + ), + ) + else: + if not sender_staff_id: + logger.warning( + "[%s] AI Card skipped: 
missing sender_staff_id for DM", + self.name, + ) + return None + open_space_id = f"dtv1.card//IM_ROBOT.{sender_staff_id}" + deliver_request = dingtalk_card_models.DeliverCardRequest( + out_track_id=out_track_id, + user_id_type=1, + open_space_id=open_space_id, + im_robot_open_deliver_model=( + dingtalk_card_models.DeliverCardRequestImRobotOpenDeliverModel( + space_type="IM_ROBOT", + ) + ), + ) + + deliver_headers = dingtalk_card_models.DeliverCardHeaders( + x_acs_dingtalk_access_token=token, + ) + + await self._card_sdk.deliver_card_with_options_async( + deliver_request, deliver_headers, runtime + ) + + # Step 3: Stream initial content. finalize=True closes the + # card immediately (one-shot); finalize=False keeps it open + # for streaming edit_message updates by out_track_id. + await self._stream_card_content( + out_track_id, token, content, finalize=finalize, + ) + + logger.info( + "[%s] AI Card %s: %s", + self.name, + "created+finalized" if finalize else "created (streaming)", + out_track_id, + ) + return SendResult(success=True, message_id=out_track_id) + + except Exception as e: + logger.warning( + "[%s] AI Card create failed: %s\n%s", + self.name, e, traceback.format_exc(), + ) + return None + + async def edit_message( + self, + chat_id: str, + message_id: str, + content: str, + *, + finalize: bool = False, + ) -> SendResult: + """Edit an AI Card by streaming updated content. + + ``message_id`` is the out_track_id returned by the initial ``send()`` + call that created this card. Callers (stream_consumer, tool + progress) track their own ids independently so two parallel flows + on the same chat_id don't interfere. 
+ """ + if not message_id: + return SendResult(success=False, error="message_id required") + token = await self._get_access_token() + if not token: + return SendResult(success=False, error="No access token") + + try: + await self._stream_card_content( + message_id, token, content, finalize=finalize, + ) + if finalize: + # Remove from streaming-cards tracking and fire Done. This + # is the canonical "response ended" signal from stream + # consumer's final edit. + self._streaming_cards.get(chat_id, {}).pop(message_id, None) + if not self._streaming_cards.get(chat_id): + self._streaming_cards.pop(chat_id, None) + logger.debug( + "[%s] AI Card finalized (edit): %s", + self.name, message_id, + ) + self._fire_done_reaction(chat_id) + else: + # Non-final edit reopens the card into streaming state — + # track it so the next send() can auto-close it as a + # sibling. + self._streaming_cards.setdefault(chat_id, {})[message_id] = content + return SendResult(success=True, message_id=message_id) + except Exception as e: + logger.warning("[%s] Card edit failed: %s", self.name, e) + return SendResult(success=False, error=str(e)) + + async def _stream_card_content( + self, + out_track_id: str, + token: str, + content: str, + finalize: bool = False, + ) -> None: + """Stream content to an existing AI Card.""" + stream_request = dingtalk_card_models.StreamingUpdateRequest( + out_track_id=out_track_id, + guid=str(uuid.uuid4()), + key="content", + content=content[: self.MAX_MESSAGE_LENGTH], + is_full=True, + is_finalize=finalize, + is_error=False, + ) + + stream_headers = dingtalk_card_models.StreamingUpdateHeaders( + x_acs_dingtalk_access_token=token, + ) + + runtime = tea_util_models.RuntimeOptions() + await self._card_sdk.streaming_update_with_options_async( + stream_request, stream_headers, runtime + ) + + async def _get_access_token(self) -> Optional[str]: + """Get access token using SDK's cached token.""" + if not self._stream_client: + return None + try: + # SDK's 
get_access_token is sync and uses requests + token = await asyncio.to_thread(self._stream_client.get_access_token) + return token + except Exception as e: + logger.error("[%s] Failed to get access token: %s", self.name, e) + return None + + async def _send_emotion( + self, + open_msg_id: str, + open_conversation_id: str, + emoji_name: str, + *, + recall: bool = False, + ) -> None: + """Add or recall an emoji reaction on a message.""" + if not self._robot_sdk or not open_msg_id or not open_conversation_id: + return + action = "recall" if recall else "reply" + try: + token = await self._get_access_token() + if not token: + return + + emotion_kwargs = { + "robot_code": self._robot_code, + "open_msg_id": open_msg_id, + "open_conversation_id": open_conversation_id, + "emotion_type": 2, + "emotion_name": emoji_name, + } + runtime = tea_util_models.RuntimeOptions() + + if recall: + emotion_kwargs["text_emotion"] = ( + dingtalk_robot_models.RobotRecallEmotionRequestTextEmotion( + emotion_id="2659900", + emotion_name=emoji_name, + text=emoji_name, + background_id="im_bg_1", + ) + ) + request = dingtalk_robot_models.RobotRecallEmotionRequest( + **emotion_kwargs, + ) + sdk_headers = dingtalk_robot_models.RobotRecallEmotionHeaders( + x_acs_dingtalk_access_token=token, + ) + await self._robot_sdk.robot_recall_emotion_with_options_async( + request, sdk_headers, runtime + ) + else: + emotion_kwargs["text_emotion"] = ( + dingtalk_robot_models.RobotReplyEmotionRequestTextEmotion( + emotion_id="2659900", + emotion_name=emoji_name, + text=emoji_name, + background_id="im_bg_1", + ) + ) + request = dingtalk_robot_models.RobotReplyEmotionRequest( + **emotion_kwargs, + ) + sdk_headers = dingtalk_robot_models.RobotReplyEmotionHeaders( + x_acs_dingtalk_access_token=token, + ) + await self._robot_sdk.robot_reply_emotion_with_options_async( + request, sdk_headers, runtime + ) + logger.info( + "[%s] _send_emotion: %s %s on msg=%s", + self.name, action, emoji_name, open_msg_id[:24], + ) + 
except Exception: + logger.debug( + "[%s] _send_emotion %s failed", self.name, action, exc_info=True + ) + + async def _resolve_media_codes(self, message: "ChatbotMessage") -> None: + """Resolve download codes in message to actual URLs.""" + token = await self._get_access_token() + if not token: + return + + robot_code = getattr(message, "robot_code", None) or self._client_id + codes_to_resolve = [] + + # Collect codes and references to update + # 1. Single image content + img_content = getattr(message, "image_content", None) + if img_content and getattr(img_content, "download_code", None): + codes_to_resolve.append((img_content, "download_code")) + + # 2. Rich text list + rich_text = getattr(message, "rich_text_content", None) + if rich_text: + rich_list = getattr(rich_text, "rich_text_list", []) or [] + for item in rich_list: + if isinstance(item, dict): + for key in ("downloadCode", "pictureDownloadCode", "download_code"): + if item.get(key): + codes_to_resolve.append((item, key)) + + if not codes_to_resolve: + return + + # Resolve all codes in parallel + tasks = [] + for obj, key in codes_to_resolve: + code = getattr(obj, key, None) if hasattr(obj, key) else obj.get(key) + if code: + tasks.append( + self._fetch_download_url(code, robot_code, token, obj, key) + ) + + await asyncio.gather(*tasks, return_exceptions=True) + + async def _fetch_download_url( + self, code: str, robot_code: str, token: str, obj, key: str + ) -> None: + """Fetch download URL for a single code using the robot SDK.""" + if not self._robot_sdk: + logger.warning( + "[%s] Robot SDK not initialized, cannot resolve media code", + self.name, + ) + return + try: + request = dingtalk_robot_models.RobotMessageFileDownloadRequest( + download_code=code, + robot_code=robot_code, + ) + headers = dingtalk_robot_models.RobotMessageFileDownloadHeaders( + x_acs_dingtalk_access_token=token, + ) + runtime = tea_util_models.RuntimeOptions() + response = await 
self._robot_sdk.robot_message_file_download_with_options_async( + request, headers, runtime + ) + body = response.body if response else None + if body: + url = getattr(body, "download_url", None) + if url: + if hasattr(obj, key): + setattr(obj, key, url) + elif isinstance(obj, dict): + obj[key] = url + else: + logger.warning( + "[%s] Failed to download media: empty response for code %s", + self.name, + code, + ) + except Exception as e: + logger.error("[%s] Error resolving media code %s: %s", self.name, code, e) + + @staticmethod + def _normalize_markdown(text: str) -> str: + """Normalize markdown for DingTalk's parser. + + DingTalk's markdown renderer has quirks: + - Numbered lists need blank line before them + - Indented code blocks may render incorrectly + """ + lines = text.split("\n") + out = [] + for i, line in enumerate(lines): + # Ensure blank line before numbered list items + is_numbered = re.match(r"^\d+\.\s", line.strip()) + if is_numbered and i > 0: + prev = lines[i - 1] + if prev.strip() and not re.match(r"^\d+\.\s", prev.strip()): + out.append("") + # Dedent fenced code blocks + if line.strip().startswith("```") and line != line.lstrip(): + indent = len(line) - len(line.lstrip()) + line = line[indent:] + out.append(line) + return "\n".join(out) # --------------------------------------------------------------------------- # Internal stream handler # --------------------------------------------------------------------------- -class _IncomingHandler(ChatbotHandler if DINGTALK_STREAM_AVAILABLE else object): - """dingtalk-stream ChatbotHandler that forwards messages to the adapter.""" - def __init__(self, adapter: DingTalkAdapter, loop: asyncio.AbstractEventLoop): +class _IncomingHandler( + dingtalk_stream.ChatbotHandler if DINGTALK_STREAM_AVAILABLE else object +): + """dingtalk-stream ChatbotHandler that forwards messages to the adapter. 
+ + SDK >= 0.20 changed process() from sync to async, and the message + parameter from ChatbotMessage to CallbackMessage. We parse the + CallbackMessage.data dict into a ChatbotMessage before forwarding. + """ + + def __init__(self, adapter: DingTalkAdapter, loop: Optional[asyncio.AbstractEventLoop] = None): if DINGTALK_STREAM_AVAILABLE: super().__init__() self._adapter = adapter self._loop = loop - def process(self, message: "ChatbotMessage"): - """Called by dingtalk-stream in its thread when a message arrives. + async def process(self, message: "CallbackMessage"): + """Called by dingtalk-stream (>=0.20) when a message arrives. - Schedules the async handler on the main event loop. + dingtalk-stream >= 0.24 passes a CallbackMessage whose ``.data`` contains + the chatbot payload. Convert it to ChatbotMessage via + ``ChatbotMessage.from_dict()``. + + Message processing is dispatched as a background task so that this + method returns the ACK immediately — blocking here would prevent the + SDK from sending heartbeats, eventually causing a disconnect. """ - loop = self._loop - if loop is None or loop.is_closed(): - logger.error("[DingTalk] Event loop unavailable, cannot dispatch message") - return dingtalk_stream.AckMessage.STATUS_OK, "OK" - - future = asyncio.run_coroutine_threadsafe(self._adapter._on_message(message), loop) try: - future.result(timeout=60) - except Exception: - logger.exception("[DingTalk] Error processing incoming message") + # CallbackMessage.data is a dict containing the raw DingTalk payload + data = message.data + if isinstance(data, str): + data = json.loads(data) - return dingtalk_stream.AckMessage.STATUS_OK, "OK" + # Parse dict into ChatbotMessage using SDK's from_dict + chatbot_msg = ChatbotMessage.from_dict(data) + + # Ensure session_webhook is populated even if the SDK's + # from_dict() did not map it (field name mismatch across + # SDK versions). 
+ if not getattr(chatbot_msg, "session_webhook", None): + webhook = ( + data.get("sessionWebhook") + or data.get("session_webhook") + or "" + ) if isinstance(data, dict) else "" + if webhook: + chatbot_msg.session_webhook = webhook + + # Ensure is_in_at_list is populated from the structured callback + # flag even if from_dict() did not map it. DingTalk sends + # ``isInAtList`` in the raw payload; the adapter's mention check + # reads the ChatbotMessage attribute ``is_in_at_list``. + if not getattr(chatbot_msg, "is_in_at_list", False): + raw_flag = ( + data.get("isInAtList") if isinstance(data, dict) else False + ) + if raw_flag: + chatbot_msg.is_in_at_list = True + + msg_id = getattr(chatbot_msg, "message_id", None) or "" + conversation_id = getattr(chatbot_msg, "conversation_id", None) or "" + + # Thinking reaction — fire-and-forget, tracked + if msg_id and conversation_id: + self._adapter._spawn_bg( + self._adapter._send_emotion( + msg_id, conversation_id, "🤔Thinking", recall=False, + ) + ) + + # Fire-and-forget: return ACK immediately, process in background. + # Blocking here would prevent the SDK from sending heartbeats, + # eventually causing a disconnect. _on_message is wrapped so + # exceptions inside the task surface in logs instead of + # disappearing into the event loop. 
+ asyncio.create_task(self._safe_on_message(chatbot_msg)) + except Exception: + logger.exception( + "[%s] Error preparing incoming message", self._adapter.name + ) + return AckMessage.STATUS_SYSTEM_EXCEPTION, "error" + + return AckMessage.STATUS_OK, "OK" + + async def _safe_on_message(self, chatbot_msg: "ChatbotMessage") -> None: + """Wrapper that catches exceptions from _on_message.""" + try: + await self._adapter._on_message(chatbot_msg) + except Exception: + logger.exception( + "[%s] Error processing incoming message", self._adapter.name + ) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 0adee9eb6c..660ed46dd8 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -51,7 +51,9 @@ from gateway.platforms.base import ( ProcessingOutcome, SendResult, cache_image_from_url, + cache_image_from_bytes, cache_audio_from_url, + cache_audio_from_bytes, cache_document_from_bytes, SUPPORTED_DOCUMENT_TYPES, ) @@ -80,6 +82,41 @@ def check_discord_requirements() -> bool: return DISCORD_AVAILABLE +def _build_allowed_mentions(): + """Build Discord ``AllowedMentions`` with safe defaults, overridable via env. + + Discord bots default to parsing ``@everyone``, ``@here``, role pings, and + user pings when ``allowed_mentions`` is unset on the client — any LLM + output or echoed user content that contains ``@everyone`` would therefore + ping the whole server. We explicitly deny ``@everyone`` and role pings + by default and keep user / replied-user pings enabled so normal + conversation still works. 
+ + Override via environment variables (or ``discord.allow_mentions.*`` in + config.yaml): + + DISCORD_ALLOW_MENTION_EVERYONE default false — @everyone + @here + DISCORD_ALLOW_MENTION_ROLES default false — @role pings + DISCORD_ALLOW_MENTION_USERS default true — @user pings + DISCORD_ALLOW_MENTION_REPLIED_USER default true — reply-ping author + """ + if not DISCORD_AVAILABLE: + return None + + def _b(name: str, default: bool) -> bool: + raw = os.getenv(name, "").strip().lower() + if not raw: + return default + return raw in ("true", "1", "yes", "on") + + return discord.AllowedMentions( + everyone=_b("DISCORD_ALLOW_MENTION_EVERYONE", False), + roles=_b("DISCORD_ALLOW_MENTION_ROLES", False), + users=_b("DISCORD_ALLOW_MENTION_USERS", True), + replied_user=_b("DISCORD_ALLOW_MENTION_REPLIED_USER", True), + ) + + class VoiceReceiver: """Captures and decodes voice audio from a Discord voice channel. @@ -235,6 +272,7 @@ class VoiceReceiver: # Calculate dynamic RTP header size (RFC 9335 / rtpsize mode) cc = first_byte & 0x0F # CSRC count has_extension = bool(first_byte & 0x10) # extension bit + has_padding = bool(first_byte & 0x20) # padding bit (RFC 3550 §5.1) header_size = 12 + (4 * cc) + (4 if has_extension else 0) if len(data) < header_size + 4: # need at least header + nonce @@ -278,6 +316,31 @@ class VoiceReceiver: if ext_data_len and len(decrypted) > ext_data_len: decrypted = decrypted[ext_data_len:] + # --- Strip RTP padding (RFC 3550 §5.1) --- + # When the P bit is set, the last payload byte holds the count of + # trailing padding bytes (including itself) that must be removed + # before further processing. Skipping this passes padding-contaminated + # bytes into DAVE/Opus and corrupts inbound audio. 
+ if has_padding: + if not decrypted: + if self._packet_debug_count <= 10: + logger.warning( + "RTP padding bit set but no payload (ssrc=%d)", ssrc, + ) + return + pad_len = decrypted[-1] + if pad_len == 0 or pad_len > len(decrypted): + if self._packet_debug_count <= 10: + logger.warning( + "Invalid RTP padding length %d for payload size %d (ssrc=%d)", + pad_len, len(decrypted), ssrc, + ) + return + decrypted = decrypted[:-pad_len] + if not decrypted: + # Padding consumed entire payload — nothing to decode + return + # --- DAVE E2EE decrypt --- if self._dave_session: with self._lock: @@ -432,8 +495,10 @@ class DiscordAdapter(BasePlatformAdapter): self._client: Optional[commands.Bot] = None self._ready_event = asyncio.Event() self._allowed_user_ids: set = set() # For button approval authorization + self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering # Voice channel state (per-guild) self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient + self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave # Text batching: merge rapid successive messages (Telegram-style) self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6")) self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")) @@ -510,6 +575,15 @@ class DiscordAdapter(BasePlatformAdapter): if uid.strip() } + # Parse DISCORD_ALLOWED_ROLES — comma-separated role IDs. + # Users with ANY of these roles can interact with the bot. + roles_env = os.getenv("DISCORD_ALLOWED_ROLES", "") + if roles_env: + self._allowed_role_ids = { + int(rid.strip()) for rid in roles_env.split(",") + if rid.strip().isdigit() + } + # Set up intents. # Message Content is required for normal text replies. 
# Server Members is only needed when the allowlist contains usernames @@ -521,7 +595,10 @@ class DiscordAdapter(BasePlatformAdapter): intents.message_content = True intents.dm_messages = True intents.guild_messages = True - intents.members = any(not entry.isdigit() for entry in self._allowed_user_ids) + intents.members = ( + any(not entry.isdigit() for entry in self._allowed_user_ids) + or bool(self._allowed_role_ids) # Need members intent for role lookup + ) intents.voice_states = True # Resolve proxy (DISCORD_PROXY > generic env vars > macOS system proxy) @@ -530,10 +607,15 @@ class DiscordAdapter(BasePlatformAdapter): if proxy_url: logger.info("[%s] Using proxy for Discord: %s", self.name, proxy_url) - # Create bot — proxy= for HTTP, connector= for SOCKS + # Create bot — proxy= for HTTP, connector= for SOCKS. + # allowed_mentions is set with safe defaults (no @everyone/roles) + # so LLM output or echoed user content can't ping the whole + # server; override per DISCORD_ALLOW_MENTION_* env vars or the + # discord.allow_mentions.* block in config.yaml. self._client = commands.Bot( command_prefix="!", # Not really used, we handle raw messages intents=intents, + allowed_mentions=_build_allowed_mentions(), **proxy_kwargs_for_bot(proxy_url), ) adapter_self = self # capture for closure @@ -555,6 +637,15 @@ class DiscordAdapter(BasePlatformAdapter): @self._client.event async def on_message(message: DiscordMessage): + # Block until _resolve_allowed_usernames has swapped + # any raw usernames in DISCORD_ALLOWED_USERS for numeric + # IDs (otherwise on_message's author.id lookup can miss). 
+ if not adapter_self._ready_event.is_set(): + try: + await asyncio.wait_for(adapter_self._ready_event.wait(), timeout=30.0) + except asyncio.TimeoutError: + pass + # Dedup: Discord RESUME replays events after reconnects (#4777) if adapter_self._dedup.is_duplicate(str(message.id)): return @@ -568,14 +659,13 @@ class DiscordAdapter(BasePlatformAdapter): if message.type not in (discord.MessageType.default, discord.MessageType.reply): return - # Check if the message author is in the allowed user list - if not self._is_allowed_user(str(message.author.id)): - return - # Bot message filtering (DISCORD_ALLOW_BOTS): # "none" — ignore all other bots (default) # "mentions" — accept bot messages only when they @mention us # "all" — accept all bot messages + # Must run BEFORE the user allowlist check so that bots + # permitted by DISCORD_ALLOW_BOTS are not rejected for + # not being in DISCORD_ALLOWED_USERS (fixes #4466). if getattr(message.author, "bot", False): allow_bots = os.getenv("DISCORD_ALLOW_BOTS", "none").lower().strip() if allow_bots == "none": @@ -583,7 +673,12 @@ class DiscordAdapter(BasePlatformAdapter): elif allow_bots == "mentions": if not self._client.user or self._client.user not in message.mentions: return - # "all" falls through to handle_message + # "all" falls through; bot is permitted — skip the + # human-user allowlist below (bots aren't in it). + else: + # Non-bot: enforce the configured user/role allowlists. + if not self._is_allowed_user(str(message.author.id), message.author): + return # Multi-agent filtering: if the message mentions specific bots # but NOT this bot, the sender is talking to another agent — @@ -772,6 +867,9 @@ class DiscordAdapter(BasePlatformAdapter): When metadata contains a thread_id, the message is sent to that thread instead of the parent channel identified by chat_id. + + Forum channels (type 15) reject direct messages — a thread post is + created automatically. 
""" if not self._client: return SendResult(success=False, error="Not connected") @@ -797,6 +895,10 @@ class DiscordAdapter(BasePlatformAdapter): if not channel: return SendResult(success=False, error=f"Channel {chat_id} not found") + # Forum channels reject channel.send() — create a thread post instead. + if self._is_forum_parent(channel): + return await self._send_to_forum(channel, content) + # Format and split message if needed formatted = self.format_message(content) chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH) @@ -807,7 +909,10 @@ class DiscordAdapter(BasePlatformAdapter): if reply_to and self._reply_to_mode != "off": try: ref_msg = await channel.fetch_message(int(reply_to)) - reference = ref_msg + if hasattr(ref_msg, "to_reference"): + reference = ref_msg.to_reference(fail_if_not_exists=False) + else: + reference = ref_msg except Exception as e: logger.debug("Could not fetch reply-to message: %s", e) @@ -825,14 +930,20 @@ class DiscordAdapter(BasePlatformAdapter): err_text = str(e) if ( chunk_reference is not None - and "error code: 50035" in err_text - and "Cannot reply to a system message" in err_text + and ( + ( + "error code: 50035" in err_text + and "Cannot reply to a system message" in err_text + ) + or "error code: 10008" in err_text + ) ): logger.warning( - "[%s] Reply target %s is a Discord system message; retrying send without reply reference", + "[%s] Reply target %s rejected the reply reference; retrying send without reply reference", self.name, reply_to, ) + reference = None msg = await channel.send( content=chunk, reference=None, @@ -851,11 +962,127 @@ class DiscordAdapter(BasePlatformAdapter): logger.error("[%s] Failed to send Discord message: %s", self.name, e, exc_info=True) return SendResult(success=False, error=str(e)) + async def _send_to_forum(self, forum_channel: Any, content: str) -> SendResult: + """Create a thread post in a forum channel with the message as starter content. 
+ + Forum channels (type 15) don't support direct messages. Instead we + POST to /channels/{forum_id}/threads with a thread name derived from + the first line of the message. Any follow-up chunk failures are + reported in ``raw_response['warnings']`` so the caller can surface + partial-send issues. + """ + from tools.send_message_tool import _derive_forum_thread_name + + formatted = self.format_message(content) + chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH) + + thread_name = _derive_forum_thread_name(content) + + starter_content = chunks[0] if chunks else thread_name + + try: + thread = await forum_channel.create_thread( + name=thread_name, + content=starter_content, + ) + except Exception as e: + logger.error("[%s] Failed to create forum thread in %s: %s", self.name, forum_channel.id, e) + return SendResult(success=False, error=f"Forum thread creation failed: {e}") + + thread_channel = thread if hasattr(thread, "send") else getattr(thread, "thread", None) + thread_id = str(getattr(thread_channel, "id", getattr(thread, "id", ""))) + starter_msg = getattr(thread, "message", None) + message_id = str(getattr(starter_msg, "id", thread_id)) if starter_msg else thread_id + + # Send remaining chunks into the newly created thread. Track any + # per-chunk failures so the caller sees partial-send outcomes. 
+ message_ids = [message_id] + warnings: list[str] = [] + for chunk in chunks[1:]: + try: + msg = await thread_channel.send(content=chunk) + message_ids.append(str(msg.id)) + except Exception as e: + warning = f"Failed to send follow-up chunk to forum thread {thread_id}: {e}" + logger.warning("[%s] %s", self.name, warning) + warnings.append(warning) + + raw_response: Dict[str, Any] = {"message_ids": message_ids, "thread_id": thread_id} + if warnings: + raw_response["warnings"] = warnings + + return SendResult( + success=True, + message_id=message_ids[0], + raw_response=raw_response, + ) + + async def _forum_post_file( + self, + forum_channel: Any, + *, + thread_name: Optional[str] = None, + content: str = "", + file: Any = None, + files: Optional[list] = None, + ) -> SendResult: + """Create a forum thread whose starter message carries file attachments. + + Used by the send_voice / send_image_file / send_document paths when + the target channel is a forum (type 15). ``create_thread`` on a + ForumChannel accepts the same file/files/content kwargs as + ``channel.send``, creating the thread and starter message atomically. + """ + from tools.send_message_tool import _derive_forum_thread_name + + if not thread_name: + # Prefer the text content, fall back to the first attached + # filename, fall back to the generic default. 
+ hint = content or "" + if not hint.strip(): + if file is not None: + hint = getattr(file, "filename", "") or "" + elif files: + hint = getattr(files[0], "filename", "") or "" + thread_name = _derive_forum_thread_name(hint) if hint.strip() else "New Post" + + kwargs: Dict[str, Any] = {"name": thread_name} + if content: + kwargs["content"] = content + if file is not None: + kwargs["file"] = file + if files: + kwargs["files"] = files + + try: + thread = await forum_channel.create_thread(**kwargs) + except Exception as e: + logger.error( + "[%s] Failed to create forum thread with file in %s: %s", + self.name, + getattr(forum_channel, "id", "?"), + e, + ) + return SendResult(success=False, error=f"Forum thread creation failed: {e}") + + thread_channel = thread if hasattr(thread, "send") else getattr(thread, "thread", None) + thread_id = str(getattr(thread_channel, "id", getattr(thread, "id", ""))) + starter_msg = getattr(thread, "message", None) + message_id = str(getattr(starter_msg, "id", thread_id)) if starter_msg else thread_id + + return SendResult( + success=True, + message_id=message_id, + raw_response={"thread_id": thread_id}, + ) + async def edit_message( self, chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Discord message.""" if not self._client: @@ -881,7 +1108,11 @@ class DiscordAdapter(BasePlatformAdapter): caption: Optional[str] = None, file_name: Optional[str] = None, ) -> SendResult: - """Send a local file as a Discord attachment.""" + """Send a local file as a Discord attachment. + + Forum channels (type 15) get a new thread whose starter message + carries the file — they reject direct POST /messages. 
+ """ if not self._client: return SendResult(success=False, error="Not connected") @@ -894,6 +1125,12 @@ class DiscordAdapter(BasePlatformAdapter): filename = file_name or os.path.basename(file_path) with open(file_path, "rb") as fh: file = discord.File(fh, filename=filename) + if self._is_forum_parent(channel): + return await self._forum_post_file( + channel, + content=(caption or "").strip(), + file=file, + ) msg = await channel.send(content=caption if caption else None, file=file) return SendResult(success=True, message_id=str(msg.id)) @@ -942,6 +1179,18 @@ class DiscordAdapter(BasePlatformAdapter): with open(audio_path, "rb") as f: file_data = f.read() + # Forum channels (type 15) reject direct POST /messages — the + # native voice flag path also targets /messages so it would fail + # too. Create a thread post with the audio as the starter + # attachment instead. + if self._is_forum_parent(channel): + forum_file = discord.File(io.BytesIO(file_data), filename=filename) + return await self._forum_post_file( + channel, + content=(caption or "").strip(), + file=forum_file, + ) + # Try sending as a native voice message via raw API (flags=8192). try: import base64 @@ -1000,51 +1249,53 @@ class DiscordAdapter(BasePlatformAdapter): return False guild_id = channel.guild.id - # Already connected in this guild? - existing = self._voice_clients.get(guild_id) - if existing and existing.is_connected(): - if existing.channel.id == channel.id: + async with self._voice_locks.setdefault(guild_id, asyncio.Lock()): + # Already connected in this guild? 
+ existing = self._voice_clients.get(guild_id) + if existing and existing.is_connected(): + if existing.channel.id == channel.id: + self._reset_voice_timeout(guild_id) + return True + await existing.move_to(channel) self._reset_voice_timeout(guild_id) return True - await existing.move_to(channel) + + vc = await channel.connect() + self._voice_clients[guild_id] = vc self._reset_voice_timeout(guild_id) + + # Start voice receiver (Phase 2: listen to users) + try: + receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids) + receiver.start() + self._voice_receivers[guild_id] = receiver + self._voice_listen_tasks[guild_id] = asyncio.ensure_future( + self._voice_listen_loop(guild_id) + ) + except Exception as e: + logger.warning("Voice receiver failed to start: %s", e) + return True - vc = await channel.connect() - self._voice_clients[guild_id] = vc - self._reset_voice_timeout(guild_id) - - # Start voice receiver (Phase 2: listen to users) - try: - receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids) - receiver.start() - self._voice_receivers[guild_id] = receiver - self._voice_listen_tasks[guild_id] = asyncio.ensure_future( - self._voice_listen_loop(guild_id) - ) - except Exception as e: - logger.warning("Voice receiver failed to start: %s", e) - - return True - async def leave_voice_channel(self, guild_id: int) -> None: """Disconnect from the voice channel in a guild.""" - # Stop voice receiver first - receiver = self._voice_receivers.pop(guild_id, None) - if receiver: - receiver.stop() - listen_task = self._voice_listen_tasks.pop(guild_id, None) - if listen_task: - listen_task.cancel() + async with self._voice_locks.setdefault(guild_id, asyncio.Lock()): + # Stop voice receiver first + receiver = self._voice_receivers.pop(guild_id, None) + if receiver: + receiver.stop() + listen_task = self._voice_listen_tasks.pop(guild_id, None) + if listen_task: + listen_task.cancel() - vc = self._voice_clients.pop(guild_id, None) - if vc and 
vc.is_connected(): - await vc.disconnect() - task = self._voice_timeout_tasks.pop(guild_id, None) - if task: - task.cancel() - self._voice_text_channels.pop(guild_id, None) - self._voice_sources.pop(guild_id, None) + vc = self._voice_clients.pop(guild_id, None) + if vc and vc.is_connected(): + await vc.disconnect() + task = self._voice_timeout_tasks.pop(guild_id, None) + if task: + task.cancel() + self._voice_text_channels.pop(guild_id, None) + self._voice_sources.pop(guild_id, None) # Maximum seconds to wait for voice playback before giving up PLAYBACK_TIMEOUT = 120 @@ -1284,11 +1535,48 @@ class DiscordAdapter(BasePlatformAdapter): except OSError: pass - def _is_allowed_user(self, user_id: str) -> bool: - """Check if user is in DISCORD_ALLOWED_USERS.""" - if not self._allowed_user_ids: + def _is_allowed_user(self, user_id: str, author=None) -> bool: + """Check if user is allowed via DISCORD_ALLOWED_USERS or DISCORD_ALLOWED_ROLES. + + Uses OR semantics: if the user matches EITHER allowlist, they're allowed. + If both allowlists are empty, everyone is allowed (backwards compatible). + When author is a Member, checks .roles directly; otherwise falls back + to scanning the bot's mutual guilds for a Member record. + """ + # ``getattr`` fallbacks here guard against test fixtures that build + # an adapter via ``object.__new__(DiscordAdapter)`` and skip __init__ + # (see AGENTS.md pitfall #17 — same pattern as gateway.run). 
+ allowed_users = getattr(self, "_allowed_user_ids", set()) + allowed_roles = getattr(self, "_allowed_role_ids", set()) + has_users = bool(allowed_users) + has_roles = bool(allowed_roles) + if not has_users and not has_roles: return True - return user_id in self._allowed_user_ids + # Check user ID allowlist + if has_users and user_id in allowed_users: + return True + # Check role allowlist + if has_roles: + # Try direct role check from Member object + direct_roles = getattr(author, "roles", None) if author is not None else None + if direct_roles: + if any(getattr(r, "id", None) in allowed_roles for r in direct_roles): + return True + # Fallback: scan mutual guilds for member's roles + if self._client is not None: + try: + uid_int = int(user_id) + except (TypeError, ValueError): + uid_int = None + if uid_int is not None: + for guild in self._client.guilds: + m = guild.get_member(uid_int) + if m is None: + continue + m_roles = getattr(m, "roles", None) or [] + if any(getattr(r, "id", None) in allowed_roles for r in m_roles): + return True + return False async def send_image_file( self, @@ -1357,6 +1645,13 @@ class DiscordAdapter(BasePlatformAdapter): import io file = discord.File(io.BytesIO(image_data), filename=f"image.{ext}") + if self._is_forum_parent(channel): + return await self._forum_post_file( + channel, + content=(caption or "").strip(), + file=file, + ) + msg = await channel.send( content=caption if caption else None, file=file, @@ -1379,6 +1674,75 @@ class DiscordAdapter(BasePlatformAdapter): ) return await super().send_image(chat_id, image_url, caption, reply_to) + async def send_animation( + self, + chat_id: str, + animation_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Send an animated GIF natively as a Discord file attachment.""" + if not self._client: + return SendResult(success=False, error="Not connected") + + if not is_safe_url(animation_url): + 
logger.warning("[%s] Blocked unsafe animation URL during Discord send_animation", self.name) + return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata) + + try: + import aiohttp + + channel = self._client.get_channel(int(chat_id)) + if not channel: + channel = await self._client.fetch_channel(int(chat_id)) + if not channel: + return SendResult(success=False, error=f"Channel {chat_id} not found") + + # Download the GIF and send as a Discord file attachment + # (Discord renders .gif attachments as auto-playing animations inline) + from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp + _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY") + _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy) + async with aiohttp.ClientSession(**_sess_kw) as session: + async with session.get(animation_url, timeout=aiohttp.ClientTimeout(total=30), **_req_kw) as resp: + if resp.status != 200: + raise Exception(f"Failed to download animation: HTTP {resp.status}") + + animation_data = await resp.read() + + import io + file = discord.File(io.BytesIO(animation_data), filename="animation.gif") + + if self._is_forum_parent(channel): + return await self._forum_post_file( + channel, + content=(caption or "").strip(), + file=file, + ) + + msg = await channel.send( + content=caption if caption else None, + file=file, + ) + return SendResult(success=True, message_id=str(msg.id)) + + except ImportError: + logger.warning( + "[%s] aiohttp not installed, falling back to URL. 
Run: pip install aiohttp", + self.name, + exc_info=True, + ) + return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata) + except Exception as e: # pragma: no cover - defensive logging + logger.error( + "[%s] Failed to send animation attachment, falling back to URL: %s", + self.name, + e, + exc_info=True, + ) + return await super().send_animation(chat_id, animation_url, caption, reply_to, metadata=metadata) + async def send_video( self, chat_id: str, @@ -1583,6 +1947,24 @@ class DiscordAdapter(BasePlatformAdapter): the "thinking..." indicator is replaced with that text; otherwise it is deleted so the channel isn't cluttered. """ + # Log the invoker so ghost-command reports can be triaged. Discord + # native slash invocations are always user-initiated (no bot can fire + # them), but mobile autocomplete / keyboard shortcuts / other users + # in the same channel are easy to miss in post-mortems. + try: + _user = interaction.user + _chan_id = getattr(interaction.channel, "id", None) or getattr(interaction, "channel_id", None) + logger.info( + "[Discord] slash '%s' invoked by user=%s id=%s channel=%s guild=%s", + command_text, + getattr(_user, "name", "?"), + getattr(_user, "id", "?"), + _chan_id, + getattr(interaction, "guild_id", None), + ) + except Exception: + pass # logging must never block command dispatch + await interaction.response.defer(ephemeral=True) event = self._build_slash_event(interaction, command_text) await self.handle_message(event) @@ -1644,6 +2026,11 @@ class DiscordAdapter(BasePlatformAdapter): async def slash_stop(interaction: discord.Interaction): await self._run_simple_slash(interaction, "/stop", "Stop requested~") + @tree.command(name="steer", description="Inject a message after the next tool call (no interrupt)") + @discord.app_commands.describe(prompt="Text to inject into the agent's next tool result") + async def slash_steer(interaction: discord.Interaction, prompt: str): + await 
self._run_simple_slash(interaction, f"/steer {prompt}".strip()) + @tree.command(name="compress", description="Compress conversation context") async def slash_compress(interaction: discord.Interaction): await self._run_simple_slash(interaction, "/compress") @@ -1696,6 +2083,10 @@ class DiscordAdapter(BasePlatformAdapter): async def slash_update(interaction: discord.Interaction): await self._run_simple_slash(interaction, "/update", "Update initiated~") + @tree.command(name="restart", description="Gracefully restart the Hermes gateway") + async def slash_restart(interaction: discord.Interaction): + await self._run_simple_slash(interaction, "/restart", "Restart requested~") + @tree.command(name="approve", description="Approve a pending dangerous command") @discord.app_commands.describe(scope="Optional: 'all', 'session', 'always', 'all session', 'all always'") async def slash_approve(interaction: discord.Interaction, scope: str = ""): @@ -1736,46 +2127,200 @@ class DiscordAdapter(BasePlatformAdapter): async def slash_btw(interaction: discord.Interaction, question: str): await self._run_simple_slash(interaction, f"/btw {question}") - # Register installed skills as native slash commands (parity with - # Telegram, which uses telegram_menu_commands() in commands.py). - # Discord allows up to 100 application commands globally. - _DISCORD_CMD_LIMIT = 100 + # ── Auto-register any gateway-available commands not yet on the tree ── + # This ensures new commands added to COMMAND_REGISTRY in + # hermes_cli/commands.py automatically appear as Discord slash + # commands without needing a manual entry here. 
try: - from hermes_cli.commands import discord_skill_commands + from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates - existing_names = {cmd.name for cmd in tree.get_commands()} - remaining_slots = max(0, _DISCORD_CMD_LIMIT - len(existing_names)) + already_registered = set() + try: + already_registered = {cmd.name for cmd in tree.get_commands()} + except Exception: + pass - skill_entries, skipped = discord_skill_commands( - max_slots=remaining_slots, - reserved_names=existing_names, - ) + config_overrides = _resolve_config_gates() - for discord_name, description, cmd_key in skill_entries: - # Closure factory to capture cmd_key per iteration - def _make_skill_handler(_key: str): - async def _skill_slash(interaction: discord.Interaction, args: str = ""): - await self._run_simple_slash(interaction, f"{_key} {args}".strip()) - return _skill_slash + for cmd_def in COMMAND_REGISTRY: + if not _is_gateway_available(cmd_def, config_overrides): + continue + # Discord command names: lowercase, hyphens OK, max 32 chars. + discord_name = cmd_def.name.lower()[:32] + if discord_name in already_registered: + continue + # Skip aliases that overlap with already-registered names + # (aliases for explicitly registered commands are handled above). + desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100] + has_args = bool(cmd_def.args_hint) - handler = _make_skill_handler(cmd_key) - handler.__name__ = f"skill_{discord_name.replace('-', '_')}" + if has_args: + # Command takes optional arguments — create handler with + # an optional ``args`` string parameter. 
+ def _make_args_handler(_name: str, _hint: str): + @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100]) + async def _handler(interaction: discord.Interaction, args: str = ""): + await self._run_simple_slash( + interaction, f"/{_name} {args}".strip() + ) + _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}" + return _handler - cmd = discord.app_commands.Command( + handler = _make_args_handler(cmd_def.name, cmd_def.args_hint) + else: + # Parameterless command. + def _make_simple_handler(_name: str): + async def _handler(interaction: discord.Interaction): + await self._run_simple_slash(interaction, f"/{_name}") + _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}" + return _handler + + handler = _make_simple_handler(cmd_def.name) + + auto_cmd = discord.app_commands.Command( name=discord_name, - description=description, + description=desc, callback=handler, ) - discord.app_commands.describe(args="Optional arguments for the skill")(cmd) - tree.add_command(cmd) + try: + tree.add_command(auto_cmd) + already_registered.add(discord_name) + except Exception: + # Silently skip commands that fail registration (e.g. + # name conflict with a subcommand group). + pass - if skipped: - logger.warning( - "[%s] Discord slash command limit reached (%d): %d skill(s) not registered", - self.name, _DISCORD_CMD_LIMIT, skipped, + logger.debug( + "Discord auto-registered %d commands from COMMAND_REGISTRY", + len(already_registered), + ) + except Exception as e: + logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e) + + # Register skills under a single /skill command group with category + # subcommand groups. This uses 1 top-level slot instead of N, + # supporting up to 25 categories × 25 skills = 625 skills. + self._register_skill_group(tree) + + def _register_skill_group(self, tree) -> None: + """Register a single ``/skill`` command with autocomplete on the name. + + Discord enforces an ~8000-byte per-command payload limit. 
The older + nested layout (``/skill ``) registered one giant + command whose serialized payload grew linearly with the skill + catalog — with the default ~75 skills the payload was ~14 KB and + ``tree.sync()`` rejected the entire slash-command batch (issues + #11321, #10259, #11385, #10261, #10214). + + Autocomplete options are fetched dynamically by Discord when the + user types — they do NOT count against the per-command registration + budget. So we register ONE flat ``/skill`` command with + ``name: str`` (autocompleted) and ``args: str = ""``. This scales + to thousands of skills with no size math, no splitting, and no + hidden skills. The slash picker also becomes more discoverable — + Discord live-filters by the user's typed prefix against both the + skill name and its description. + """ + try: + from hermes_cli.commands import discord_skill_commands_by_category + + existing_names = set() + try: + existing_names = {cmd.name for cmd in tree.get_commands()} + except Exception: + pass + + # Reuse the existing collector for consistent filtering + # (per-platform disabled, hub-excluded, name clamping), then + # flatten — the category grouping was only useful for the + # nested layout. + categories, uncategorized, hidden = discord_skill_commands_by_category( + reserved_names=existing_names, + ) + entries: list[tuple[str, str, str]] = list(uncategorized) + for cat_skills in categories.values(): + entries.extend(cat_skills) + + if not entries: + return + + # Stable alphabetical order so the autocomplete suggestion + # list is predictable across restarts. + entries.sort(key=lambda t: t[0]) + + # name -> (description, cmd_key) — used by both the autocomplete + # callback and the handler for O(1) dispatch. + skill_lookup: dict[str, tuple[str, str]] = { + n: (d, k) for n, d, k in entries + } + + async def _autocomplete_name( + interaction: "discord.Interaction", current: str, + ) -> list: + """Filter skills by the user's typed prefix. 
+ + Matches both the skill name and its description so + "/skill pdf" surfaces skills whose description mentions + PDFs even if the name doesn't. Discord caps this list at + 25 entries per query. + """ + q = (current or "").strip().lower() + choices: list = [] + for name, desc, _key in entries: + if not q or q in name.lower() or (desc and q in desc.lower()): + if desc: + label = f"{name} — {desc}" + else: + label = name + # Discord's Choice.name is capped at 100 chars. + if len(label) > 100: + label = label[:97] + "..." + choices.append( + discord.app_commands.Choice(name=label, value=name) + ) + if len(choices) >= 25: + break + return choices + + @discord.app_commands.describe( + name="Which skill to run", + args="Optional arguments for the skill", + ) + @discord.app_commands.autocomplete(name=_autocomplete_name) + async def _skill_handler( + interaction: "discord.Interaction", name: str, args: str = "", + ): + entry = skill_lookup.get(name) + if not entry: + await interaction.response.send_message( + f"Unknown skill: `{name}`. 
Start typing for " + f"autocomplete suggestions.", + ephemeral=True, + ) + return + _desc, cmd_key = entry + await self._run_simple_slash( + interaction, f"{cmd_key} {args}".strip() + ) + + cmd = discord.app_commands.Command( + name="skill", + description="Run a Hermes skill", + callback=_skill_handler, + ) + tree.add_command(cmd) + + logger.info( + "[%s] Registered /skill command with %d skill(s) via autocomplete", + self.name, len(entries), + ) + if hidden: + logger.info( + "[%s] %d skill(s) filtered out of /skill (name clamp / reserved)", + self.name, hidden, ) except Exception as exc: - logger.warning("[%s] Failed to register skill slash commands: %s", self.name, exc) + logger.warning("[%s] Failed to register /skill command: %s", self.name, exc) def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent: """Build a MessageEvent from a Discord slash command interaction.""" @@ -1812,11 +2357,14 @@ class DiscordAdapter(BasePlatformAdapter): ) msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT + channel_id = str(interaction.channel_id) + parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "") return MessageEvent( text=text, message_type=msg_type, source=source, raw_message=interaction, + channel_prompt=self._resolve_channel_prompt(channel_id, parent_id or None), ) # ------------------------------------------------------------------ @@ -1887,14 +2435,17 @@ class DiscordAdapter(BasePlatformAdapter): chat_topic=chat_topic, ) - _parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "") + _parent_channel = self._thread_parent_channel(getattr(interaction, "channel", None)) + _parent_id = str(getattr(_parent_channel, "id", "") or "") _skills = self._resolve_channel_skills(thread_id, _parent_id or None) + _channel_prompt = self._resolve_channel_prompt(thread_id, _parent_id or None) event = MessageEvent( text=text, message_type=MessageType.TEXT, 
source=source, raw_message=interaction, auto_skill=_skills, + channel_prompt=_channel_prompt, ) await self.handle_message(event) @@ -1923,6 +2474,31 @@ class DiscordAdapter(BasePlatformAdapter): return list(dict.fromkeys(skills)) # dedup, preserve order return None + def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None: + """Resolve a Discord per-channel prompt, preferring the exact channel over its parent.""" + from gateway.platforms.base import resolve_channel_prompt + return resolve_channel_prompt(self.config.extra, channel_id, parent_id) + + def _discord_require_mention(self) -> bool: + """Return whether Discord channel messages require a bot mention.""" + configured = self.config.extra.get("require_mention") + if configured is not None: + if isinstance(configured, str): + return configured.lower() not in ("false", "0", "no", "off") + return bool(configured) + return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off") + + def _discord_free_response_channels(self) -> set: + """Return Discord channel IDs where no bot mention is required.""" + raw = self.config.extra.get("free_response_channels") + if raw is None: + raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "") + if isinstance(raw, list): + return {str(part).strip() for part in raw if str(part).strip()} + if isinstance(raw, str) and raw.strip(): + return {part.strip() for part in raw.split(",") if part.strip()} + return set() + def _thread_parent_channel(self, channel: Any) -> Any: """Return the parent text channel when invoked from a thread.""" return getattr(channel, "parent", None) or channel @@ -2025,8 +2601,15 @@ class DiscordAdapter(BasePlatformAdapter): Returns the created thread object, or ``None`` on failure. """ - # Build a short thread name from the message + # Build a short thread name from the message. 
Strip Discord mention + # syntax (users / roles / channels) so thread titles don't end up + # showing raw <@id>, <@&id>, or <#id> markers — the ID isn't + # meaningful to humans glancing at the thread list (#6336). content = (message.content or "").strip() + # <@123>, <@!123>, <@&123>, <#123> — collapse to empty; normalize spaces. + content = re.sub(r"<@[!&]?\d+>", "", content) + content = re.sub(r"<#\d+>", "", content) + content = re.sub(r"\s+", " ", content).strip() thread_name = content[:80] if content else "Hermes" if len(content) > 80: thread_name = thread_name[:77] + "..." @@ -2034,9 +2617,25 @@ class DiscordAdapter(BasePlatformAdapter): try: thread = await message.create_thread(name=thread_name, auto_archive_duration=1440) return thread - except Exception as e: - logger.warning("[%s] Auto-thread creation failed: %s", self.name, e) - return None + except Exception as direct_error: + display_name = getattr(getattr(message, "author", None), "display_name", None) or "unknown user" + reason = f"Auto-threaded from mention by {display_name}" + try: + seed_msg = await message.channel.send(f"\U0001f9f5 Thread created by Hermes: **{thread_name}**") + thread = await seed_msg.create_thread( + name=thread_name, + auto_archive_duration=1440, + reason=reason, + ) + return thread + except Exception as fallback_error: + logger.warning( + "[%s] Auto-thread creation failed. Direct error: %s. Fallback error: %s", + self.name, + direct_error, + fallback_error, + ) + return None async def send_exec_approval( self, chat_id: str, command: str, session_key: str, @@ -2223,6 +2822,124 @@ class DiscordAdapter(BasePlatformAdapter): return f"{parent_name} / {thread_name}" return thread_name + # ------------------------------------------------------------------ + # Attachment download helpers + # + # Discord attachments (images / audio / documents) are fetched via the + # authenticated bot session whenever the Attachment object exposes + # ``read()``. 
That sidesteps two classes of bug that hit the older + # plain-HTTP path: + # + # 1. ``cdn.discordapp.com`` URLs increasingly require bot auth on + # download — unauthenticated httpx sees 403 Forbidden. + # (issue #8242) + # 2. Some user environments (VPNs, corporate DNS, tunnels) resolve + # ``cdn.discordapp.com`` to private-looking IPs that our + # ``is_safe_url`` guard classifies as SSRF risks. Routing the + # fetch through discord.py's own HTTP client handles DNS + # internally so our guard isn't consulted for the attachment + # path. (issue #6587) + # + # If ``att.read()`` is unavailable (unexpected object shape / test + # stub) or the bot session fetch fails, we fall back to the existing + # SSRF-gated URL downloaders. The fallback keeps defense-in-depth + # against any future Discord payload-schema drift that could slip a + # non-CDN URL into the ``att.url`` field. (issue #11345) + # ------------------------------------------------------------------ + + async def _read_attachment_bytes(self, att) -> Optional[bytes]: + """Read an attachment via discord.py's authenticated bot session. + + Returns the raw bytes on success, or ``None`` if ``att`` doesn't + expose a callable ``read()`` or the read itself fails. Callers + should treat ``None`` as a signal to fall back to the URL-based + downloaders. + """ + reader = getattr(att, "read", None) + if reader is None or not callable(reader): + return None + try: + return await reader() + except Exception as e: + logger.warning( + "[Discord] Authenticated attachment read failed for %s: %s", + getattr(att, "filename", None) or getattr(att, "url", ""), + e, + ) + return None + + async def _cache_discord_image(self, att, ext: str) -> str: + """Cache a Discord image attachment to local disk. + + Primary path: ``att.read()`` + ``cache_image_from_bytes`` + (authenticated, no SSRF gate). + + Fallback: ``cache_image_from_url`` (plain httpx, SSRF-gated). 
+ """ + raw_bytes = await self._read_attachment_bytes(att) + if raw_bytes is not None: + try: + return cache_image_from_bytes(raw_bytes, ext=ext) + except Exception as e: + logger.debug( + "[Discord] cache_image_from_bytes rejected att.read() data; falling back to URL: %s", + e, + ) + return await cache_image_from_url(att.url, ext=ext) + + async def _cache_discord_audio(self, att, ext: str) -> str: + """Cache a Discord audio attachment to local disk. + + Primary path: ``att.read()`` + ``cache_audio_from_bytes`` + (authenticated, no SSRF gate). + + Fallback: ``cache_audio_from_url`` (plain httpx, SSRF-gated). + """ + raw_bytes = await self._read_attachment_bytes(att) + if raw_bytes is not None: + try: + return cache_audio_from_bytes(raw_bytes, ext=ext) + except Exception as e: + logger.debug( + "[Discord] cache_audio_from_bytes failed; falling back to URL: %s", + e, + ) + return await cache_audio_from_url(att.url, ext=ext) + + async def _cache_discord_document(self, att, ext: str) -> bytes: + """Download a Discord document attachment and return the raw bytes. + + Primary path: ``att.read()`` (authenticated, no SSRF gate). + + Fallback: SSRF-gated ``aiohttp`` download. This closes the gap + where the old document path made raw ``aiohttp.ClientSession`` + requests with no safety check (#11345). The caller is responsible + for passing the returned bytes to ``cache_document_from_bytes`` + (and, where applicable, for injecting text content). + """ + raw_bytes = await self._read_attachment_bytes(att) + if raw_bytes is not None: + return raw_bytes + + # Fallback: SSRF-gated URL download. 
+ if not is_safe_url(att.url): + raise ValueError( + f"Blocked unsafe attachment URL (SSRF protection): {att.url}" + ) + import aiohttp + from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp + _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY") + _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy) + async with aiohttp.ClientSession(**_sess_kw) as session: + async with session.get( + att.url, + timeout=aiohttp.ClientTimeout(total=30), + **_req_kw, + ) as resp: + if resp.status != 200: + raise Exception(f"HTTP {resp.status}") + return await resp.read() + async def _handle_message(self, message: DiscordMessage) -> None: """Handle incoming Discord messages.""" # In server channels (not DMs), require the bot to be @mentioned @@ -2265,12 +2982,11 @@ class DiscordAdapter(BasePlatformAdapter): logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids) return - free_channels_raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "") - free_channels = {ch.strip() for ch in free_channels_raw.split(",") if ch.strip()} + free_channels = self._discord_free_response_channels() if parent_channel_id: channel_ids.add(parent_channel_id) - require_mention = os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no") + require_mention = self._discord_require_mention() # Voice-linked text channels act as free-response while voice is active. # Only the exact bound channel gets the exemption, not sibling threads. 
voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()} @@ -2298,9 +3014,10 @@ class DiscordAdapter(BasePlatformAdapter): if not is_thread and not isinstance(message.channel, discord.DMChannel): no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "") no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()} - skip_thread = bool(channel_ids & no_thread_channels) + skip_thread = bool(channel_ids & no_thread_channels) or is_free_channel auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes") - if auto_thread and not skip_thread and not is_voice_linked_channel: + is_reply_message = getattr(message, "type", None) == discord.MessageType.reply + if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message: thread = await self._auto_create_thread(message) if thread: is_thread = True @@ -2361,6 +3078,7 @@ class DiscordAdapter(BasePlatformAdapter): user_name=message.author.display_name, thread_id=thread_id, chat_topic=chat_topic, + is_bot=getattr(message.author, "bot", False), ) # Build media URLs -- download image attachments to local cache so the @@ -2376,7 +3094,7 @@ class DiscordAdapter(BasePlatformAdapter): ext = "." + content_type.split("/")[-1].split(";")[0] if ext not in (".jpg", ".jpeg", ".png", ".gif", ".webp"): ext = ".jpg" - cached_path = await cache_image_from_url(att.url, ext=ext) + cached_path = await self._cache_discord_image(att, ext) media_urls.append(cached_path) media_types.append(content_type) print(f"[Discord] Cached user image: {cached_path}", flush=True) @@ -2390,7 +3108,7 @@ class DiscordAdapter(BasePlatformAdapter): ext = "." 
+ content_type.split("/")[-1].split(";")[0] if ext not in (".ogg", ".mp3", ".wav", ".webm", ".m4a"): ext = ".ogg" - cached_path = await cache_audio_from_url(att.url, ext=ext) + cached_path = await self._cache_discord_audio(att, ext) media_urls.append(cached_path) media_types.append(content_type) print(f"[Discord] Cached user audio: {cached_path}", flush=True) @@ -2421,19 +3139,7 @@ class DiscordAdapter(BasePlatformAdapter): ) else: try: - import aiohttp - from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp - _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY") - _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy) - async with aiohttp.ClientSession(**_sess_kw) as session: - async with session.get( - att.url, - timeout=aiohttp.ClientTimeout(total=30), - **_req_kw, - ) as resp: - if resp.status != 200: - raise Exception(f"HTTP {resp.status}") - raw_bytes = await resp.read() + raw_bytes = await self._cache_discord_document(att, ext) cached_path = cache_document_from_bytes( raw_bytes, att.filename or f"document{ext}" ) @@ -2474,6 +3180,7 @@ class DiscordAdapter(BasePlatformAdapter): _parent_id = str(getattr(_chan, "parent_id", "") or "") _chan_id = str(getattr(_chan, "id", "")) _skills = self._resolve_channel_skills(_chan_id, _parent_id or None) + _channel_prompt = self._resolve_channel_prompt(_chan_id, _parent_id or None) reply_to_id = None reply_to_text = None @@ -2494,6 +3201,7 @@ class DiscordAdapter(BasePlatformAdapter): reply_to_text=reply_to_text, timestamp=message.created_at, auto_skill=_skills, + channel_prompt=_channel_prompt, ) # Track thread participation so the bot won't require @mention for @@ -2571,7 +3279,20 @@ class DiscordAdapter(BasePlatformAdapter): "[Discord] Flushing text batch %s (%d chars)", key, len(event.text or ""), ) - await self.handle_message(event) + # Shield the downstream dispatch so that a subsequent chunk + # arriving while handle_message is mid-flight cannot cancel + # the running agent turn. 
_enqueue_text_event always cancels + # the prior flush task when a new chunk lands; without this + # shield, CancelledError would propagate from our task down + # into handle_message → the agent's streaming request, + # aborting the response the user was waiting on. The new + # chunk is handled by the fresh flush task regardless. + await asyncio.shield(self.handle_message(event)) + except asyncio.CancelledError: + # Only reached if cancel landed before the pop — the shielded + # handle_message is unaffected either way. Let the task exit + # cleanly so the finally block cleans up. + pass finally: if self._pending_text_batch_tasks.get(key) is current_task: self._pending_text_batch_tasks.pop(key, None) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 01b1c3a14b..85cebe5381 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -8,7 +8,8 @@ Supports: - Gateway allowlist integration via FEISHU_ALLOWED_USERS - Persistent dedup state across restarts - Per-chat serial message processing (matches openclaw createChatQueue) -- Persistent ACK emoji reaction on inbound messages +- Processing status reactions: Typing while working, removed on success, + swapped for CrossMark on failure - Reaction events routed as synthetic text events (matches openclaw) - Interactive card button-click events routed as synthetic COMMAND events - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker) @@ -29,6 +30,7 @@ import re import threading import time import uuid +from collections import OrderedDict from dataclasses import dataclass, field from datetime import datetime from pathlib import Path @@ -98,6 +100,7 @@ from gateway.platforms.base import ( BasePlatformAdapter, MessageEvent, MessageType, + ProcessingOutcome, SendResult, SUPPORTED_DOCUMENT_TYPES, cache_document_from_bytes, @@ -119,6 +122,8 @@ _MARKDOWN_HINT_RE = re.compile( re.MULTILINE, ) _MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)") 
+_MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$") +_MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$") _MENTION_RE = re.compile(r"@_user_\d+") _MULTISPACE_RE = re.compile(r"[ \t]{2,}") _POST_CONTENT_INVALID_RE = re.compile(r"content format of the post type is incorrect", re.IGNORECASE) @@ -188,7 +193,17 @@ _APPROVAL_LABEL_MAP: Dict[str, str] = { } _FEISHU_BOT_MSG_TRACK_SIZE = 512 # LRU size for tracking sent message IDs _FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003}) # reply target withdrawn/missing → create fallback -_FEISHU_ACK_EMOJI = "OK" + +# Feishu reactions render as prominent badges, unlike Discord/Telegram's +# small footer emoji — a success badge on every message would add noise, so +# we only mark start (Typing) and failure (CrossMark); the reply itself is +# the success signal. +_FEISHU_REACTION_IN_PROGRESS = "Typing" +_FEISHU_REACTION_FAILURE = "CrossMark" +# Bound on the (message_id → reaction_id) handle cache. Happy-path entries +# drain on completion; the cap is a safeguard against unbounded growth from +# delete-failures, not a capacity plan. +_FEISHU_PROCESSING_REACTION_CACHE_SIZE = 1024 # QR onboarding constants _ONBOARD_ACCOUNTS_URLS = { @@ -430,23 +445,66 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int: def _build_markdown_post_payload(content: str) -> str: + rows = _build_markdown_post_rows(content) return json.dumps( { "zh_cn": { - "content": [ - [ - { - "tag": "md", - "text": content, - } - ] - ], + "content": rows, } }, ensure_ascii=False, ) +def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]: + """Build Feishu post rows while isolating fenced code blocks. + + Feishu's `md` renderer can swallow trailing content when a fenced code block + appears inside one large markdown element. Split the reply at real fence + lines so prose before/after the code block remains visible while code stays + in a dedicated row. 
+ """ + if not content: + return [[{"tag": "md", "text": ""}]] + if "```" not in content: + return [[{"tag": "md", "text": content}]] + + rows: List[List[Dict[str, str]]] = [] + current: List[str] = [] + in_code_block = False + + def _flush_current() -> None: + nonlocal current + if not current: + return + segment = "\n".join(current) + if segment.strip(): + rows.append([{"tag": "md", "text": segment}]) + current = [] + + for raw_line in content.splitlines(): + stripped_line = raw_line.strip() + is_fence = bool( + _MARKDOWN_FENCE_CLOSE_RE.match(stripped_line) + if in_code_block + else _MARKDOWN_FENCE_OPEN_RE.match(stripped_line) + ) + + if is_fence: + if not in_code_block: + _flush_current() + current.append(raw_line) + in_code_block = not in_code_block + if not in_code_block: + _flush_current() + continue + + current.append(raw_line) + + _flush_current() + return rows or [[{"tag": "md", "text": content}]] + + def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult: resolved = _resolve_post_payload(payload) if not resolved: @@ -1073,6 +1131,13 @@ class FeishuAdapter(BasePlatformAdapter): self._webhook_rate_counts: Dict[str, tuple[int, float]] = {} # rate_key → (count, window_start) self._webhook_anomaly_counts: Dict[str, tuple[int, str, float]] = {} # ip → (count, last_status, first_seen) self._card_action_tokens: Dict[str, float] = {} # token → first_seen_time + # Inbound events that arrived before the adapter loop was ready + # (e.g. during startup/restart or network-flap reconnect). A single + # drainer thread replays them as soon as the loop becomes available. 
+ self._pending_inbound_events: List[Any] = [] + self._pending_inbound_lock = threading.Lock() + self._pending_drain_scheduled = False + self._pending_inbound_max_depth = 1000 # cap queue; drop oldest beyond self._chat_locks: Dict[str, asyncio.Lock] = {} # chat_id → lock (per-chat serial processing) self._sent_message_ids_to_chat: Dict[str, str] = {} # message_id → chat_id (for reaction routing) self._sent_message_id_order: List[str] = [] # LRU order for _sent_message_ids_to_chat @@ -1089,6 +1154,9 @@ class FeishuAdapter(BasePlatformAdapter): # Exec approval button state (approval_id → {session_key, message_id, chat_id}) self._approval_state: Dict[int, Dict[str, str]] = {} self._approval_counter = itertools.count(1) + # Feishu reaction deletion requires the opaque reaction_id returned + # by create, so we cache it per message_id. + self._pending_processing_reactions: "OrderedDict[str, str]" = OrderedDict() self._load_seen_message_ids() @staticmethod @@ -1219,6 +1287,12 @@ class FeishuAdapter(BasePlatformAdapter): .register_p2_card_action_trigger(self._on_card_action_trigger) .register_p2_im_chat_member_bot_added_v1(self._on_bot_added_to_chat) .register_p2_im_chat_member_bot_deleted_v1(self._on_bot_removed_from_chat) + .register_p2_im_chat_access_event_bot_p2p_chat_entered_v1(self._on_p2p_chat_entered) + .register_p2_im_message_recalled_v1(self._on_message_recalled) + .register_p2_customized_event( + "drive.notice.comment_add_v1", + self._on_drive_comment_event, + ) .build() ) @@ -1410,6 +1484,8 @@ class FeishuAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Feishu text/post message.""" if not self._client: @@ -1757,10 +1833,22 @@ class FeishuAdapter(BasePlatformAdapter): # ========================================================================= def _on_message_event(self, data: Any) -> None: - """Normalize Feishu inbound events into MessageEvent.""" + """Normalize 
Feishu inbound events into MessageEvent. + + Called by the lark_oapi SDK's event dispatcher on a background thread. + If the adapter loop is not currently accepting callbacks (brief window + during startup/restart or network-flap reconnect), the event is queued + for replay instead of dropped. + """ loop = self._loop - if loop is None or bool(getattr(loop, "is_closed", lambda: False)()): - logger.warning("[Feishu] Dropping inbound message before adapter loop is ready") + if not self._loop_accepts_callbacks(loop): + start_drainer = self._enqueue_pending_inbound_event(data) + if start_drainer: + threading.Thread( + target=self._drain_pending_inbound_events, + name="feishu-pending-inbound-drainer", + daemon=True, + ).start() return future = asyncio.run_coroutine_threadsafe( self._handle_message_event_data(data), @@ -1768,6 +1856,124 @@ class FeishuAdapter(BasePlatformAdapter): ) future.add_done_callback(self._log_background_failure) + def _enqueue_pending_inbound_event(self, data: Any) -> bool: + """Append an event to the pending-inbound queue. + + Returns True if the caller should spawn a drainer thread (no drainer + currently scheduled), False if a drainer is already running and will + pick up the new event on its next pass. + """ + with self._pending_inbound_lock: + if len(self._pending_inbound_events) >= self._pending_inbound_max_depth: + # Queue full — drop the oldest to make room. This happens only + # if the loop stays unavailable for an extended period AND the + # WS keeps firing callbacks. Still better than silent drops. 
+ dropped = self._pending_inbound_events.pop(0) + try: + event = getattr(dropped, "event", None) + message = getattr(event, "message", None) + message_id = str(getattr(message, "message_id", "") or "unknown") + except Exception: + message_id = "unknown" + logger.error( + "[Feishu] Pending-inbound queue full (%d); dropped oldest event %s", + self._pending_inbound_max_depth, + message_id, + ) + self._pending_inbound_events.append(data) + depth = len(self._pending_inbound_events) + should_start = not self._pending_drain_scheduled + if should_start: + self._pending_drain_scheduled = True + logger.warning( + "[Feishu] Queued inbound event for replay (loop not ready, queue depth=%d)", + depth, + ) + return should_start + + def _drain_pending_inbound_events(self) -> None: + """Replay queued inbound events once the adapter loop is ready. + + Runs in a dedicated daemon thread. Polls ``_running`` and + ``_loop_accepts_callbacks`` until events can be dispatched or the + adapter shuts down. A single drainer handles the entire queue; + concurrent ``_on_message_event`` calls just append. + """ + poll_interval = 0.25 + max_wait_seconds = 120.0 # safety cap: drop queue after 2 minutes + waited = 0.0 + try: + while True: + if not getattr(self, "_running", True): + # Adapter shutting down — drop queued events rather than + # holding them against a closed loop. + with self._pending_inbound_lock: + dropped = len(self._pending_inbound_events) + self._pending_inbound_events.clear() + if dropped: + logger.warning( + "[Feishu] Dropped %d queued inbound event(s) during shutdown", + dropped, + ) + return + loop = self._loop + if self._loop_accepts_callbacks(loop): + with self._pending_inbound_lock: + batch = self._pending_inbound_events[:] + self._pending_inbound_events.clear() + if not batch: + # Queue emptied between check and grab; done. 
+ with self._pending_inbound_lock: + if not self._pending_inbound_events: + return + continue + dispatched = 0 + requeue: List[Any] = [] + for event in batch: + try: + fut = asyncio.run_coroutine_threadsafe( + self._handle_message_event_data(event), + loop, + ) + fut.add_done_callback(self._log_background_failure) + dispatched += 1 + except RuntimeError: + # Loop closed between check and submit — requeue + # and poll again. + requeue.append(event) + if requeue: + with self._pending_inbound_lock: + self._pending_inbound_events[:0] = requeue + if dispatched: + logger.info( + "[Feishu] Replayed %d queued inbound event(s)", + dispatched, + ) + if not requeue: + # Successfully drained; check if more arrived while + # we were dispatching and exit if not. + with self._pending_inbound_lock: + if not self._pending_inbound_events: + return + # More events queued or requeue pending — loop again. + continue + if waited >= max_wait_seconds: + with self._pending_inbound_lock: + dropped = len(self._pending_inbound_events) + self._pending_inbound_events.clear() + logger.error( + "[Feishu] Adapter loop unavailable for %.0fs; " + "dropped %d queued inbound event(s)", + max_wait_seconds, + dropped, + ) + return + time.sleep(poll_interval) + waited += poll_interval + finally: + with self._pending_inbound_lock: + self._pending_drain_scheduled = False + async def _handle_message_event_data(self, data: Any) -> None: """Shared inbound message handling for websocket and webhook transports.""" event = getattr(data, "event", None) @@ -1782,8 +1988,8 @@ class FeishuAdapter(BasePlatformAdapter): if not message_id or self._is_duplicate(message_id): logger.debug("[Feishu] Dropping duplicate/missing message_id: %s", message_id) return - if getattr(sender, "sender_type", "") == "bot": - logger.debug("[Feishu] Dropping bot-originated event: %s", message_id) + if self._is_self_sent_bot_message(event): + logger.debug("[Feishu] Dropping self-sent bot event: %s", message_id) return chat_type = 
getattr(message, "chat_type", "p2p") @@ -1820,6 +2026,31 @@ class FeishuAdapter(BasePlatformAdapter): logger.info("[Feishu] Bot removed from chat: %s", chat_id) self._chat_info_cache.pop(chat_id, None) + def _on_p2p_chat_entered(self, data: Any) -> None: + logger.debug("[Feishu] User entered P2P chat with bot") + + def _on_message_recalled(self, data: Any) -> None: + logger.debug("[Feishu] Message recalled by user") + + def _on_drive_comment_event(self, data: Any) -> None: + """Handle drive document comment notification (drive.notice.comment_add_v1). + + Delegates to :mod:`gateway.platforms.feishu_comment` for parsing, + logging, and reaction. Scheduling follows the same + ``run_coroutine_threadsafe`` pattern used by ``_on_message_event``. + """ + from gateway.platforms.feishu_comment import handle_drive_comment_event + + loop = self._loop + if not self._loop_accepts_callbacks(loop): + logger.warning("[Feishu] Dropping drive comment event before adapter loop is ready") + return + future = asyncio.run_coroutine_threadsafe( + handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id), + loop, + ) + future.add_done_callback(self._log_background_failure) + def _on_reaction_event(self, event_type: str, data: Any) -> None: """Route user reactions on bot messages as synthetic text events.""" event = getattr(data, "event", None) @@ -1835,12 +2066,12 @@ class FeishuAdapter(BasePlatformAdapter): operator_type, emoji_type, ) - # Only process reactions from real users. Ignore app/bot-generated reactions - # and Hermes' own ACK emoji to avoid feedback loops. + # Drop bot/app-origin reactions to break the feedback loop from our + # own lifecycle reactions. A human reacting with the same emoji (e.g. + # clicking Typing on a bot message) is still routed through. 
loop = self._loop if ( operator_type in {"bot", "app"} - or emoji_type == _FEISHU_ACK_EMOJI or not message_id or loop is None or bool(getattr(loop, "is_closed", lambda: False)()) @@ -2064,33 +2295,35 @@ class FeishuAdapter(BasePlatformAdapter): async def _handle_message_with_guards(self, event: MessageEvent) -> None: """Dispatch a single event through the agent pipeline with per-chat serialization - and a persistent ACK emoji reaction before processing starts. + before handing the event off to the agent. - - Per-chat lock: ensures messages in the same chat are processed one at a time - (matches openclaw's createChatQueue serial queue behaviour). - - ACK indicator: adds a CHECK reaction to the triggering message before handing - off to the agent and leaves it in place as a receipt marker. + Per-chat lock ensures messages in the same chat are processed one at a + time (matches openclaw's createChatQueue serial queue behaviour). """ chat_id = getattr(event.source, "chat_id", "") or "" if event.source else "" chat_lock = self._get_chat_lock(chat_id) async with chat_lock: - message_id = event.message_id - if message_id: - await self._add_ack_reaction(message_id) await self.handle_message(event) - async def _add_ack_reaction(self, message_id: str) -> Optional[str]: - """Add a persistent ACK emoji reaction to signal the message was received.""" - if not self._client or not message_id: + # ========================================================================= + # Processing status reactions + # ========================================================================= + + def _reactions_enabled(self) -> bool: + return os.getenv("FEISHU_REACTIONS", "true").strip().lower() not in ("false", "0", "no") + + async def _add_reaction(self, message_id: str, emoji_type: str) -> Optional[str]: + """Return the reaction_id on success, else None. 
The id is needed later for deletion.""" + if not self._client or not message_id or not emoji_type: return None try: - from lark_oapi.api.im.v1 import ( # lazy import — keeps optional dep optional + from lark_oapi.api.im.v1 import ( CreateMessageReactionRequest, CreateMessageReactionRequestBody, ) body = ( CreateMessageReactionRequestBody.builder() - .reaction_type({"emoji_type": _FEISHU_ACK_EMOJI}) + .reaction_type({"emoji_type": emoji_type}) .build() ) request = ( @@ -2103,16 +2336,93 @@ class FeishuAdapter(BasePlatformAdapter): if response and getattr(response, "success", lambda: False)(): data = getattr(response, "data", None) return getattr(data, "reaction_id", None) - logger.warning( - "[Feishu] Failed to add ack reaction to %s: code=%s msg=%s", + logger.debug( + "[Feishu] Add reaction %s on %s rejected: code=%s msg=%s", + emoji_type, message_id, getattr(response, "code", None), getattr(response, "msg", None), ) except Exception: - logger.warning("[Feishu] Failed to add ack reaction to %s", message_id, exc_info=True) + logger.warning( + "[Feishu] Add reaction %s on %s raised", + emoji_type, + message_id, + exc_info=True, + ) return None + async def _remove_reaction(self, message_id: str, reaction_id: str) -> bool: + if not self._client or not message_id or not reaction_id: + return False + try: + from lark_oapi.api.im.v1 import DeleteMessageReactionRequest + request = ( + DeleteMessageReactionRequest.builder() + .message_id(message_id) + .reaction_id(reaction_id) + .build() + ) + response = await asyncio.to_thread(self._client.im.v1.message_reaction.delete, request) + if response and getattr(response, "success", lambda: False)(): + return True + logger.debug( + "[Feishu] Remove reaction %s on %s rejected: code=%s msg=%s", + reaction_id, + message_id, + getattr(response, "code", None), + getattr(response, "msg", None), + ) + except Exception: + logger.warning( + "[Feishu] Remove reaction %s on %s raised", + reaction_id, + message_id, + exc_info=True, + ) + 
return False + + def _remember_processing_reaction(self, message_id: str, reaction_id: str) -> None: + cache = self._pending_processing_reactions + cache[message_id] = reaction_id + cache.move_to_end(message_id) + while len(cache) > _FEISHU_PROCESSING_REACTION_CACHE_SIZE: + cache.popitem(last=False) + + def _pop_processing_reaction(self, message_id: str) -> Optional[str]: + return self._pending_processing_reactions.pop(message_id, None) + + async def on_processing_start(self, event: MessageEvent) -> None: + if not self._reactions_enabled(): + return + message_id = event.message_id + if not message_id or message_id in self._pending_processing_reactions: + return + reaction_id = await self._add_reaction(message_id, _FEISHU_REACTION_IN_PROGRESS) + if reaction_id: + self._remember_processing_reaction(message_id, reaction_id) + + async def on_processing_complete( + self, event: MessageEvent, outcome: ProcessingOutcome + ) -> None: + if not self._reactions_enabled(): + return + message_id = event.message_id + if not message_id: + return + + start_reaction_id = self._pending_processing_reactions.get(message_id) + if start_reaction_id: + if not await self._remove_reaction(message_id, start_reaction_id): + # Don't stack a second badge on top of a Typing we couldn't + # remove — UI would read as both "working" and "done/failed" + # simultaneously. Keep the handle so LRU eventually evicts it. 
+ return + self._pop_processing_reaction(message_id) + + if outcome is ProcessingOutcome.FAILURE: + await self._add_reaction(message_id, _FEISHU_REACTION_FAILURE) + # ========================================================================= # Webhook server and security # ========================================================================= @@ -2445,6 +2755,8 @@ class FeishuAdapter(BasePlatformAdapter): self._on_reaction_event(event_type, data) elif event_type == "card.action.trigger": self._on_card_action_trigger(data) + elif event_type == "drive.notice.comment_add_v1": + self._on_drive_comment_event(data) else: logger.debug("[Feishu] Ignoring webhook event type: %s", event_type or "unknown") return web.json_response({"code": 0, "msg": "ok"}) @@ -3079,6 +3391,23 @@ class FeishuAdapter(BasePlatformAdapter): return self._post_mentions_bot(normalized.mentioned_ids) return False + def _is_self_sent_bot_message(self, event: Any) -> bool: + """Return True only for Feishu events emitted by this Hermes bot.""" + sender = getattr(event, "sender", None) + sender_type = str(getattr(sender, "sender_type", "") or "").strip().lower() + if sender_type not in {"bot", "app"}: + return False + + sender_id = getattr(sender, "sender_id", None) + sender_open_id = str(getattr(sender_id, "open_id", "") or "").strip() + sender_user_id = str(getattr(sender_id, "user_id", "") or "").strip() + + if self._bot_open_id and sender_open_id == self._bot_open_id: + return True + if self._bot_user_id and sender_user_id == self._bot_user_id: + return True + return False + def _message_mentions_bot(self, mentions: List[Any]) -> bool: """Check whether any mention targets the configured or inferred bot identity.""" for mention in mentions: @@ -3106,10 +3435,55 @@ class FeishuAdapter(BasePlatformAdapter): return False async def _hydrate_bot_identity(self) -> None: - """Best-effort discovery of bot identity for precise group mention gating.""" + """Best-effort discovery of bot identity for precise 
group mention gating + and self-sent bot event filtering. + + Populates ``_bot_open_id`` and ``_bot_name`` from /open-apis/bot/v3/info + (no extra scopes required beyond the tenant access token). Falls back to + the application info endpoint for ``_bot_name`` only when the first probe + doesn't return it. Each field is hydrated independently — a value already + supplied via env vars (FEISHU_BOT_OPEN_ID / FEISHU_BOT_USER_ID / + FEISHU_BOT_NAME) is preserved and skips its probe. + """ if not self._client: return - if any((self._bot_open_id, self._bot_user_id, self._bot_name)): + if self._bot_open_id and self._bot_name: + # Everything the self-send filter and precise mention gate need is + # already in place; nothing to probe. + return + + # Primary probe: /open-apis/bot/v3/info — returns bot_name + open_id, no + # extra scopes required. This is the same endpoint the onboarding wizard + # uses via probe_bot(). + if not self._bot_open_id or not self._bot_name: + try: + resp = await asyncio.to_thread( + self._client.request, + method="GET", + url="/open-apis/bot/v3/info", + body=None, + raw_response=True, + ) + content = getattr(resp, "content", None) + if content: + payload = json.loads(content) + parsed = _parse_bot_response(payload) or {} + open_id = (parsed.get("bot_open_id") or "").strip() + bot_name = (parsed.get("bot_name") or "").strip() + if open_id and not self._bot_open_id: + self._bot_open_id = open_id + if bot_name and not self._bot_name: + self._bot_name = bot_name + except Exception: + logger.debug( + "[Feishu] /bot/v3/info probe failed during hydration", + exc_info=True, + ) + + # Fallback probe for _bot_name only: application info endpoint. Needs + # admin:app.info:readonly or application:application:self_manage scope, + # so it's best-effort. 
+ if self._bot_name: return try: request = self._build_get_application_request(app_id=self._app_id, lang="en_us") @@ -3118,17 +3492,17 @@ class FeishuAdapter(BasePlatformAdapter): code = getattr(response, "code", None) if code == 99991672: logger.warning( - "[Feishu] Unable to hydrate bot identity from application info. " + "[Feishu] Unable to hydrate bot name from application info. " "Grant admin:app.info:readonly or application:application:self_manage " "so group @mention gating can resolve the bot name precisely." ) return app = getattr(getattr(response, "data", None), "app", None) app_name = (getattr(app, "app_name", None) or "").strip() - if app_name: + if app_name and not self._bot_name: self._bot_name = app_name except Exception: - logger.debug("[Feishu] Failed to hydrate bot identity", exc_info=True) + logger.debug("[Feishu] Failed to hydrate bot name from application info", exc_info=True) # ========================================================================= # Deduplication — seen message ID cache (persistent) diff --git a/gateway/platforms/feishu_comment.py b/gateway/platforms/feishu_comment.py new file mode 100644 index 0000000000..46807630ce --- /dev/null +++ b/gateway/platforms/feishu_comment.py @@ -0,0 +1,1383 @@ +""" +Feishu/Lark drive document comment handling. + +Processes ``drive.notice.comment_add_v1`` events and interacts with the +Drive v2 comment reaction API. Kept in a separate module so that the +main ``feishu.py`` adapter does not grow further and comment-related +logic can evolve independently. + +Flow: + 1. Parse event -> extract file_token, comment_id, reply_id, etc. + 2. Add OK reaction + 3. Parallel fetch: doc meta + comment details (batch_query) + 4. Branch on is_whole: + Whole -> list whole comments timeline + Local -> list comment thread replies + 5. Build prompt (local or whole) + 6. Create AIAgent with feishu_doc + feishu_drive tools -> agent generates reply + 7. 
Route reply: + Whole -> add_whole_comment + Local -> reply_to_comment (fallback to add_whole_comment on 1069302) +""" + +from __future__ import annotations + +import asyncio +import json +import logging +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Lark SDK helpers (lazy-imported) +# --------------------------------------------------------------------------- + + +def _build_request(method: str, uri: str, paths=None, queries=None, body=None): + """Build a lark_oapi BaseRequest.""" + from lark_oapi import AccessTokenType + from lark_oapi.core.enum import HttpMethod + from lark_oapi.core.model.base_request import BaseRequest + + http_method = HttpMethod.GET if method == "GET" else HttpMethod.POST + + builder = ( + BaseRequest.builder() + .http_method(http_method) + .uri(uri) + .token_types({AccessTokenType.TENANT}) + ) + if paths: + builder = builder.paths(paths) + if queries: + builder = builder.queries(queries) + if body is not None: + builder = builder.body(body) + return builder.build() + + +async def _exec_request(client, method, uri, paths=None, queries=None, body=None): + """Execute a lark API request and return (code, msg, data_dict).""" + logger.info("[Feishu-Comment] API >>> %s %s paths=%s queries=%s body=%s", + method, uri, paths, queries, + json.dumps(body, ensure_ascii=False)[:500] if body else None) + request = _build_request(method, uri, paths, queries, body) + response = await asyncio.to_thread(client.request, request) + + code = getattr(response, "code", None) + msg = getattr(response, "msg", "") + + data: dict = {} + raw = getattr(response, "raw", None) + if raw and hasattr(raw, "content"): + try: + body_json = json.loads(raw.content) + data = body_json.get("data", {}) + except (json.JSONDecodeError, AttributeError): + pass + if not data: + resp_data = getattr(response, "data", None) + if isinstance(resp_data, dict): + 
data = resp_data + elif resp_data and hasattr(resp_data, "__dict__"): + data = vars(resp_data) + + logger.info("[Feishu-Comment] API <<< %s %s code=%s msg=%s data_keys=%s", + method, uri, code, msg, list(data.keys()) if data else "empty") + if code != 0: + # Log raw response for debugging failed API calls + raw = getattr(response, "raw", None) + raw_content = "" + if raw and hasattr(raw, "content"): + raw_content = raw.content[:500] if isinstance(raw.content, (str, bytes)) else str(raw.content)[:500] + logger.warning("[Feishu-Comment] API FAIL raw response: %s", raw_content) + return code, msg, data + + +# --------------------------------------------------------------------------- +# Event parsing +# --------------------------------------------------------------------------- + + +def parse_drive_comment_event(data: Any) -> Optional[Dict[str, Any]]: + """Extract structured fields from a ``drive.notice.comment_add_v1`` payload. + + *data* may be a ``CustomizedEvent`` (WebSocket) whose ``.event`` is a dict, + or a ``SimpleNamespace`` (Webhook) built from the full JSON body. + + Returns a flat dict with the relevant fields, or ``None`` when the + payload is malformed. 
+ """ + logger.debug("[Feishu-Comment] parse_drive_comment_event: data type=%s", type(data).__name__) + event = getattr(data, "event", None) + if event is None: + logger.debug("[Feishu-Comment] parse_drive_comment_event: no .event attribute, returning None") + return None + + evt: dict = event if isinstance(event, dict) else ( + vars(event) if hasattr(event, "__dict__") else {} + ) + logger.debug("[Feishu-Comment] parse_drive_comment_event: evt keys=%s", list(evt.keys())) + + notice_meta = evt.get("notice_meta") or {} + if not isinstance(notice_meta, dict): + notice_meta = vars(notice_meta) if hasattr(notice_meta, "__dict__") else {} + + from_user = notice_meta.get("from_user_id") or {} + if not isinstance(from_user, dict): + from_user = vars(from_user) if hasattr(from_user, "__dict__") else {} + + to_user = notice_meta.get("to_user_id") or {} + if not isinstance(to_user, dict): + to_user = vars(to_user) if hasattr(to_user, "__dict__") else {} + + return { + "event_id": str(evt.get("event_id") or ""), + "comment_id": str(evt.get("comment_id") or ""), + "reply_id": str(evt.get("reply_id") or ""), + "is_mentioned": bool(evt.get("is_mentioned")), + "timestamp": str(evt.get("timestamp") or ""), + "file_token": str(notice_meta.get("file_token") or ""), + "file_type": str(notice_meta.get("file_type") or ""), + "notice_type": str(notice_meta.get("notice_type") or ""), + "from_open_id": str(from_user.get("open_id") or ""), + "to_open_id": str(to_user.get("open_id") or ""), + } + + +# --------------------------------------------------------------------------- +# Comment reaction API +# --------------------------------------------------------------------------- + +_REACTION_URI = "/open-apis/drive/v2/files/:file_token/comments/reaction" + + +async def add_comment_reaction( + client: Any, + *, + file_token: str, + file_type: str, + reply_id: str, + reaction_type: str = "OK", +) -> bool: + """Add an emoji reaction to a document comment reply. 
+ + Uses the Drive v2 ``update_reaction`` endpoint:: + + POST /open-apis/drive/v2/files/{file_token}/comments/reaction?file_type=... + + Returns ``True`` on success, ``False`` on failure (errors are logged). + """ + try: + from lark_oapi import AccessTokenType # noqa: F401 + except ImportError: + logger.error("[Feishu-Comment] lark_oapi not available") + return False + + body = { + "action": "add", + "reply_id": reply_id, + "reaction_type": reaction_type, + } + + code, msg, _ = await _exec_request( + client, "POST", _REACTION_URI, + paths={"file_token": file_token}, + queries=[("file_type", file_type)], + body=body, + ) + + succeeded = code == 0 + if succeeded: + logger.info( + "[Feishu-Comment] Reaction '%s' added: file=%s:%s reply=%s", + reaction_type, file_type, file_token, reply_id, + ) + else: + logger.warning( + "[Feishu-Comment] Reaction API failed: code=%s msg=%s " + "file=%s:%s reply=%s", + code, msg, file_type, file_token, reply_id, + ) + return succeeded + + +async def delete_comment_reaction( + client: Any, + *, + file_token: str, + file_type: str, + reply_id: str, + reaction_type: str = "OK", +) -> bool: + """Remove an emoji reaction from a document comment reply. + + Best-effort — errors are logged but not raised. 
+ """ + body = { + "action": "delete", + "reply_id": reply_id, + "reaction_type": reaction_type, + } + + code, msg, _ = await _exec_request( + client, "POST", _REACTION_URI, + paths={"file_token": file_token}, + queries=[("file_type", file_type)], + body=body, + ) + + succeeded = code == 0 + if succeeded: + logger.info( + "[Feishu-Comment] Reaction '%s' deleted: file=%s:%s reply=%s", + reaction_type, file_type, file_token, reply_id, + ) + else: + logger.warning( + "[Feishu-Comment] Reaction API failed: code=%s msg=%s " + "file=%s:%s reply=%s", + code, msg, file_type, file_token, reply_id, + ) + return succeeded + + +# --------------------------------------------------------------------------- +# API call layer +# --------------------------------------------------------------------------- + +_BATCH_QUERY_META_URI = "/open-apis/drive/v1/metas/batch_query" +_BATCH_QUERY_COMMENT_URI = "/open-apis/drive/v1/files/:file_token/comments/batch_query" +_LIST_COMMENTS_URI = "/open-apis/drive/v1/files/:file_token/comments" +_LIST_REPLIES_URI = "/open-apis/drive/v1/files/:file_token/comments/:comment_id/replies" +_REPLY_COMMENT_URI = "/open-apis/drive/v1/files/:file_token/comments/:comment_id/replies" +_ADD_COMMENT_URI = "/open-apis/drive/v1/files/:file_token/new_comments" + + +async def query_document_meta( + client: Any, file_token: str, file_type: str, +) -> Dict[str, Any]: + """Fetch document title and URL via batch_query meta API. + + Returns ``{"title": "...", "url": "...", "doc_type": "..."}`` or empty dict. 
+ """ + body = { + "request_docs": [{"doc_token": file_token, "doc_type": file_type}], + "with_url": True, + } + logger.debug("[Feishu-Comment] query_document_meta: file_token=%s file_type=%s", file_token, file_type) + code, msg, data = await _exec_request( + client, "POST", _BATCH_QUERY_META_URI, body=body, + ) + if code != 0: + logger.warning("[Feishu-Comment] Meta batch_query failed: code=%s msg=%s", code, msg) + return {} + + metas = data.get("metas", []) + logger.debug("[Feishu-Comment] query_document_meta: raw metas type=%s value=%s", + type(metas).__name__, str(metas)[:300]) + if not metas: + # Try alternate response shape: metas may be a dict keyed by token + if isinstance(data.get("metas"), dict): + meta = data["metas"].get(file_token, {}) + else: + logger.debug("[Feishu-Comment] query_document_meta: no metas found") + return {} + else: + meta = metas[0] if isinstance(metas, list) else {} + + result = { + "title": meta.get("title", ""), + "url": meta.get("url", ""), + "doc_type": meta.get("doc_type", file_type), + } + logger.info("[Feishu-Comment] query_document_meta: title=%s url=%s", + result["title"], result["url"][:80] if result["url"] else "") + return result + + +_COMMENT_RETRY_LIMIT = 6 +_COMMENT_RETRY_DELAY_S = 1.0 + + +async def batch_query_comment( + client: Any, file_token: str, file_type: str, comment_id: str, +) -> Dict[str, Any]: + """Fetch comment details via batch_query comment API. + + Retries up to 6 times on failure (handles eventual consistency). + + Returns the comment dict with fields like ``is_whole``, ``quote``, + ``reply_list``, etc. Empty dict on failure. 
+ """ + logger.debug("[Feishu-Comment] batch_query_comment: file_token=%s comment_id=%s", file_token, comment_id) + + for attempt in range(_COMMENT_RETRY_LIMIT): + code, msg, data = await _exec_request( + client, "POST", _BATCH_QUERY_COMMENT_URI, + paths={"file_token": file_token}, + queries=[ + ("file_type", file_type), + ("user_id_type", "open_id"), + ], + body={"comment_ids": [comment_id]}, + ) + if code == 0: + break + if attempt < _COMMENT_RETRY_LIMIT - 1: + logger.info( + "[Feishu-Comment] batch_query_comment retry %d/%d: code=%s msg=%s", + attempt + 1, _COMMENT_RETRY_LIMIT, code, msg, + ) + await asyncio.sleep(_COMMENT_RETRY_DELAY_S) + else: + logger.warning( + "[Feishu-Comment] batch_query_comment failed after %d attempts: code=%s msg=%s", + _COMMENT_RETRY_LIMIT, code, msg, + ) + return {} + + # Response: {"items": [{"comment_id": "...", ...}]} + items = data.get("items", []) + logger.debug("[Feishu-Comment] batch_query_comment: got %d items", len(items) if isinstance(items, list) else 0) + if items and isinstance(items, list): + item = items[0] + logger.info("[Feishu-Comment] batch_query_comment: is_whole=%s quote=%s reply_count=%s", + item.get("is_whole"), + (item.get("quote", "") or "")[:60], + len(item.get("reply_list", {}).get("replies", [])) if isinstance(item.get("reply_list"), dict) else "?") + return item + logger.warning("[Feishu-Comment] batch_query_comment: empty items, raw data keys=%s", list(data.keys())) + return {} + + +async def list_whole_comments( + client: Any, file_token: str, file_type: str, +) -> List[Dict[str, Any]]: + """List all whole-document comments (paginated, up to 500).""" + logger.debug("[Feishu-Comment] list_whole_comments: file_token=%s", file_token) + all_comments: List[Dict[str, Any]] = [] + page_token = "" + + for _ in range(5): # max 5 pages + queries = [ + ("file_type", file_type), + ("is_whole", "true"), + ("page_size", "100"), + ("user_id_type", "open_id"), + ] + if page_token: + queries.append(("page_token", 
page_token)) + + code, msg, data = await _exec_request( + client, "GET", _LIST_COMMENTS_URI, + paths={"file_token": file_token}, + queries=queries, + ) + if code != 0: + logger.warning("[Feishu-Comment] List whole comments failed: code=%s msg=%s", code, msg) + break + + items = data.get("items", []) + if isinstance(items, list): + all_comments.extend(items) + logger.debug("[Feishu-Comment] list_whole_comments: page got %d items, total=%d", + len(items), len(all_comments)) + + if not data.get("has_more"): + break + page_token = data.get("page_token", "") + if not page_token: + break + + logger.info("[Feishu-Comment] list_whole_comments: total %d whole comments fetched", len(all_comments)) + return all_comments + + +async def list_comment_replies( + client: Any, file_token: str, file_type: str, comment_id: str, + *, expect_reply_id: str = "", +) -> List[Dict[str, Any]]: + """List all replies in a comment thread (paginated, up to 500). + + If *expect_reply_id* is set and not found in the first fetch, + retries up to 6 times (handles eventual consistency). 
+    """
+    logger.debug("[Feishu-Comment] list_comment_replies: file_token=%s comment_id=%s", file_token, comment_id)
+
+    for attempt in range(_COMMENT_RETRY_LIMIT):
+        all_replies: List[Dict[str, Any]] = []
+        page_token = ""
+        fetch_ok = True
+
+        for _ in range(5):  # max 5 pages
+            queries = [
+                ("file_type", file_type),
+                ("page_size", "100"),
+                ("user_id_type", "open_id"),
+            ]
+            if page_token:
+                queries.append(("page_token", page_token))
+
+            code, msg, data = await _exec_request(
+                client, "GET", _LIST_REPLIES_URI,
+                paths={"file_token": file_token, "comment_id": comment_id},
+                queries=queries,
+            )
+            if code != 0:
+                logger.warning("[Feishu-Comment] List replies failed: code=%s msg=%s", code, msg)
+                fetch_ok = False
+                break
+
+            items = data.get("items", [])
+            if isinstance(items, list):
+                all_replies.extend(items)
+
+            if not data.get("has_more"):
+                break
+            page_token = data.get("page_token", "")
+            if not page_token:
+                break
+
+        # If we don't need a specific reply, or we found it, return
+        if not expect_reply_id or not fetch_ok:
+            break
+        found = any(r.get("reply_id") == expect_reply_id for r in all_replies)
+        if found:
+            break
+        if attempt < _COMMENT_RETRY_LIMIT - 1:
+            logger.info(
+                "[Feishu-Comment] list_comment_replies: reply_id=%s not found, retry %d/%d",
+                expect_reply_id, attempt + 1, _COMMENT_RETRY_LIMIT,
+            )
+            await asyncio.sleep(_COMMENT_RETRY_DELAY_S)
+        else:
+            logger.warning(
+                "[Feishu-Comment] list_comment_replies: reply_id=%s not found after %d attempts",
+                expect_reply_id, _COMMENT_RETRY_LIMIT,
+            )
+
+    logger.info("[Feishu-Comment] list_comment_replies: total %d replies fetched", len(all_replies))
+    return all_replies
+
+
+def _sanitize_comment_text(text: str) -> str:
+    """Escape characters not allowed in Feishu comment text_run content."""
+    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+
+
+async def reply_to_comment(
+    client: Any, file_token: str, file_type: str, comment_id: str, text: str,
+) -> Tuple[bool, int]:
+    """Post a 
reply to a local comment thread. + + Returns ``(success, code)``. + """ + text = _sanitize_comment_text(text) + logger.info("[Feishu-Comment] reply_to_comment: comment_id=%s text=%s", + comment_id, text[:100]) + body = { + "content": { + "elements": [ + {"type": "text_run", "text_run": {"text": text}}, + ] + } + } + + code, msg, _ = await _exec_request( + client, "POST", _REPLY_COMMENT_URI, + paths={"file_token": file_token, "comment_id": comment_id}, + queries=[("file_type", file_type)], + body=body, + ) + if code != 0: + logger.warning( + "[Feishu-Comment] reply_to_comment FAILED: code=%s msg=%s comment_id=%s", + code, msg, comment_id, + ) + else: + logger.info("[Feishu-Comment] reply_to_comment OK: comment_id=%s", comment_id) + return code == 0, code + + +async def add_whole_comment( + client: Any, file_token: str, file_type: str, text: str, +) -> bool: + """Add a new whole-document comment. + + Returns ``True`` on success. + """ + text = _sanitize_comment_text(text) + logger.info("[Feishu-Comment] add_whole_comment: file_token=%s text=%s", + file_token, text[:100]) + body = { + "file_type": file_type, + "reply_elements": [ + {"type": "text", "text": text}, + ], + } + + code, msg, _ = await _exec_request( + client, "POST", _ADD_COMMENT_URI, + paths={"file_token": file_token}, + body=body, + ) + if code != 0: + logger.warning("[Feishu-Comment] add_whole_comment FAILED: code=%s msg=%s", code, msg) + else: + logger.info("[Feishu-Comment] add_whole_comment OK") + return code == 0 + + +_REPLY_CHUNK_SIZE = 4000 + + +def _chunk_text(text: str, limit: int = _REPLY_CHUNK_SIZE) -> List[str]: + """Split text into chunks for delivery, preferring line breaks.""" + if len(text) <= limit: + return [text] + chunks = [] + while text: + if len(text) <= limit: + chunks.append(text) + break + # Find last newline within limit + cut = text.rfind("\n", 0, limit) + if cut <= 0: + cut = limit + chunks.append(text[:cut]) + text = text[cut:].lstrip("\n") + return chunks + + +async def 
deliver_comment_reply( + client: Any, + file_token: str, + file_type: str, + comment_id: str, + text: str, + is_whole: bool, +) -> bool: + """Route agent reply to the correct API, chunking long text. + + - Whole comment -> add_whole_comment + - Local comment -> reply_to_comment, fallback to add_whole_comment on 1069302 + """ + chunks = _chunk_text(text) + logger.info("[Feishu-Comment] deliver_comment_reply: is_whole=%s comment_id=%s text_len=%d chunks=%d", + is_whole, comment_id, len(text), len(chunks)) + + all_ok = True + for i, chunk in enumerate(chunks): + if len(chunks) > 1: + logger.info("[Feishu-Comment] deliver_comment_reply: sending chunk %d/%d (%d chars)", + i + 1, len(chunks), len(chunk)) + + if is_whole: + ok = await add_whole_comment(client, file_token, file_type, chunk) + else: + success, code = await reply_to_comment(client, file_token, file_type, comment_id, chunk) + if success: + ok = True + elif code == 1069302: + logger.info("[Feishu-Comment] Reply not allowed (1069302), falling back to add_whole_comment") + ok = await add_whole_comment(client, file_token, file_type, chunk) + is_whole = True # subsequent chunks also use add_comment + else: + ok = False + + if not ok: + all_ok = False + break + + return all_ok + + +# --------------------------------------------------------------------------- +# Comment content extraction helpers +# --------------------------------------------------------------------------- + + +def _extract_reply_text(reply: Dict[str, Any]) -> str: + """Extract plain text from a comment reply's content structure.""" + content = reply.get("content", {}) + if isinstance(content, str): + try: + content = json.loads(content) + except (json.JSONDecodeError, TypeError): + return content + + elements = content.get("elements", []) + parts = [] + for elem in elements: + if elem.get("type") == "text_run": + text_run = elem.get("text_run", {}) + parts.append(text_run.get("text", "")) + elif elem.get("type") == "docs_link": + docs_link = 
elem.get("docs_link", {})
+            parts.append(docs_link.get("url", ""))
+        elif elem.get("type") == "person":
+            person = elem.get("person", {})
+            parts.append(f"@{person.get('user_id', 'unknown')}")
+    return "".join(parts)
+
+
+def _get_reply_user_id(reply: Dict[str, Any]) -> str:
+    """Extract user_id from a reply dict."""
+    user_id = reply.get("user_id", "")
+    if isinstance(user_id, dict):
+        return user_id.get("open_id", "") or user_id.get("user_id", "")
+    return str(user_id)
+
+
+def _extract_semantic_text(reply: Dict[str, Any], self_open_id: str = "") -> str:
+    """Extract semantic text from a reply, stripping self @mentions and extra whitespace."""
+    content = reply.get("content", {})
+    if isinstance(content, str):
+        try:
+            content = json.loads(content)
+        except (json.JSONDecodeError, TypeError):
+            return content
+
+    elements = content.get("elements", [])
+    parts = []
+    for elem in elements:
+        if elem.get("type") == "person":
+            person = elem.get("person", {})
+            uid = person.get("user_id", "")
+            # Skip self @mention (it's routing, not content)
+            if self_open_id and uid == self_open_id:
+                continue
+            parts.append(f"@{uid}")
+        elif elem.get("type") == "text_run":
+            text_run = elem.get("text_run", {})
+            parts.append(text_run.get("text", ""))
+        elif elem.get("type") == "docs_link":
+            docs_link = elem.get("docs_link", {})
+            parts.append(docs_link.get("url", ""))
+    return " ".join("".join(parts).split()).strip()
+
+
+# ---------------------------------------------------------------------------
+# Document link parsing and wiki resolution
+# ---------------------------------------------------------------------------
+
+import re as _re
+
+# Matches feishu/lark document URLs and extracts doc_type + token
+_FEISHU_DOC_URL_RE = _re.compile(
+    r"(?:feishu\.cn|larkoffice\.com|larksuite\.com|lark\.suite\.com)"
+    r"/(?P<doc_type>wiki|doc|docx|sheet|sheets|slides|mindnote|bitable|base|file)"
+    r"/(?P<token>[A-Za-z0-9_-]{10,40})"
+)
+
+_WIKI_GET_NODE_URI = "/open-apis/wiki/v2/spaces/get_node"
+
+
+def 
_extract_docs_links(replies: List[Dict[str, Any]]) -> List[Dict[str, str]]: + """Extract unique document links from a list of comment replies. + + Returns list of ``{"url": "...", "doc_type": "...", "token": "..."}`` dicts. + """ + seen_tokens = set() + links = [] + for reply in replies: + content = reply.get("content", {}) + if isinstance(content, str): + try: + content = json.loads(content) + except (json.JSONDecodeError, TypeError): + continue + for elem in content.get("elements", []): + if elem.get("type") not in ("docs_link", "link"): + continue + link_data = elem.get("docs_link") or elem.get("link") or {} + url = link_data.get("url", "") + if not url: + continue + m = _FEISHU_DOC_URL_RE.search(url) + if not m: + continue + doc_type = m.group("doc_type") + token = m.group("token") + if token in seen_tokens: + continue + seen_tokens.add(token) + links.append({"url": url, "doc_type": doc_type, "token": token}) + return links + + +async def _reverse_lookup_wiki_token( + client: Any, obj_type: str, obj_token: str, +) -> Optional[str]: + """Reverse-lookup: given an obj_token, find its wiki node_token. + + Returns the wiki_token if the document belongs to a wiki space, + or None if it doesn't or the API call fails. + """ + code, msg, data = await _exec_request( + client, "GET", _WIKI_GET_NODE_URI, + queries=[("token", obj_token), ("obj_type", obj_type)], + ) + if code == 0: + node = data.get("node", {}) + wiki_token = node.get("node_token", "") + return wiki_token if wiki_token else None + # code != 0: either not a wiki doc or service error — log and return None + logger.warning("[Feishu-Comment] Wiki reverse lookup failed: code=%s msg=%s obj=%s:%s", code, msg, obj_type, obj_token) + return None + + +async def _resolve_wiki_nodes( + client: Any, + links: List[Dict[str, str]], +) -> List[Dict[str, str]]: + """Resolve wiki links to their underlying document type and token. 
+ + Mutates entries in *links* in-place: replaces ``doc_type`` and ``token`` + with the resolved values for wiki links. Non-wiki links are unchanged. + """ + wiki_links = [l for l in links if l["doc_type"] == "wiki"] + if not wiki_links: + return links + + for link in wiki_links: + wiki_token = link["token"] + code, msg, data = await _exec_request( + client, "GET", _WIKI_GET_NODE_URI, + queries=[("token", wiki_token)], + ) + if code == 0: + node = data.get("node", {}) + resolved_type = node.get("obj_type", "") + resolved_token = node.get("obj_token", "") + if resolved_type and resolved_token: + logger.info( + "[Feishu-Comment] Wiki resolved: %s -> %s:%s", + wiki_token, resolved_type, resolved_token, + ) + link["resolved_type"] = resolved_type + link["resolved_token"] = resolved_token + else: + logger.warning("[Feishu-Comment] Wiki resolve returned empty: %s", wiki_token) + else: + logger.warning("[Feishu-Comment] Wiki resolve failed: code=%s msg=%s token=%s", code, msg, wiki_token) + + return links + + +def _format_referenced_docs( + links: List[Dict[str, str]], current_file_token: str = "", +) -> str: + """Format resolved document links for prompt embedding.""" + if not links: + return "" + lines = ["", "Referenced documents in comments:"] + for link in links: + rtype = link.get("resolved_type", link["doc_type"]) + rtoken = link.get("resolved_token", link["token"]) + is_current = rtoken == current_file_token + suffix = " (same as current document)" if is_current else "" + lines.append(f"- {rtype}:{rtoken}{suffix} ({link['url'][:80]})") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Prompt construction +# --------------------------------------------------------------------------- + +_PROMPT_TEXT_LIMIT = 220 +_LOCAL_TIMELINE_LIMIT = 20 +_WHOLE_TIMELINE_LIMIT = 12 + + +def _truncate(text: str, limit: int = _PROMPT_TEXT_LIMIT) -> str: + """Truncate text for prompt embedding.""" + if len(text) <= limit: + 
return text + return text[:limit] + "..." + + +def _select_local_timeline( + timeline: List[Tuple[str, str, bool]], + target_index: int, +) -> List[Tuple[str, str, bool]]: + """Select up to _LOCAL_TIMELINE_LIMIT entries centered on target_index. + + Always keeps first, target, and last entries. + """ + if len(timeline) <= _LOCAL_TIMELINE_LIMIT: + return timeline + n = len(timeline) + selected = set() + selected.add(0) # first + selected.add(n - 1) # last + if 0 <= target_index < n: + selected.add(target_index) # current + # Expand outward from target + budget = _LOCAL_TIMELINE_LIMIT - len(selected) + lo, hi = target_index - 1, target_index + 1 + while budget > 0 and (lo >= 0 or hi < n): + if lo >= 0 and lo not in selected: + selected.add(lo) + budget -= 1 + lo -= 1 + if budget > 0 and hi < n and hi not in selected: + selected.add(hi) + budget -= 1 + hi += 1 + return [timeline[i] for i in sorted(selected)] + + +def _select_whole_timeline( + timeline: List[Tuple[str, str, bool]], + current_index: int, + nearest_self_index: int, +) -> List[Tuple[str, str, bool]]: + """Select up to _WHOLE_TIMELINE_LIMIT entries for whole-doc comments. + + Prioritizes current entry and nearest self reply. 
+ """ + if len(timeline) <= _WHOLE_TIMELINE_LIMIT: + return timeline + n = len(timeline) + selected = set() + if 0 <= current_index < n: + selected.add(current_index) + if 0 <= nearest_self_index < n: + selected.add(nearest_self_index) + # Expand outward from current + budget = _WHOLE_TIMELINE_LIMIT - len(selected) + lo, hi = current_index - 1, current_index + 1 + while budget > 0 and (lo >= 0 or hi < n): + if lo >= 0 and lo not in selected: + selected.add(lo) + budget -= 1 + lo -= 1 + if budget > 0 and hi < n and hi not in selected: + selected.add(hi) + budget -= 1 + hi += 1 + if not selected: + # Fallback: take last N entries + return timeline[-_WHOLE_TIMELINE_LIMIT:] + return [timeline[i] for i in sorted(selected)] + + +_COMMON_INSTRUCTIONS = """ +This is a Feishu document comment thread, not an IM chat. +Do NOT call feishu_drive_add_comment or feishu_drive_reply_comment yourself. +Your reply will be posted automatically. Just output the reply text. +Use the thread timeline above as the main context. +If the quoted content is not enough, use feishu_doc_read to read nearby context. +The quoted content is your primary anchor — insert/summarize/explain requests are about it. +Do not guess document content you haven't read. +Reply in the same language as the user's comment unless they request otherwise. +Use plain text only. Do not use Markdown, headings, bullet lists, tables, or code blocks. +Do not show your reasoning process. Do not start with "I will", "Let me", or "I'll first". +Output only the final user-facing reply. +If no reply is needed, output exactly NO_REPLY. 
+""".strip() + + +def build_local_comment_prompt( + *, + doc_title: str, + doc_url: str, + file_token: str, + file_type: str, + comment_id: str, + quote_text: str, + root_comment_text: str, + target_reply_text: str, + timeline: List[Tuple[str, str, bool]], # [(user_id, text, is_self)] + self_open_id: str, + target_index: int = -1, + referenced_docs: str = "", +) -> str: + """Build the prompt for a local (quoted-text) comment.""" + selected = _select_local_timeline(timeline, target_index) + + lines = [ + f'The user added a reply in "{doc_title}".', + f'Current user comment text: "{_truncate(target_reply_text)}"', + f'Original comment text: "{_truncate(root_comment_text)}"', + f'Quoted content: "{_truncate(quote_text, 500)}"', + "This comment mentioned you (@mention is for routing, not task content).", + f"Document link: {doc_url}", + "Current commented document:", + f"- file_type={file_type}", + f"- file_token={file_token}", + f"- comment_id={comment_id}", + "", + f"Current comment card timeline ({len(selected)}/{len(timeline)} entries):", + ] + + for user_id, text, is_self in selected: + marker = " <-- YOU" if is_self else "" + lines.append(f"[{user_id}] {_truncate(text)}{marker}") + + if referenced_docs: + lines.append(referenced_docs) + + lines.append("") + lines.append(_COMMON_INSTRUCTIONS) + return "\n".join(lines) + + +def build_whole_comment_prompt( + *, + doc_title: str, + doc_url: str, + file_token: str, + file_type: str, + comment_text: str, + timeline: List[Tuple[str, str, bool]], # [(user_id, text, is_self)] + self_open_id: str, + current_index: int = -1, + nearest_self_index: int = -1, + referenced_docs: str = "", +) -> str: + """Build the prompt for a whole-document comment.""" + selected = _select_whole_timeline(timeline, current_index, nearest_self_index) + + lines = [ + f'The user added a comment in "{doc_title}".', + f'Current user comment text: "{_truncate(comment_text)}"', + "This is a whole-document comment.", + "This comment mentioned you 
(@mention is for routing, not task content).", + f"Document link: {doc_url}", + "Current commented document:", + f"- file_type={file_type}", + f"- file_token={file_token}", + "", + f"Whole-document comment timeline ({len(selected)}/{len(timeline)} entries):", + ] + + for user_id, text, is_self in selected: + marker = " <-- YOU" if is_self else "" + lines.append(f"[{user_id}] {_truncate(text)}{marker}") + + if referenced_docs: + lines.append(referenced_docs) + + lines.append("") + lines.append(_COMMON_INSTRUCTIONS) + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Agent execution +# --------------------------------------------------------------------------- + + +def _resolve_model_and_runtime() -> Tuple[str, dict]: + """Resolve model and provider credentials, same as gateway message handling.""" + import os + from gateway.run import _load_gateway_config, _resolve_gateway_model + + user_config = _load_gateway_config() + model = _resolve_gateway_model(user_config) + + from gateway.run import _resolve_runtime_agent_kwargs + runtime_kwargs = _resolve_runtime_agent_kwargs() + + # Fall back to provider's default model if none configured + if not model and runtime_kwargs.get("provider"): + try: + from hermes_cli.models import get_default_model_for_provider + model = get_default_model_for_provider(runtime_kwargs["provider"]) + except Exception: + pass + + return model, runtime_kwargs + + +# --------------------------------------------------------------------------- +# Session cache for cross-card memory within the same document +# --------------------------------------------------------------------------- + +import threading +import time as _time + +_SESSION_MAX_MESSAGES = 50 # keep last N messages per document session +_SESSION_TTL_S = 3600 # expire sessions after 1 hour of inactivity + +_session_cache_lock = threading.Lock() +_session_cache: Dict[str, Dict] = {} # key -> {"messages": [...], "last_access": 
float} + + +def _session_key(file_type: str, file_token: str) -> str: + return f"comment-doc:{file_type}:{file_token}" + + +def _load_session_history(key: str) -> List[Dict[str, Any]]: + """Load conversation history for a document session.""" + with _session_cache_lock: + entry = _session_cache.get(key) + if entry is None: + return [] + # Check TTL + if _time.time() - entry["last_access"] > _SESSION_TTL_S: + del _session_cache[key] + logger.info("[Feishu-Comment] Session expired: %s", key) + return [] + entry["last_access"] = _time.time() + return list(entry["messages"]) + + +def _save_session_history(key: str, messages: List[Dict[str, Any]]) -> None: + """Save conversation history for a document session (keeps last N messages).""" + # Only keep user/assistant messages (strip system messages and tool internals) + cleaned = [ + m for m in messages + if m.get("role") in ("user", "assistant") and m.get("content") + ] + # Keep last N + if len(cleaned) > _SESSION_MAX_MESSAGES: + cleaned = cleaned[-_SESSION_MAX_MESSAGES:] + with _session_cache_lock: + _session_cache[key] = { + "messages": cleaned, + "last_access": _time.time(), + } + logger.info("[Feishu-Comment] Session saved: %s (%d messages)", key, len(cleaned)) + + +def _run_comment_agent(prompt: str, client: Any, session_key: str = "") -> str: + """Create an AIAgent with feishu tools and run the prompt. + + If *session_key* is provided, loads/saves conversation history for + cross-card memory within the same document. + + Returns the agent's final response text, or empty string on failure. 
+ """ + from run_agent import AIAgent + + logger.info("[Feishu-Comment] _run_comment_agent: injecting lark client into tool thread-locals") + from tools.feishu_doc_tool import set_client as set_doc_client + from tools.feishu_drive_tool import set_client as set_drive_client + set_doc_client(client) + set_drive_client(client) + + try: + model, runtime_kwargs = _resolve_model_and_runtime() + logger.info("[Feishu-Comment] _run_comment_agent: model=%s provider=%s base_url=%s", + model, runtime_kwargs.get("provider"), (runtime_kwargs.get("base_url") or "")[:50]) + + # Load session history for cross-card memory + history = _load_session_history(session_key) if session_key else [] + if history: + logger.info("[Feishu-Comment] _run_comment_agent: loaded %d history messages from session %s", + len(history), session_key) + + agent = AIAgent( + model=model, + base_url=runtime_kwargs.get("base_url"), + api_key=runtime_kwargs.get("api_key"), + provider=runtime_kwargs.get("provider"), + api_mode=runtime_kwargs.get("api_mode"), + credential_pool=runtime_kwargs.get("credential_pool"), + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + max_iterations=15, + enabled_toolsets=["feishu_doc", "feishu_drive"], + ) + logger.info("[Feishu-Comment] _run_comment_agent: calling run_conversation (prompt=%d chars, history=%d)", + len(prompt), len(history)) + result = agent.run_conversation(prompt, conversation_history=history or None) + response = (result.get("final_response") or "").strip() + api_calls = result.get("api_calls", 0) + logger.info("[Feishu-Comment] _run_comment_agent: done api_calls=%d response_len=%d response=%s", + api_calls, len(response), response[:200]) + + # Save updated history + if session_key: + new_messages = result.get("messages", []) + if new_messages: + _save_session_history(session_key, new_messages) + + return response + except Exception as e: + logger.exception("[Feishu-Comment] _run_comment_agent: agent failed: %s", e) + return "" + finally: + 
set_doc_client(None) + set_drive_client(None) + + +# --------------------------------------------------------------------------- +# Event handler entry point +# --------------------------------------------------------------------------- + +_NO_REPLY_SENTINEL = "NO_REPLY" + + +_ALLOWED_NOTICE_TYPES = {"add_comment", "add_reply"} + + +async def handle_drive_comment_event( + client: Any, data: Any, *, self_open_id: str = "", +) -> None: + """Full orchestration for a drive comment event. + + 1. Parse event + filter (self-reply, notice_type) + 2. Add OK reaction + 3. Fetch doc meta + comment details in parallel + 4. Branch on is_whole: build timeline + 5. Build prompt, run agent + 6. Deliver reply + """ + logger.info("[Feishu-Comment] ========== handle_drive_comment_event START ==========") + parsed = parse_drive_comment_event(data) + if parsed is None: + logger.warning("[Feishu-Comment] Dropping malformed drive comment event") + return + logger.info("[Feishu-Comment] [Step 0/5] Event parsed successfully") + + file_token = parsed["file_token"] + file_type = parsed["file_type"] + comment_id = parsed["comment_id"] + reply_id = parsed["reply_id"] + from_open_id = parsed["from_open_id"] + to_open_id = parsed["to_open_id"] + notice_type = parsed["notice_type"] + + # Filter: self-reply, receiver check, notice_type + if from_open_id and self_open_id and from_open_id == self_open_id: + logger.debug("[Feishu-Comment] Skipping self-authored event: from=%s", from_open_id) + return + if not to_open_id or (self_open_id and to_open_id != self_open_id): + logger.debug("[Feishu-Comment] Skipping event not addressed to self: to=%s", to_open_id or "(empty)") + return + if notice_type and notice_type not in _ALLOWED_NOTICE_TYPES: + logger.debug("[Feishu-Comment] Skipping notice_type=%s", notice_type) + return + if not file_token or not file_type or not comment_id: + logger.warning("[Feishu-Comment] Missing required fields, skipping") + return + + logger.info( + "[Feishu-Comment] Event: 
notice=%s file=%s:%s comment=%s from=%s", + notice_type, file_type, file_token, comment_id, from_open_id, + ) + + # Access control + from gateway.platforms.feishu_comment_rules import load_config, resolve_rule, is_user_allowed, has_wiki_keys + + comments_cfg = load_config() + rule = resolve_rule(comments_cfg, file_type, file_token) + + # If no exact match and config has wiki keys, try reverse-lookup + if rule.match_source in ("wildcard", "top") and has_wiki_keys(comments_cfg): + wiki_token = await _reverse_lookup_wiki_token(client, file_type, file_token) + if wiki_token: + rule = resolve_rule(comments_cfg, file_type, file_token, wiki_token=wiki_token) + + if not rule.enabled: + logger.info("[Feishu-Comment] Comments disabled for %s:%s, skipping", file_type, file_token) + return + if not is_user_allowed(rule, from_open_id): + logger.info("[Feishu-Comment] User %s denied (policy=%s, rule=%s)", from_open_id, rule.policy, rule.match_source) + return + + logger.info("[Feishu-Comment] Access granted: user=%s policy=%s rule=%s", from_open_id, rule.policy, rule.match_source) + if reply_id: + asyncio.ensure_future( + add_comment_reaction( + client, + file_token=file_token, + file_type=file_type, + reply_id=reply_id, + reaction_type="OK", + ) + ) + + # Step 2: Parallel fetch -- doc meta + comment details + logger.info("[Feishu-Comment] [Step 2/5] Parallel fetch: doc meta + comment batch_query") + meta_task = asyncio.ensure_future( + query_document_meta(client, file_token, file_type) + ) + comment_task = asyncio.ensure_future( + batch_query_comment(client, file_token, file_type, comment_id) + ) + doc_meta, comment_detail = await asyncio.gather(meta_task, comment_task) + + doc_title = doc_meta.get("title", "Untitled") + doc_url = doc_meta.get("url", "") + is_whole = bool(comment_detail.get("is_whole")) + + logger.info( + "[Feishu-Comment] Comment context: title=%s is_whole=%s", + doc_title, is_whole, + ) + + # Step 3: Build timeline based on comment type + 
logger.info("[Feishu-Comment] [Step 3/5] Building timeline (is_whole=%s)", is_whole) + if is_whole: + # Whole-document comment: fetch all whole comments as timeline + logger.info("[Feishu-Comment] Fetching whole-document comments for timeline...") + whole_comments = await list_whole_comments(client, file_token, file_type) + + timeline: List[Tuple[str, str, bool]] = [] + current_text = "" + current_index = -1 + nearest_self_index = -1 + for wc in whole_comments: + reply_list = wc.get("reply_list", {}) + if isinstance(reply_list, str): + try: + reply_list = json.loads(reply_list) + except (json.JSONDecodeError, TypeError): + reply_list = {} + replies = reply_list.get("replies", []) + for r in replies: + uid = _get_reply_user_id(r) + text = _extract_reply_text(r) + is_self = (uid == self_open_id) if self_open_id else False + idx = len(timeline) + timeline.append((uid, text, is_self)) + if uid == from_open_id: + current_text = _extract_semantic_text(r, self_open_id) + current_index = idx + if is_self: + nearest_self_index = idx + + if not current_text: + for i, (uid, text, is_self) in reversed(list(enumerate(timeline))): + if not is_self: + current_text = text + current_index = i + break + + logger.info("[Feishu-Comment] Whole timeline: %d entries, current_idx=%d, self_idx=%d, text=%s", + len(timeline), current_index, nearest_self_index, + current_text[:80] if current_text else "(empty)") + + # Extract and resolve document links from all replies + all_raw_replies = [] + for wc in whole_comments: + rl = wc.get("reply_list", {}) + if isinstance(rl, str): + try: + rl = json.loads(rl) + except (json.JSONDecodeError, TypeError): + rl = {} + all_raw_replies.extend(rl.get("replies", [])) + doc_links = _extract_docs_links(all_raw_replies) + if doc_links: + doc_links = await _resolve_wiki_nodes(client, doc_links) + ref_docs_text = _format_referenced_docs(doc_links, file_token) + + prompt = build_whole_comment_prompt( + doc_title=doc_title, + doc_url=doc_url, + 
file_token=file_token, + file_type=file_type, + comment_text=current_text, + timeline=timeline, + self_open_id=self_open_id, + current_index=current_index, + nearest_self_index=nearest_self_index, + referenced_docs=ref_docs_text, + ) + + else: + # Local comment: fetch the comment thread replies + logger.info("[Feishu-Comment] Fetching comment thread replies...") + replies = await list_comment_replies( + client, file_token, file_type, comment_id, + expect_reply_id=reply_id, + ) + + quote_text = comment_detail.get("quote", "") + + timeline = [] + root_text = "" + target_text = "" + target_index = -1 + for i, r in enumerate(replies): + uid = _get_reply_user_id(r) + text = _extract_reply_text(r) + is_self = (uid == self_open_id) if self_open_id else False + timeline.append((uid, text, is_self)) + if i == 0: + root_text = _extract_semantic_text(r, self_open_id) + rid = r.get("reply_id", "") + if rid and rid == reply_id: + target_text = _extract_semantic_text(r, self_open_id) + target_index = i + + if not target_text and timeline: + for i, (uid, text, is_self) in reversed(list(enumerate(timeline))): + if uid == from_open_id: + target_text = text + target_index = i + break + + logger.info("[Feishu-Comment] Local timeline: %d entries, target_idx=%d, quote=%s root=%s target=%s", + len(timeline), target_index, + quote_text[:60] if quote_text else "(empty)", + root_text[:60] if root_text else "(empty)", + target_text[:60] if target_text else "(empty)") + + # Extract and resolve document links from replies + doc_links = _extract_docs_links(replies) + if doc_links: + doc_links = await _resolve_wiki_nodes(client, doc_links) + ref_docs_text = _format_referenced_docs(doc_links, file_token) + + prompt = build_local_comment_prompt( + doc_title=doc_title, + doc_url=doc_url, + file_token=file_token, + file_type=file_type, + comment_id=comment_id, + quote_text=quote_text, + root_comment_text=root_text, + target_reply_text=target_text, + timeline=timeline, + self_open_id=self_open_id, + 
target_index=target_index, + referenced_docs=ref_docs_text, + ) + + logger.info("[Feishu-Comment] [Step 4/5] Prompt built (%d chars), running agent...", len(prompt)) + logger.debug("[Feishu-Comment] Full prompt:\n%s", prompt) + + # Step 4: Run agent in a thread (run_conversation is synchronous) + # Session key groups all comment cards on the same document + sess_key = _session_key(file_type, file_token) + loop = asyncio.get_running_loop() + response = await loop.run_in_executor( + None, _run_comment_agent, prompt, client, sess_key, + ) + + if not response or _NO_REPLY_SENTINEL in response: + logger.info("[Feishu-Comment] Agent returned NO_REPLY, skipping delivery") + else: + logger.info("[Feishu-Comment] Agent response (%d chars): %s", len(response), response[:200]) + + # Step 5: Deliver reply + logger.info("[Feishu-Comment] [Step 5/5] Delivering reply (is_whole=%s, comment_id=%s)", is_whole, comment_id) + success = await deliver_comment_reply( + client, file_token, file_type, comment_id, response, is_whole, + ) + if success: + logger.info("[Feishu-Comment] Reply delivered successfully") + else: + logger.error("[Feishu-Comment] Failed to deliver reply") + + # Cleanup: remove OK reaction (best-effort, non-blocking) + if reply_id: + await delete_comment_reaction( + client, + file_token=file_token, + file_type=file_type, + reply_id=reply_id, + reaction_type="OK", + ) + + logger.info("[Feishu-Comment] ========== handle_drive_comment_event END ==========") diff --git a/gateway/platforms/feishu_comment_rules.py b/gateway/platforms/feishu_comment_rules.py new file mode 100644 index 0000000000..054ef95698 --- /dev/null +++ b/gateway/platforms/feishu_comment_rules.py @@ -0,0 +1,429 @@ +""" +Feishu document comment access-control rules. + +3-tier rule resolution: exact doc > wildcard "*" > top-level > code defaults. +Each field (enabled/policy/allow_from) falls back independently. +Config: ~/.hermes/feishu_comment_rules.json (mtime-cached, hot-reload). 
+Pairing store: ~/.hermes/feishu_comment_pairing.json. +""" + +from __future__ import annotations + +import json +import logging +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, Optional + +from hermes_constants import get_hermes_home + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- +# +# Uses the canonical ``get_hermes_home()`` helper (HERMES_HOME-aware and +# profile-safe). Resolved at import time; this module is lazy-imported by +# the Feishu comment event handler, which runs long after profile overrides +# have been applied, so freezing paths here is safe. + +RULES_FILE = get_hermes_home() / "feishu_comment_rules.json" +PAIRING_FILE = get_hermes_home() / "feishu_comment_pairing.json" + +# --------------------------------------------------------------------------- +# Data models +# --------------------------------------------------------------------------- + +_VALID_POLICIES = ("allowlist", "pairing") + + +@dataclass(frozen=True) +class CommentDocumentRule: + """Per-document rule. ``None`` means 'inherit from lower tier'.""" + enabled: Optional[bool] = None + policy: Optional[str] = None + allow_from: Optional[frozenset] = None + + +@dataclass(frozen=True) +class CommentsConfig: + """Top-level comment access config.""" + enabled: bool = True + policy: str = "pairing" + allow_from: frozenset = field(default_factory=frozenset) + documents: Dict[str, CommentDocumentRule] = field(default_factory=dict) + + +@dataclass(frozen=True) +class ResolvedCommentRule: + """Fully resolved rule after field-by-field fallback.""" + enabled: bool + policy: str + allow_from: frozenset + match_source: str # e.g. 
"exact:docx:xxx" | "wildcard" | "top" | "default" + + +# --------------------------------------------------------------------------- +# Mtime-cached file loading +# --------------------------------------------------------------------------- + +class _MtimeCache: + """Generic mtime-based file cache. ``stat()`` per access, re-read only on change.""" + + def __init__(self, path: Path): + self._path = path + self._mtime: float = 0.0 + self._data: Optional[dict] = None + + def load(self) -> dict: + try: + st = self._path.stat() + mtime = st.st_mtime + except FileNotFoundError: + self._mtime = 0.0 + self._data = {} + return {} + + if mtime == self._mtime and self._data is not None: + return self._data + + try: + with open(self._path, "r", encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, dict): + data = {} + except (json.JSONDecodeError, OSError): + logger.warning("[Feishu-Rules] Failed to read %s, using empty config", self._path) + data = {} + + self._mtime = mtime + self._data = data + return data + + +_rules_cache = _MtimeCache(RULES_FILE) +_pairing_cache = _MtimeCache(PAIRING_FILE) + + +# --------------------------------------------------------------------------- +# Config parsing +# --------------------------------------------------------------------------- + +def _parse_frozenset(raw: Any) -> Optional[frozenset]: + """Parse a list of strings into a frozenset; return None if key absent.""" + if raw is None: + return None + if isinstance(raw, (list, tuple)): + return frozenset(str(u).strip() for u in raw if str(u).strip()) + return None + + +def _parse_document_rule(raw: dict) -> CommentDocumentRule: + enabled = raw.get("enabled") + if enabled is not None: + enabled = bool(enabled) + policy = raw.get("policy") + if policy is not None: + policy = str(policy).strip().lower() + if policy not in _VALID_POLICIES: + policy = None + allow_from = _parse_frozenset(raw.get("allow_from")) + return CommentDocumentRule(enabled=enabled, policy=policy, 
allow_from=allow_from) + + +def load_config() -> CommentsConfig: + """Load comment rules from disk (mtime-cached).""" + raw = _rules_cache.load() + if not raw: + return CommentsConfig() + + documents: Dict[str, CommentDocumentRule] = {} + raw_docs = raw.get("documents", {}) + if isinstance(raw_docs, dict): + for key, rule_raw in raw_docs.items(): + if isinstance(rule_raw, dict): + documents[str(key)] = _parse_document_rule(rule_raw) + + policy = str(raw.get("policy", "pairing")).strip().lower() + if policy not in _VALID_POLICIES: + policy = "pairing" + + return CommentsConfig( + enabled=raw.get("enabled", True), + policy=policy, + allow_from=_parse_frozenset(raw.get("allow_from")) or frozenset(), + documents=documents, + ) + + +# --------------------------------------------------------------------------- +# Rule resolution (§8.4 field-by-field fallback) +# --------------------------------------------------------------------------- + +def has_wiki_keys(cfg: CommentsConfig) -> bool: + """Check if any document rule key starts with 'wiki:'.""" + return any(k.startswith("wiki:") for k in cfg.documents) + + +def resolve_rule( + cfg: CommentsConfig, + file_type: str, + file_token: str, + wiki_token: str = "", +) -> ResolvedCommentRule: + """Resolve effective rule: exact doc → wiki key → wildcard → top-level → defaults.""" + exact_key = f"{file_type}:{file_token}" + + exact = cfg.documents.get(exact_key) + exact_src = f"exact:{exact_key}" + if exact is None and wiki_token: + wiki_key = f"wiki:{wiki_token}" + exact = cfg.documents.get(wiki_key) + exact_src = f"exact:{wiki_key}" + + wildcard = cfg.documents.get("*") + + layers = [] + if exact is not None: + layers.append((exact, exact_src)) + if wildcard is not None: + layers.append((wildcard, "wildcard")) + + def _pick(field_name: str): + for layer, source in layers: + val = getattr(layer, field_name) + if val is not None: + return val, source + return getattr(cfg, field_name), "top" + + enabled, en_src = _pick("enabled") + 
policy, pol_src = _pick("policy") + allow_from, _ = _pick("allow_from") + + # match_source = highest-priority tier that contributed any field + priority_order = {"exact": 0, "wildcard": 1, "top": 2} + best_src = min( + [en_src, pol_src], + key=lambda s: priority_order.get(s.split(":")[0], 3), + ) + + return ResolvedCommentRule( + enabled=enabled, + policy=policy, + allow_from=allow_from, + match_source=best_src, + ) + + +# --------------------------------------------------------------------------- +# Pairing store +# --------------------------------------------------------------------------- + +def _load_pairing_approved() -> set: + """Return set of approved user open_ids (mtime-cached).""" + data = _pairing_cache.load() + approved = data.get("approved", {}) + if isinstance(approved, dict): + return set(approved.keys()) + if isinstance(approved, list): + return set(str(u) for u in approved if u) + return set() + + +def _save_pairing(data: dict) -> None: + PAIRING_FILE.parent.mkdir(parents=True, exist_ok=True) + tmp = PAIRING_FILE.with_suffix(".tmp") + with open(tmp, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + tmp.replace(PAIRING_FILE) + # Invalidate cache so next load picks up change + _pairing_cache._mtime = 0.0 + _pairing_cache._data = None + + +def pairing_add(user_open_id: str) -> bool: + """Add a user to the pairing-approved list. Returns True if newly added.""" + data = _pairing_cache.load() + approved = data.get("approved", {}) + if not isinstance(approved, dict): + approved = {} + if user_open_id in approved: + return False + approved[user_open_id] = {"approved_at": time.time()} + data["approved"] = approved + _save_pairing(data) + return True + + +def pairing_remove(user_open_id: str) -> bool: + """Remove a user from the pairing-approved list. 
Returns True if removed.""" + data = _pairing_cache.load() + approved = data.get("approved", {}) + if not isinstance(approved, dict): + return False + if user_open_id not in approved: + return False + del approved[user_open_id] + data["approved"] = approved + _save_pairing(data) + return True + + +def pairing_list() -> Dict[str, Any]: + """Return the approved dict {user_open_id: {approved_at: ...}}.""" + data = _pairing_cache.load() + approved = data.get("approved", {}) + return dict(approved) if isinstance(approved, dict) else {} + + +# --------------------------------------------------------------------------- +# Access check (public API for feishu_comment.py) +# --------------------------------------------------------------------------- + +def is_user_allowed(rule: ResolvedCommentRule, user_open_id: str) -> bool: + """Check if user passes the resolved rule's policy gate.""" + if user_open_id in rule.allow_from: + return True + if rule.policy == "pairing": + return user_open_id in _load_pairing_approved() + return False + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def _print_status() -> None: + cfg = load_config() + print(f"Rules file: {RULES_FILE}") + print(f" exists: {RULES_FILE.exists()}") + print(f"Pairing file: {PAIRING_FILE}") + print(f" exists: {PAIRING_FILE.exists()}") + print() + print(f"Top-level:") + print(f" enabled: {cfg.enabled}") + print(f" policy: {cfg.policy}") + print(f" allow_from: {sorted(cfg.allow_from) if cfg.allow_from else '[]'}") + print() + if cfg.documents: + print(f"Document rules ({len(cfg.documents)}):") + for key, rule in sorted(cfg.documents.items()): + parts = [] + if rule.enabled is not None: + parts.append(f"enabled={rule.enabled}") + if rule.policy is not None: + parts.append(f"policy={rule.policy}") + if rule.allow_from is not None: + parts.append(f"allow_from={sorted(rule.allow_from)}") + print(f" 
[{key}] {', '.join(parts) if parts else '(empty — inherits all)'}") + else: + print("Document rules: (none)") + print() + approved = pairing_list() + print(f"Pairing approved ({len(approved)}):") + for uid, meta in sorted(approved.items()): + ts = meta.get("approved_at", 0) + print(f" {uid} (approved_at={ts})") + + +def _do_check(doc_key: str, user_open_id: str) -> None: + cfg = load_config() + parts = doc_key.split(":", 1) + if len(parts) != 2: + print(f"Error: doc_key must be 'fileType:fileToken', got '{doc_key}'") + return + file_type, file_token = parts + rule = resolve_rule(cfg, file_type, file_token) + allowed = is_user_allowed(rule, user_open_id) + print(f"Document: {doc_key}") + print(f"User: {user_open_id}") + print(f"Resolved rule:") + print(f" enabled: {rule.enabled}") + print(f" policy: {rule.policy}") + print(f" allow_from: {sorted(rule.allow_from) if rule.allow_from else '[]'}") + print(f" match_source: {rule.match_source}") + print(f"Result: {'ALLOWED' if allowed else 'DENIED'}") + + +def _main() -> int: + import sys + + try: + from hermes_cli.env_loader import load_hermes_dotenv + load_hermes_dotenv() + except Exception: + pass + + usage = ( + "Usage: python -m gateway.platforms.feishu_comment_rules [args]\n" + "\n" + "Commands:\n" + " status Show rules config and pairing state\n" + " check Simulate access check\n" + " pairing add Add user to pairing-approved list\n" + " pairing remove Remove user from pairing-approved list\n" + " pairing list List pairing-approved users\n" + "\n" + f"Rules config file: {RULES_FILE}\n" + " Edit this JSON file directly to configure policies and document rules.\n" + " Changes take effect on the next comment event (no restart needed).\n" + ) + + args = sys.argv[1:] + if not args: + print(usage) + return 1 + + cmd = args[0] + + if cmd == "status": + _print_status() + + elif cmd == "check": + if len(args) < 3: + print("Usage: check ") + return 1 + _do_check(args[1], args[2]) + + elif cmd == "pairing": + if len(args) < 2: 
+ print("Usage: pairing [args]") + return 1 + sub = args[1] + if sub == "add": + if len(args) < 3: + print("Usage: pairing add ") + return 1 + if pairing_add(args[2]): + print(f"Added: {args[2]}") + else: + print(f"Already approved: {args[2]}") + elif sub == "remove": + if len(args) < 3: + print("Usage: pairing remove ") + return 1 + if pairing_remove(args[2]): + print(f"Removed: {args[2]}") + else: + print(f"Not in approved list: {args[2]}") + elif sub == "list": + approved = pairing_list() + if not approved: + print("(no approved users)") + for uid, meta in sorted(approved.items()): + print(f" {uid} approved_at={meta.get('approved_at', '?')}") + else: + print(f"Unknown pairing subcommand: {sub}") + return 1 + else: + print(f"Unknown command: {cmd}\n") + print(usage) + return 1 + return 0 + + +if __name__ == "__main__": + import sys + sys.exit(_main()) diff --git a/gateway/platforms/helpers.py b/gateway/platforms/helpers.py index c834dd89ca..18d97fcb7a 100644 --- a/gateway/platforms/helpers.py +++ b/gateway/platforms/helpers.py @@ -49,7 +49,10 @@ class MessageDeduplicator: return False now = time.time() if msg_id in self._seen: - return True + if now - self._seen[msg_id] < self._ttl: + return True + # Entry has expired — remove it and treat as new + del self._seen[msg_id] self._seen[msg_id] = now if len(self._seen) > self._max_size: cutoff = now - self._ttl diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index 816d88b034..a5f9352b55 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -30,11 +30,10 @@ import mimetypes import os import re import time +from html import escape as _html_escape from pathlib import Path from typing import Any, Dict, Optional, Set -from html import escape as _html_escape - try: from mautrix.types import ( ContentURI, @@ -60,28 +59,33 @@ except ImportError: REACTION = "m.reaction" ROOM_ENCRYPTED = "m.room.encrypted" ROOM_NAME = "m.room.name" + EventType = _EventTypeStub # type: 
ignore[misc,assignment] class _PaginationDirectionStub: # type: ignore[no-redef] BACKWARD = "b" FORWARD = "f" + PaginationDirection = _PaginationDirectionStub # type: ignore[misc,assignment] class _PresenceStateStub: # type: ignore[no-redef] ONLINE = "online" OFFLINE = "offline" UNAVAILABLE = "unavailable" + PresenceState = _PresenceStateStub # type: ignore[misc,assignment] class _RoomCreatePresetStub: # type: ignore[no-redef] PRIVATE = "private_chat" PUBLIC = "public_chat" TRUSTED_PRIVATE = "trusted_private_chat" + RoomCreatePreset = _RoomCreatePresetStub # type: ignore[misc,assignment] class _TrustStateStub: # type: ignore[no-redef] UNVERIFIED = 0 VERIFIED = 1 + TrustState = _TrustStateStub # type: ignore[misc,assignment] from gateway.config import Platform, PlatformConfig @@ -103,20 +107,16 @@ MAX_MESSAGE_LENGTH = 4000 # Store directory for E2EE keys and sync state. # Uses get_hermes_home() so each profile gets its own Matrix store. from hermes_constants import get_hermes_dir as _get_hermes_dir + _STORE_DIR = _get_hermes_dir("platforms/matrix/store", "matrix/store") _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db" # Grace period: ignore messages older than this many seconds before startup. _STARTUP_GRACE_SECONDS = 5 -# Pending undecrypted events: cap and TTL for retry buffer. -_MAX_PENDING_EVENTS = 100 -_PENDING_EVENT_TTL = 300 # seconds — stop retrying after 5 min - _E2EE_INSTALL_HINT = ( - "Install with: pip install 'mautrix[encryption]' " - "(requires libolm C library)" + "Install with: pip install 'mautrix[encryption]' (requires libolm C library)" ) @@ -124,6 +124,7 @@ def _check_e2ee_deps() -> bool: """Return True if mautrix E2EE dependencies (python-olm) are available.""" try: from mautrix.crypto import OlmMachine # noqa: F401 + return True except (ImportError, AttributeError): return False @@ -145,14 +146,17 @@ def check_matrix_requirements() -> bool: import mautrix # noqa: F401 except ImportError: logger.warning( - "Matrix: mautrix not installed. 
" - "Run: pip install 'mautrix[encryption]'" + "Matrix: mautrix not installed. Run: pip install 'mautrix[encryption]'" ) return False # If encryption is requested, verify E2EE deps are available at startup # rather than silently degrading to plaintext-only at connect time. - encryption_requested = os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes") + encryption_requested = os.getenv("MATRIX_ENCRYPTION", "").lower() in ( + "true", + "1", + "yes", + ) if encryption_requested and not _check_e2ee_deps(): logger.error( "Matrix: MATRIX_ENCRYPTION=true but E2EE dependencies are missing. %s. " @@ -204,25 +208,21 @@ class MatrixAdapter(BasePlatformAdapter): super().__init__(config, Platform.MATRIX) self._homeserver: str = ( - config.extra.get("homeserver", "") - or os.getenv("MATRIX_HOMESERVER", "") + config.extra.get("homeserver", "") or os.getenv("MATRIX_HOMESERVER", "") ).rstrip("/") self._access_token: str = config.token or os.getenv("MATRIX_ACCESS_TOKEN", "") - self._user_id: str = ( - config.extra.get("user_id", "") - or os.getenv("MATRIX_USER_ID", "") + self._user_id: str = config.extra.get("user_id", "") or os.getenv( + "MATRIX_USER_ID", "" ) - self._password: str = ( - config.extra.get("password", "") - or os.getenv("MATRIX_PASSWORD", "") + self._password: str = config.extra.get("password", "") or os.getenv( + "MATRIX_PASSWORD", "" ) self._encryption: bool = config.extra.get( "encryption", os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes"), ) - self._device_id: str = ( - config.extra.get("device_id", "") - or os.getenv("MATRIX_DEVICE_ID", "") + self._device_id: str = config.extra.get("device_id", "") or os.getenv( + "MATRIX_DEVICE_ID", "" ) self._client: Any = None # mautrix.client.Client @@ -237,22 +237,32 @@ class MatrixAdapter(BasePlatformAdapter): self._joined_rooms: Set[str] = set() # Event deduplication (bounded deque keeps newest entries) from collections import deque + self._processed_events: deque = deque(maxlen=1000) 
self._processed_events_set: set = set() # Buffer for undecrypted events pending key receipt. # Each entry: (room_id, event, timestamp) - self._pending_megolm: list = [] # Thread participation tracking (for require_mention bypass) self._threads = ThreadParticipationTracker("matrix") # Mention/thread gating — parsed once from env vars. - self._require_mention: bool = os.getenv("MATRIX_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no") + self._require_mention: bool = os.getenv( + "MATRIX_REQUIRE_MENTION", "true" + ).lower() not in ("false", "0", "no") free_rooms_raw = os.getenv("MATRIX_FREE_RESPONSE_ROOMS", "") - self._free_rooms: Set[str] = {r.strip() for r in free_rooms_raw.split(",") if r.strip()} - self._auto_thread: bool = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in ("true", "1", "yes") - self._dm_mention_threads: bool = os.getenv("MATRIX_DM_MENTION_THREADS", "false").lower() in ("true", "1", "yes") + self._free_rooms: Set[str] = { + r.strip() for r in free_rooms_raw.split(",") if r.strip() + } + self._auto_thread: bool = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in ( + "true", + "1", + "yes", + ) + self._dm_mention_threads: bool = os.getenv( + "MATRIX_DM_MENTION_THREADS", "false" + ).lower() in ("true", "1", "yes") # Reactions: configurable via MATRIX_REACTIONS (default: true). self._reactions_enabled: bool = os.getenv( @@ -262,8 +272,12 @@ class MatrixAdapter(BasePlatformAdapter): # Text batching: merge rapid successive messages (Telegram-style). # Matrix clients split long messages around 4000 chars. 
- self._text_batch_delay_seconds = float(os.getenv("HERMES_MATRIX_TEXT_BATCH_DELAY_SECONDS", "0.6")) - self._text_batch_split_delay_seconds = float(os.getenv("HERMES_MATRIX_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")) + self._text_batch_delay_seconds = float( + os.getenv("HERMES_MATRIX_TEXT_BATCH_DELAY_SECONDS", "0.6") + ) + self._text_batch_split_delay_seconds = float( + os.getenv("HERMES_MATRIX_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0") + ) self._pending_text_batches: Dict[str, MessageEvent] = {} self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {} @@ -284,6 +298,38 @@ class MatrixAdapter(BasePlatformAdapter): # E2EE helpers # ------------------------------------------------------------------ + @staticmethod + def _extract_server_ed25519(device_keys_obj: Any) -> Optional[str]: + """Extract the ed25519 identity key from a DeviceKeys object.""" + for kid, kval in (getattr(device_keys_obj, "keys", {}) or {}).items(): + if str(kid).startswith("ed25519:"): + return str(kval) + return None + + async def _reverify_keys_after_upload( + self, client: Any, local_ed25519: str + ) -> bool: + """Re-query the server after share_keys() and verify our ed25519 key matches.""" + try: + resp = await client.query_keys({client.mxid: [client.device_id]}) + dk = getattr(resp, "device_keys", {}) or {} + ud = dk.get(str(client.mxid)) or {} + dev = ud.get(str(client.device_id)) + if dev: + server_ed = self._extract_server_ed25519(dev) + if server_ed != local_ed25519: + logger.error( + "Matrix: device %s has immutable identity keys that " + "don't match this installation. Generate a new access " + "token with a fresh device.", + client.device_id, + ) + return False + except Exception as exc: + logger.error("Matrix: post-upload key verification failed: %s", exc) + return False + return True + async def _verify_device_keys_on_server(self, client: Any, olm: Any) -> bool: """Verify our device keys are on the homeserver after loading crypto state. 
@@ -294,15 +340,15 @@ class MatrixAdapter(BasePlatformAdapter): resp = await client.query_keys({client.mxid: [client.device_id]}) except Exception as exc: logger.error( - "Matrix: cannot verify device keys on server: %s — refusing E2EE", exc, + "Matrix: cannot verify device keys on server: %s — refusing E2EE", + exc, ) return False - # query_keys returns typed objects (QueryKeysResponse, DeviceKeys - # with KeyID keys). Normalise to plain strings for comparison. device_keys_map = getattr(resp, "device_keys", {}) or {} our_user_devices = device_keys_map.get(str(client.mxid)) or {} our_keys = our_user_devices.get(str(client.device_id)) + local_ed25519 = olm.account.identity_keys.get("ed25519") if not our_keys: logger.warning("Matrix: device keys missing from server — re-uploading") @@ -312,21 +358,12 @@ class MatrixAdapter(BasePlatformAdapter): except Exception as exc: logger.error("Matrix: failed to re-upload device keys: %s", exc) return False - return True + return await self._reverify_keys_after_upload(client, local_ed25519) - # DeviceKeys.keys is a dict[KeyID, str]. Iterate to find the - # ed25519 key rather than constructing a KeyID for lookup. - server_ed25519 = None - keys_dict = getattr(our_keys, "keys", {}) or {} - for key_id, key_value in keys_dict.items(): - if str(key_id).startswith("ed25519:"): - server_ed25519 = str(key_value) - break - local_ed25519 = olm.account.identity_keys.get("ed25519") + server_ed25519 = self._extract_server_ed25519(our_keys) if server_ed25519 != local_ed25519: if olm.account.shared: - # Restored account from DB but server has different keys — corrupted state. logger.error( "Matrix: server has different identity keys for device %s — " "local crypto state is stale. Delete %s and restart.", @@ -335,8 +372,6 @@ class MatrixAdapter(BasePlatformAdapter): ) return False - # Fresh account (never uploaded). Server has stale keys from a - # previous installation. Try to delete the old device and re-upload. 
logger.warning( "Matrix: server has stale keys for device %s — attempting re-upload", client.device_id, @@ -348,10 +383,10 @@ class MatrixAdapter(BasePlatformAdapter): else "DELETE", f"/_matrix/client/v3/devices/{client.device_id}", ) - logger.info("Matrix: deleted stale device %s from server", client.device_id) + logger.info( + "Matrix: deleted stale device %s from server", client.device_id + ) except Exception: - # Device deletion often requires UIA or may simply not be - # permitted — that's fine, share_keys will try to overwrite. pass try: await olm.share_keys() @@ -363,6 +398,7 @@ class MatrixAdapter(BasePlatformAdapter): exc, ) return False + return await self._reverify_keys_after_upload(client, local_ed25519) return True @@ -448,7 +484,9 @@ class MatrixAdapter(BasePlatformAdapter): await api.session.close() return False else: - logger.error("Matrix: need MATRIX_ACCESS_TOKEN or MATRIX_USER_ID + MATRIX_PASSWORD") + logger.error( + "Matrix: need MATRIX_ACCESS_TOKEN or MATRIX_USER_ID + MATRIX_PASSWORD" + ) await api.session.close() return False @@ -472,7 +510,9 @@ class MatrixAdapter(BasePlatformAdapter): # Remove legacy pickle file from pre-SQLite era. legacy_pickle = _STORE_DIR / "crypto_store.pickle" if legacy_pickle.exists(): - logger.info("Matrix: removing legacy crypto_store.pickle (migrated to SQLite)") + logger.info( + "Matrix: removing legacy crypto_store.pickle (migrated to SQLite)" + ) legacy_pickle.unlink() # Open SQLite-backed crypto store. @@ -508,6 +548,37 @@ class MatrixAdapter(BasePlatformAdapter): await api.session.close() return False + # Proactively flush one-time keys to detect stale OTK + # conflicts early. When crypto state is wiped but the + # same device ID is reused, the server may still hold OTKs + # signed with the old ed25519 key. Identity key re-upload + # succeeds but OTK uploads fail ("already exists" with + # mismatched signature). Peers then cannot establish Olm + # sessions and all new messages are undecryptable. 
+ try: + await olm.share_keys() + except Exception as exc: + exc_str = str(exc) + if "already exists" in exc_str: + logger.error( + "Matrix: device %s has stale one-time keys on the " + "server signed with a previous identity key. " + "Peers cannot establish new Olm sessions with " + "this device. Delete the device from the " + "homeserver and restart, or generate a new " + "access token to get a fresh device ID.", + client.device_id, + ) + await crypto_db.stop() + await api.session.close() + return False + # Non-OTK errors are transient (network, etc.) — log + # but allow startup to continue. + logger.warning( + "Matrix: share_keys() warning during startup: %s", + exc, + ) + # Import cross-signing private keys from SSSS and self-sign # the current device. Required after any device-key rotation # (fresh crypto.db, share_keys re-upload) — otherwise the @@ -519,7 +590,9 @@ class MatrixAdapter(BasePlatformAdapter): await olm.verify_with_recovery_key(recovery_key) logger.info("Matrix: cross-signing verified via recovery key") except Exception as exc: - logger.warning("Matrix: recovery key verification failed: %s", exc) + logger.warning( + "Matrix: recovery key verification failed: %s", exc + ) client.crypto = olm logger.info( @@ -530,21 +603,23 @@ class MatrixAdapter(BasePlatformAdapter): except Exception as exc: logger.error( "Matrix: failed to create E2EE client: %s. %s", - exc, _E2EE_INSTALL_HINT, + exc, + _E2EE_INSTALL_HINT, ) await api.session.close() return False # Register event handlers. from mautrix.client import InternalEventType as IntEvt + from mautrix.client.dispatcher import MembershipEventDispatcher + + # Without this the INVITE handler below never fires. 
+ client.add_dispatcher(MembershipEventDispatcher) client.add_event_handler(EventType.ROOM_MESSAGE, self._on_room_message) client.add_event_handler(EventType.REACTION, self._on_reaction) client.add_event_handler(IntEvt.INVITE, self._on_invite) - if self._encryption and getattr(client, "crypto", None): - client.add_event_handler(EventType.ROOM_ENCRYPTED, self._on_encrypted_event) - # Initial sync to catch up, then start background sync. self._startup_ts = time.time() self._closing = False @@ -553,7 +628,8 @@ class MatrixAdapter(BasePlatformAdapter): sync_data = await client.sync(timeout=10000, full_state=True) if isinstance(sync_data, dict): rooms_join = sync_data.get("rooms", {}).get("join", {}) - self._joined_rooms = set(rooms_join.keys()) + self._joined_rooms.clear() + self._joined_rooms.update(rooms_join.keys()) # Store the next_batch token so incremental syncs start # from where the initial sync left off. nb = sync_data.get("next_batch") @@ -575,7 +651,10 @@ class MatrixAdapter(BasePlatformAdapter): except Exception as exc: logger.warning("Matrix: initial sync event dispatch error: %s", exc) else: - logger.warning("Matrix: initial sync returned unexpected type %s", type(sync_data).__name__) + logger.warning( + "Matrix: initial sync returned unexpected type %s", + type(sync_data).__name__, + ) except Exception as exc: logger.warning("Matrix: initial sync error: %s", exc) @@ -648,9 +727,7 @@ class MatrixAdapter(BasePlatformAdapter): # Reply-to support. if reply_to: - msg_content["m.relates_to"] = { - "m.in_reply_to": {"event_id": reply_to} - } + msg_content["m.relates_to"] = {"m.in_reply_to": {"event_id": reply_to}} # Thread support: if metadata has thread_id, send as threaded reply. 
thread_id = (metadata or {}).get("thread_id") @@ -688,10 +765,18 @@ class MatrixAdapter(BasePlatformAdapter): timeout=45, ) last_event_id = str(event_id) - logger.info("Matrix: sent event %s to %s (after key share)", last_event_id, chat_id) + logger.info( + "Matrix: sent event %s to %s (after key share)", + last_event_id, + chat_id, + ) continue except Exception as retry_exc: - logger.error("Matrix: failed to send to %s after retry: %s", chat_id, retry_exc) + logger.error( + "Matrix: failed to send to %s after retry: %s", + chat_id, + retry_exc, + ) return SendResult(success=False, error=str(retry_exc)) logger.error("Matrix: failed to send to %s: %s", chat_id, exc) return SendResult(success=False, error=str(exc)) @@ -706,7 +791,8 @@ class MatrixAdapter(BasePlatformAdapter): if self._client: try: name_evt = await self._client.get_state_event( - RoomID(chat_id), EventType.ROOM_NAME, + RoomID(chat_id), + EventType.ROOM_NAME, ) if name_evt and hasattr(name_evt, "name") and name_evt.name: name = name_evt.name @@ -729,8 +815,17 @@ class MatrixAdapter(BasePlatformAdapter): except Exception: pass + async def stop_typing(self, chat_id: str) -> None: + """Clear the typing indicator.""" + if self._client: + try: + await self._client.set_typing(RoomID(chat_id), timeout=0) + except Exception: + pass + + async def edit_message( - self, chat_id: str, message_id: str, content: str + self, chat_id: str, message_id: str, content: str, *, finalize: bool = False ) -> SendResult: """Edit an existing message (via m.replace).""" @@ -757,7 +852,9 @@ class MatrixAdapter(BasePlatformAdapter): try: event_id = await self._client.send_message_event( - RoomID(chat_id), EventType.ROOM_MESSAGE, msg_content, + RoomID(chat_id), + EventType.ROOM_MESSAGE, + msg_content, ) return SendResult(success=True, message_id=str(event_id)) except Exception as exc: @@ -773,22 +870,31 @@ class MatrixAdapter(BasePlatformAdapter): ) -> SendResult: """Download an image URL and upload it to Matrix.""" from 
tools.url_safety import is_safe_url + if not is_safe_url(image_url): logger.warning("Matrix: blocked unsafe image URL (SSRF protection)") - return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata) + return await super().send_image( + chat_id, image_url, caption, reply_to, metadata=metadata + ) try: # Try aiohttp first (always available), fall back to httpx try: import aiohttp as _aiohttp + async with _aiohttp.ClientSession(trust_env=True) as http: - async with http.get(image_url, timeout=_aiohttp.ClientTimeout(total=30)) as resp: + async with http.get( + image_url, timeout=_aiohttp.ClientTimeout(total=30) + ) as resp: resp.raise_for_status() data = await resp.read() ct = resp.content_type or "image/png" - fname = image_url.rsplit("/", 1)[-1].split("?")[0] or "image.png" + fname = ( + image_url.rsplit("/", 1)[-1].split("?")[0] or "image.png" + ) except ImportError: import httpx + async with httpx.AsyncClient() as http: resp = await http.get(image_url, follow_redirects=True, timeout=30) resp.raise_for_status() @@ -797,9 +903,13 @@ class MatrixAdapter(BasePlatformAdapter): fname = image_url.rsplit("/", 1)[-1].split("?")[0] or "image.png" except Exception as exc: logger.warning("Matrix: failed to download image %s: %s", image_url, exc) - return await self.send(chat_id, f"{caption or ''}\n{image_url}".strip(), reply_to) + return await self.send( + chat_id, f"{caption or ''}\n{image_url}".strip(), reply_to + ) - return await self._upload_and_send(chat_id, data, fname, ct, "m.image", caption, reply_to, metadata) + return await self._upload_and_send( + chat_id, data, fname, ct, "m.image", caption, reply_to, metadata + ) async def send_image_file( self, @@ -810,7 +920,9 @@ class MatrixAdapter(BasePlatformAdapter): metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: """Upload a local image file to Matrix.""" - return await self._send_local_file(chat_id, image_path, "m.image", caption, reply_to, metadata=metadata) + return await 
self._send_local_file( + chat_id, image_path, "m.image", caption, reply_to, metadata=metadata + ) async def send_document( self, @@ -822,7 +934,9 @@ class MatrixAdapter(BasePlatformAdapter): metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: """Upload a local file as a document.""" - return await self._send_local_file(chat_id, file_path, "m.file", caption, reply_to, file_name, metadata) + return await self._send_local_file( + chat_id, file_path, "m.file", caption, reply_to, file_name, metadata + ) async def send_voice( self, @@ -834,8 +948,13 @@ class MatrixAdapter(BasePlatformAdapter): ) -> SendResult: """Upload an audio file as a voice message (MSC3245 native voice).""" return await self._send_local_file( - chat_id, audio_path, "m.audio", caption, reply_to, - metadata=metadata, is_voice=True + chat_id, + audio_path, + "m.audio", + caption, + reply_to, + metadata=metadata, + is_voice=True, ) async def send_video( @@ -847,7 +966,9 @@ class MatrixAdapter(BasePlatformAdapter): metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: """Upload a video file.""" - return await self._send_local_file(chat_id, video_path, "m.video", caption, reply_to, metadata=metadata) + return await self._send_local_file( + chat_id, video_path, "m.video", caption, reply_to, metadata=metadata + ) def format_message(self, content: str) -> str: """Pass-through — Matrix supports standard Markdown natively.""" @@ -873,12 +994,30 @@ class MatrixAdapter(BasePlatformAdapter): ) -> SendResult: """Upload bytes to Matrix and send as a media message.""" + upload_data = data + encrypted_file = None + if self._encryption and getattr(self._client, "crypto", None): + state_store = getattr(self._client, "state_store", None) + if state_store: + try: + room_encrypted = bool(await state_store.is_encrypted(RoomID(room_id))) + except Exception: + room_encrypted = False + if room_encrypted: + try: + from mautrix.crypto.attachments import encrypt_attachment + upload_data, encrypted_file = 
encrypt_attachment(data) + except Exception as exc: + logger.error("Matrix: attachment encryption failed: %s", exc) + return SendResult(success=False, error=str(exc)) + # Upload to homeserver. try: mxc_url = await self._client.upload_media( - data, + upload_data, mime_type=content_type, filename=filename, + size=len(upload_data), ) except Exception as exc: logger.error("Matrix: upload failed: %s", exc) @@ -888,21 +1027,24 @@ class MatrixAdapter(BasePlatformAdapter): msg_content: Dict[str, Any] = { "msgtype": msgtype, "body": caption or filename, - "url": str(mxc_url), "info": { "mimetype": content_type, "size": len(data), }, } + if encrypted_file is not None: + file_payload = encrypted_file.serialize() + file_payload["url"] = str(mxc_url) + msg_content["file"] = file_payload + else: + msg_content["url"] = str(mxc_url) # Add MSC3245 voice flag for native voice messages. if is_voice: msg_content["org.matrix.msc3245.voice"] = {} if reply_to: - msg_content["m.relates_to"] = { - "m.in_reply_to": {"event_id": reply_to} - } + msg_content["m.relates_to"] = {"m.in_reply_to": {"event_id": reply_to}} thread_id = (metadata or {}).get("thread_id") if thread_id: @@ -914,7 +1056,9 @@ class MatrixAdapter(BasePlatformAdapter): try: event_id = await self._client.send_message_event( - RoomID(room_id), EventType.ROOM_MESSAGE, msg_content, + RoomID(room_id), + EventType.ROOM_MESSAGE, + msg_content, ) return SendResult(success=True, message_id=str(event_id)) except Exception as exc: @@ -932,7 +1076,7 @@ class MatrixAdapter(BasePlatformAdapter): is_voice: bool = False, ) -> SendResult: """Read a local file and upload it.""" - p = Path(file_path) + p = Path(file_path).expanduser() if not p.exists(): return await self.send( room_id, f"{caption or ''}\n(file not found: {file_path})", reply_to @@ -942,7 +1086,9 @@ class MatrixAdapter(BasePlatformAdapter): ct = mimetypes.guess_type(fname)[0] or "application/octet-stream" data = p.read_bytes() - return await self._upload_and_send(room_id, 
data, fname, ct, msgtype, caption, reply_to, metadata, is_voice) + return await self._upload_and_send( + room_id, data, fname, ct, msgtype, caption, reply_to, metadata, is_voice + ) # ------------------------------------------------------------------ # Sync loop @@ -956,7 +1102,8 @@ class MatrixAdapter(BasePlatformAdapter): while not self._closing: try: sync_data = await client.sync( - since=next_batch, timeout=30000, + since=next_batch, + timeout=30000, ) # nio returns SyncError objects (not exceptions) for auth @@ -965,7 +1112,10 @@ class MatrixAdapter(BasePlatformAdapter): if _sync_msg and isinstance(_sync_msg, str): _lower = _sync_msg.lower() if "m_unknown_token" in _lower or "unknown_token" in _lower: - logger.error("Matrix: permanent auth error from sync: %s — stopping", _sync_msg) + logger.error( + "Matrix: permanent auth error from sync: %s — stopping", + _sync_msg, + ) return if isinstance(sync_data, dict): @@ -990,10 +1140,6 @@ class MatrixAdapter(BasePlatformAdapter): except Exception as exc: logger.warning("Matrix: sync event dispatch error: %s", exc) - # Retry any buffered undecrypted events. - if self._pending_megolm: - await self._retry_pending_decryptions() - except asyncio.CancelledError: return except Exception as exc: @@ -1001,64 +1147,19 @@ class MatrixAdapter(BasePlatformAdapter): return # Detect permanent auth/permission failures. 
err_str = str(exc).lower() - if "401" in err_str or "403" in err_str or "unauthorized" in err_str or "forbidden" in err_str: - logger.error("Matrix: permanent auth error: %s — stopping sync", exc) + if ( + "401" in err_str + or "403" in err_str + or "unauthorized" in err_str + or "forbidden" in err_str + ): + logger.error( + "Matrix: permanent auth error: %s — stopping sync", exc + ) return logger.warning("Matrix: sync error: %s — retrying in 5s", exc) await asyncio.sleep(5) - async def _retry_pending_decryptions(self) -> None: - """Retry decrypting buffered encrypted events after new keys arrive.""" - client = self._client - if not client or not self._pending_megolm: - return - crypto = getattr(client, "crypto", None) - if not crypto: - return - - now = time.time() - still_pending: list = [] - - for room_id, event, ts in self._pending_megolm: - # Drop events that have aged past the TTL. - if now - ts > _PENDING_EVENT_TTL: - logger.debug( - "Matrix: dropping expired pending event %s (age %.0fs)", - getattr(event, "event_id", "?"), now - ts, - ) - continue - - try: - decrypted = await crypto.decrypt_megolm_event(event) - except Exception: - still_pending.append((room_id, event, ts)) - continue - - if decrypted is None or decrypted is event: - still_pending.append((room_id, event, ts)) - continue - - logger.info( - "Matrix: decrypted buffered event %s", - getattr(event, "event_id", "?"), - ) - - # Route to the appropriate handler. - # Remove from dedup set so _on_room_message doesn't drop it - # (the encrypted event ID was already registered by _on_encrypted_event). 
- decrypted_id = str(getattr(decrypted, "event_id", getattr(event, "event_id", ""))) - if decrypted_id: - self._processed_events_set.discard(decrypted_id) - try: - await self._on_room_message(decrypted) - except Exception as exc: - logger.warning( - "Matrix: error processing decrypted event %s: %s", - getattr(event, "event_id", "?"), exc, - ) - - self._pending_megolm = still_pending - # ------------------------------------------------------------------ # Event callbacks # ------------------------------------------------------------------ @@ -1078,7 +1179,11 @@ class MatrixAdapter(BasePlatformAdapter): return # Startup grace: ignore old messages from initial sync. - raw_ts = getattr(event, "timestamp", None) or getattr(event, "server_timestamp", None) or 0 + raw_ts = ( + getattr(event, "timestamp", None) + or getattr(event, "server_timestamp", None) + or 0 + ) event_ts = raw_ts / 1000.0 if raw_ts else 0.0 if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS: return @@ -1118,9 +1223,13 @@ class MatrixAdapter(BasePlatformAdapter): # Dispatch by msgtype. media_msgtypes = ("m.image", "m.audio", "m.video", "m.file") if msgtype in media_msgtypes: - await self._handle_media_message(room_id, sender, event_id, event_ts, source_content, relates_to, msgtype) + await self._handle_media_message( + room_id, sender, event_id, event_ts, source_content, relates_to, msgtype + ) elif msgtype == "m.text": - await self._handle_text_message(room_id, sender, event_id, event_ts, source_content, relates_to) + await self._handle_text_message( + room_id, sender, event_id, event_ts, source_content, relates_to + ) async def _resolve_message_context( self, @@ -1146,7 +1255,9 @@ class MatrixAdapter(BasePlatformAdapter): formatted_body = source_content.get("formatted_body") # m.mentions.user_ids (MSC3952 / Matrix v1.7) — authoritative mention signal. 
mentions_block = source_content.get("m.mentions") or {} - mention_user_ids = mentions_block.get("user_ids") if isinstance(mentions_block, dict) else None + mention_user_ids = ( + mentions_block.get("user_ids") if isinstance(mentions_block, dict) else None + ) is_mentioned = self._is_bot_mentioned(body, formatted_body, mention_user_ids) # Require-mention gating. @@ -1162,8 +1273,8 @@ class MatrixAdapter(BasePlatformAdapter): thread_id = event_id self._threads.mark(thread_id) - # Strip mention from body. - if is_mentioned: + # Strip mention from body (only when mention-gating is active). + if is_mentioned and self._require_mention: body = self._strip_mention(body) # Auto-thread. @@ -1202,7 +1313,12 @@ class MatrixAdapter(BasePlatformAdapter): return ctx = await self._resolve_message_context( - room_id, sender, event_id, body, source_content, relates_to, + room_id, + sender, + event_id, + body, + source_content, + relates_to, ) if ctx is None: return @@ -1280,7 +1396,9 @@ class MatrixAdapter(BasePlatformAdapter): if url and url.startswith("mxc://"): http_url = self._mxc_to_http(url) - is_encrypted_media = bool(file_content and isinstance(file_content, dict) and file_content.get("url")) + is_encrypted_media = bool( + file_content and isinstance(file_content, dict) and file_content.get("url") + ) media_type = "application/octet-stream" msg_type = MessageType.DOCUMENT @@ -1304,9 +1422,9 @@ class MatrixAdapter(BasePlatformAdapter): # Cache media locally when downstream tools need a real file path. 
cached_path = None - should_cache_locally = ( - msg_type == MessageType.PHOTO or is_voice_message or is_encrypted_media - ) + should_cache_locally = msg_type in ( + MessageType.PHOTO, MessageType.AUDIO, MessageType.VIDEO, MessageType.DOCUMENT, + ) or is_voice_message or is_encrypted_media if should_cache_locally and url: try: file_bytes = await self._client.download_media(ContentURI(url)) @@ -1314,17 +1432,35 @@ class MatrixAdapter(BasePlatformAdapter): if is_encrypted_media: from mautrix.crypto.attachments import decrypt_attachment - hashes_value = file_content.get("hashes") if isinstance(file_content, dict) else None - hash_value = hashes_value.get("sha256") if isinstance(hashes_value, dict) else None + hashes_value = ( + file_content.get("hashes") + if isinstance(file_content, dict) + else None + ) + hash_value = ( + hashes_value.get("sha256") + if isinstance(hashes_value, dict) + else None + ) - key_value = file_content.get("key") if isinstance(file_content, dict) else None + key_value = ( + file_content.get("key") + if isinstance(file_content, dict) + else None + ) if isinstance(key_value, dict): key_value = key_value.get("k") - iv_value = file_content.get("iv") if isinstance(file_content, dict) else None + iv_value = ( + file_content.get("iv") + if isinstance(file_content, dict) + else None + ) if key_value and hash_value and iv_value: - file_bytes = decrypt_attachment(file_bytes, key_value, hash_value, iv_value) + file_bytes = decrypt_attachment( + file_bytes, key_value, hash_value, iv_value + ) else: logger.warning( "[Matrix] Encrypted media event missing decryption metadata for %s", @@ -1350,25 +1486,46 @@ class MatrixAdapter(BasePlatformAdapter): cached_path = cache_image_from_bytes(file_bytes, ext=ext) logger.info("[Matrix] Cached user image at %s", cached_path) elif msg_type in (MessageType.AUDIO, MessageType.VOICE): - ext = Path(body or ("voice.ogg" if is_voice_message else "audio.ogg")).suffix or ".ogg" + ext = ( + Path( + body + or ( + "voice.ogg" if 
is_voice_message else "audio.ogg" + ) + ).suffix + or ".ogg" + ) cached_path = cache_audio_from_bytes(file_bytes, ext=ext) else: filename = body or ( - "video.mp4" if msg_type == MessageType.VIDEO else "document" + "video.mp4" + if msg_type == MessageType.VIDEO + else "document" + ) + cached_path = cache_document_from_bytes( + file_bytes, filename ) - cached_path = cache_document_from_bytes(file_bytes, filename) except Exception as e: logger.warning("[Matrix] Failed to cache media: %s", e) ctx = await self._resolve_message_context( - room_id, sender, event_id, body, source_content, relates_to, + room_id, + sender, + event_id, + body, + source_content, + relates_to, ) if ctx is None: return body, is_dm, chat_type, thread_id, display_name, source = ctx allow_http_fallback = bool(http_url) and not is_encrypted_media - media_urls = [cached_path] if cached_path else ([http_url] if allow_http_fallback else None) + media_urls = ( + [cached_path] + if cached_path + else ([http_url] if allow_http_fallback else None) + ) media_types = [media_type] if media_urls else None msg_event = MessageEvent( @@ -1383,23 +1540,6 @@ class MatrixAdapter(BasePlatformAdapter): await self.handle_message(msg_event) - async def _on_encrypted_event(self, event: Any) -> None: - """Handle encrypted events that could not be auto-decrypted.""" - room_id = str(getattr(event, "room_id", "")) - event_id = str(getattr(event, "event_id", "")) - - if self._is_duplicate_event(event_id): - return - - logger.warning( - "Matrix: could not decrypt event %s in %s — buffering for retry", - event_id, room_id, - ) - - self._pending_megolm.append((room_id, event, time.time())) - if len(self._pending_megolm) > _MAX_PENDING_EVENTS: - self._pending_megolm = self._pending_megolm[-_MAX_PENDING_EVENTS:] - async def _on_invite(self, event: Any) -> None: """Auto-join rooms when invited.""" @@ -1422,7 +1562,10 @@ class MatrixAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ 
async def _send_reaction( - self, room_id: str, event_id: str, emoji: str, + self, + room_id: str, + event_id: str, + emoji: str, ) -> Optional[str]: """Send an emoji reaction to a message in a room. Returns the reaction event_id on success, None on failure. @@ -1439,7 +1582,9 @@ class MatrixAdapter(BasePlatformAdapter): } try: resp_event_id = await self._client.send_message_event( - RoomID(room_id), EventType.REACTION, content, + RoomID(room_id), + EventType.REACTION, + content, ) logger.debug("Matrix: sent reaction %s to %s", emoji, event_id) return str(resp_event_id) @@ -1448,7 +1593,10 @@ class MatrixAdapter(BasePlatformAdapter): return None async def _redact_reaction( - self, room_id: str, reaction_event_id: str, reason: str = "", + self, + room_id: str, + reaction_event_id: str, + reason: str = "", ) -> bool: """Remove a reaction by redacting its event.""" return await self.redact_message(room_id, reaction_event_id, reason) @@ -1465,7 +1613,9 @@ class MatrixAdapter(BasePlatformAdapter): self._pending_reactions[(room_id, msg_id)] = reaction_event_id async def on_processing_complete( - self, event: MessageEvent, outcome: ProcessingOutcome, + self, + event: MessageEvent, + outcome: ProcessingOutcome, ) -> None: """Replace eyes with checkmark (success) or cross (failure).""" if not self._reactions_enabled: @@ -1499,7 +1649,11 @@ class MatrixAdapter(BasePlatformAdapter): room_id = str(getattr(event, "room_id", "")) content = getattr(event, "content", None) if content: - relates_to = content.get("m.relates_to", {}) if isinstance(content, dict) else getattr(content, "relates_to", {}) + relates_to = ( + content.get("m.relates_to", {}) + if isinstance(content, dict) + else getattr(content, "relates_to", {}) + ) reacts_to = "" key = "" if isinstance(relates_to, dict): @@ -1510,7 +1664,10 @@ class MatrixAdapter(BasePlatformAdapter): key = str(getattr(relates_to, "key", "")) logger.info( "Matrix: reaction %s from %s on %s in %s", - key, sender, reacts_to, room_id, + key, 
+ sender, + reacts_to, + room_id, ) # ------------------------------------------------------------------ @@ -1520,10 +1677,15 @@ class MatrixAdapter(BasePlatformAdapter): def _text_batch_key(self, event: MessageEvent) -> str: """Session-scoped key for text message batching.""" from gateway.session import build_session_key + return build_session_key( event.source, - group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True), - thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False), + group_sessions_per_user=self.config.extra.get( + "group_sessions_per_user", True + ), + thread_sessions_per_user=self.config.extra.get( + "thread_sessions_per_user", False + ), ) def _enqueue_text_event(self, event: MessageEvent) -> None: @@ -1536,7 +1698,9 @@ class MatrixAdapter(BasePlatformAdapter): self._pending_text_batches[key] = event else: if event.text: - existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text + existing.text = ( + f"{existing.text}\n{event.text}" if existing.text else event.text + ) existing._last_chunk_len = chunk_len # type: ignore[attr-defined] if event.media_urls: existing.media_urls.extend(event.media_urls) @@ -1565,7 +1729,8 @@ class MatrixAdapter(BasePlatformAdapter): return logger.info( "[Matrix] Flushing text batch %s (%d chars)", - key, len(event.text or ""), + key, + len(event.text or ""), ) await self.handle_message(event) finally: @@ -1578,11 +1743,13 @@ class MatrixAdapter(BasePlatformAdapter): def _background_read_receipt(self, room_id: str, event_id: str) -> None: """Fire-and-forget read receipt with error logging.""" + async def _send() -> None: try: await self.send_read_receipt(room_id, event_id) except Exception as exc: # pragma: no cover — defensive logger.debug("Matrix: background read receipt failed: %s", exc) + asyncio.ensure_future(_send()) async def send_read_receipt(self, room_id: str, event_id: str) -> bool: @@ -1590,11 +1757,21 @@ class 
MatrixAdapter(BasePlatformAdapter): if not self._client: return False try: - await self._client.set_read_markers( - RoomID(room_id), - fully_read_event=EventID(event_id), - read_receipt=EventID(event_id), - ) + room = RoomID(room_id) + event = EventID(event_id) + if hasattr(self._client, "set_fully_read_marker"): + await self._client.set_fully_read_marker(room, event, event) + elif hasattr(self._client, "send_receipt"): + await self._client.send_receipt(room, event) + elif hasattr(self._client, "set_read_markers"): + await self._client.set_read_markers( + room, + fully_read_event=event, + read_receipt=event, + ) + else: + logger.debug("Matrix: client has no read receipt method") + return False logger.debug("Matrix: sent read receipt for %s in %s", event_id, room_id) return True except Exception as exc: @@ -1606,14 +1783,19 @@ class MatrixAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ async def redact_message( - self, room_id: str, event_id: str, reason: str = "", + self, + room_id: str, + event_id: str, + reason: str = "", ) -> bool: """Redact (delete) a message or event from a room.""" if not self._client: return False try: await self._client.redact( - RoomID(room_id), EventID(event_id), reason=reason or None, + RoomID(room_id), + EventID(event_id), + reason=reason or None, ) logger.info("Matrix: redacted %s in %s", event_id, room_id) return True @@ -1704,7 +1886,10 @@ class MatrixAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ async def _send_simple_message( - self, chat_id: str, text: str, msgtype: str, + self, + chat_id: str, + text: str, + msgtype: str, ) -> SendResult: """Send a simple message (emote, notice) with optional HTML formatting.""" if not self._client or not text: @@ -1718,7 +1903,9 @@ class MatrixAdapter(BasePlatformAdapter): try: event_id = await self._client.send_message_event( - RoomID(chat_id), EventType.ROOM_MESSAGE, msg_content, + 
RoomID(chat_id), + EventType.ROOM_MESSAGE, + msg_content, ) return SendResult(success=True, message_id=str(event_id)) except Exception as exc: @@ -1733,7 +1920,9 @@ class MatrixAdapter(BasePlatformAdapter): if self._dm_rooms.get(room_id, False): return True # Fallback: check member count via state store. - state_store = getattr(self._client, "state_store", None) if self._client else None + state_store = ( + getattr(self._client, "state_store", None) if self._client else None + ) if state_store: try: members = await state_store.get_members(room_id) @@ -1767,10 +1956,7 @@ class MatrixAdapter(BasePlatformAdapter): if isinstance(rooms, list): dm_room_ids.update(str(r) for r in rooms) - self._dm_rooms = { - rid: (rid in dm_room_ids) - for rid in self._joined_rooms - } + self._dm_rooms = {rid: (rid in dm_room_ids) for rid in self._joined_rooms} # ------------------------------------------------------------------ # Mention detection helpers @@ -1800,7 +1986,9 @@ class MatrixAdapter(BasePlatformAdapter): return True if self._user_id and ":" in self._user_id: localpart = self._user_id.split(":")[0].lstrip("@") - if localpart and re.search(r'\b' + re.escape(localpart) + r'\b', body, re.IGNORECASE): + if localpart and re.search( + r"\b" + re.escape(localpart) + r"\b", body, re.IGNORECASE + ): return True if formatted_body and self._user_id: if f"matrix.to/#/{self._user_id}" in formatted_body: @@ -1808,18 +1996,20 @@ class MatrixAdapter(BasePlatformAdapter): return False def _strip_mention(self, body: str) -> str: - """Remove bot mention from message body.""" + """Strip the bot's full MXID (``@user:server``) from *body*. + + The bare localpart is intentionally *not* stripped — it would + mangle file paths like ``/home/hermes/media/file.png``. 
+ """ if self._user_id: body = body.replace(self._user_id, "") - if self._user_id and ":" in self._user_id: - localpart = self._user_id.split(":")[0].lstrip("@") - if localpart: - body = re.sub(r'\b' + re.escape(localpart) + r'\b', '', body, flags=re.IGNORECASE) return body.strip() async def _get_display_name(self, room_id: str, user_id: str) -> str: """Get a user's display name in a room, falling back to user_id.""" - state_store = getattr(self._client, "state_store", None) if self._client else None + state_store = ( + getattr(self._client, "state_store", None) if self._client else None + ) if state_store: try: member = await state_store.get_member(room_id, user_id) @@ -1907,9 +2097,7 @@ class MatrixAdapter(BasePlatformAdapter): # Inline code: `code` result = re.sub( r"`([^`\n]+)`", - lambda m: _protect_html( - f"{_html_escape(m.group(1))}" - ), + lambda m: _protect_html(f"{_html_escape(m.group(1))}"), result, ) @@ -1954,11 +2142,18 @@ class MatrixAdapter(BasePlatformAdapter): continue # Blockquote - if line.startswith("> ") or line == ">" or line.startswith("> ") or line == ">": + if ( + line.startswith("> ") + or line == ">" + or line.startswith("> ") + or line == ">" + ): bq_lines = [] while i < len(lines) and ( - lines[i].startswith("> ") or lines[i] == ">" - or lines[i].startswith("> ") or lines[i] == ">" + lines[i].startswith("> ") + or lines[i] == ">" + or lines[i].startswith("> ") + or lines[i] == ">" ): ln = lines[i] if ln.startswith("> "): @@ -1999,13 +2194,19 @@ class MatrixAdapter(BasePlatformAdapter): result = "\n".join(out_lines) # Inline transforms. 
- result = re.sub(r"\*\*(.+?)\*\*", r"\1", result, flags=re.DOTALL) + result = re.sub( + r"\*\*(.+?)\*\*", r"\1", result, flags=re.DOTALL + ) result = re.sub(r"__(.+?)__", r"\1", result, flags=re.DOTALL) result = re.sub(r"\*(.+?)\*", r"\1", result, flags=re.DOTALL) - result = re.sub(r"(?\1", result, flags=re.DOTALL) + result = re.sub( + r"(?\1", result, flags=re.DOTALL + ) result = re.sub(r"~~(.+?)~~", r"\1", result, flags=re.DOTALL) result = re.sub(r"\n", "
\n", result) - result = re.sub(r"
\n(\n()
", r"\1", result) # Restore protected regions. diff --git a/gateway/platforms/mattermost.py b/gateway/platforms/mattermost.py index 23a86f02b1..10539bf646 100644 --- a/gateway/platforms/mattermost.py +++ b/gateway/platforms/mattermost.py @@ -304,7 +304,7 @@ class MattermostAdapter(BasePlatformAdapter): ) async def edit_message( - self, chat_id: str, message_id: str, content: str + self, chat_id: str, message_id: str, content: str, *, finalize: bool = False ) -> SendResult: """Edit an existing post.""" formatted = self.format_message(content) @@ -718,6 +718,12 @@ class MattermostAdapter(BasePlatformAdapter): thread_id=thread_id, ) + # Per-channel ephemeral prompt + from gateway.platforms.base import resolve_channel_prompt + _channel_prompt = resolve_channel_prompt( + self.config.extra, channel_id, None, + ) + msg_event = MessageEvent( text=message_text, message_type=msg_type, @@ -726,6 +732,7 @@ class MattermostAdapter(BasePlatformAdapter): message_id=post_id, media_urls=media_urls if media_urls else None, media_types=media_types if media_types else None, + channel_prompt=_channel_prompt, ) await self.handle_message(msg_event) diff --git a/gateway/platforms/qqbot/__init__.py b/gateway/platforms/qqbot/__init__.py new file mode 100644 index 0000000000..7119dd979e --- /dev/null +++ b/gateway/platforms/qqbot/__init__.py @@ -0,0 +1,57 @@ +""" +QQBot platform package. 
+ +Re-exports the main adapter symbols from ``adapter.py`` (the original +``qqbot.py``) so that **all existing import paths remain unchanged**:: + + from gateway.platforms.qqbot import QQAdapter # works + from gateway.platforms.qqbot import check_qq_requirements # works + +New modules: + - ``constants`` — shared constants (API URLs, timeouts, message types) + - ``utils`` — User-Agent builder, config helpers + - ``crypto`` — AES-256-GCM key generation and decryption + - ``onboard`` — QR-code scan-to-configure flow +""" + +# -- Adapter (original qqbot.py) ------------------------------------------ +from .adapter import ( # noqa: F401 + QQAdapter, + QQCloseError, + check_qq_requirements, + _coerce_list, + _ssrf_redirect_guard, +) + +# -- Onboard (QR-code scan-to-configure) ----------------------------------- +from .onboard import ( # noqa: F401 + BindStatus, + create_bind_task, + poll_bind_result, + build_connect_url, +) +from .crypto import decrypt_secret, generate_bind_key # noqa: F401 + +# -- Utils ----------------------------------------------------------------- +from .utils import build_user_agent, get_api_headers, coerce_list # noqa: F401 + +__all__ = [ + # adapter + "QQAdapter", + "QQCloseError", + "check_qq_requirements", + "_coerce_list", + "_ssrf_redirect_guard", + # onboard + "BindStatus", + "create_bind_task", + "poll_bind_result", + "build_connect_url", + # crypto + "decrypt_secret", + "generate_bind_key", + # utils + "build_user_agent", + "get_api_headers", + "coerce_list", +] diff --git a/gateway/platforms/qqbot.py b/gateway/platforms/qqbot/adapter.py similarity index 70% rename from gateway/platforms/qqbot.py rename to gateway/platforms/qqbot/adapter.py index 7103689c98..ced7442711 100644 --- a/gateway/platforms/qqbot.py +++ b/gateway/platforms/qqbot/adapter.py @@ -46,6 +46,7 @@ from urllib.parse import urlparse try: import aiohttp + AIOHTTP_AVAILABLE = True except ImportError: AIOHTTP_AVAILABLE = False @@ -53,6 +54,7 @@ except ImportError: try: import 
httpx + HTTPX_AVAILABLE = True except ImportError: HTTPX_AVAILABLE = False @@ -64,6 +66,7 @@ from gateway.platforms.base import ( MessageEvent, MessageType, SendResult, + _ssrf_redirect_guard, cache_document_from_bytes, cache_image_from_bytes, ) @@ -82,39 +85,40 @@ class QQCloseError(Exception): self.code = int(code) if code else None self.reason = str(reason) if reason else "" super().__init__(f"WebSocket closed (code={self.code}, reason={self.reason})") + + # --------------------------------------------------------------------------- -# Constants +# Constants — imported from the shared constants module. # --------------------------------------------------------------------------- -API_BASE = "https://api.sgroup.qq.com" -TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken" -GATEWAY_URL_PATH = "/gateway" - -DEFAULT_API_TIMEOUT = 30.0 -FILE_UPLOAD_TIMEOUT = 120.0 -CONNECT_TIMEOUT_SECONDS = 20.0 - -RECONNECT_BACKOFF = [2, 5, 10, 30, 60] -MAX_RECONNECT_ATTEMPTS = 100 -RATE_LIMIT_DELAY = 60 # seconds -QUICK_DISCONNECT_THRESHOLD = 5.0 # seconds -MAX_QUICK_DISCONNECT_COUNT = 3 - -MAX_MESSAGE_LENGTH = 4000 -DEDUP_WINDOW_SECONDS = 300 -DEDUP_MAX_SIZE = 1000 - -# QQ Bot message types -MSG_TYPE_TEXT = 0 -MSG_TYPE_MARKDOWN = 2 -MSG_TYPE_MEDIA = 7 -MSG_TYPE_INPUT_NOTIFY = 6 - -# QQ Bot file media types -MEDIA_TYPE_IMAGE = 1 -MEDIA_TYPE_VIDEO = 2 -MEDIA_TYPE_VOICE = 3 -MEDIA_TYPE_FILE = 4 +from gateway.platforms.qqbot.constants import ( + API_BASE, + TOKEN_URL, + GATEWAY_URL_PATH, + DEFAULT_API_TIMEOUT, + FILE_UPLOAD_TIMEOUT, + CONNECT_TIMEOUT_SECONDS, + RECONNECT_BACKOFF, + MAX_RECONNECT_ATTEMPTS, + RATE_LIMIT_DELAY, + QUICK_DISCONNECT_THRESHOLD, + MAX_QUICK_DISCONNECT_COUNT, + MAX_MESSAGE_LENGTH, + DEDUP_WINDOW_SECONDS, + DEDUP_MAX_SIZE, + MSG_TYPE_TEXT, + MSG_TYPE_MARKDOWN, + MSG_TYPE_MEDIA, + MSG_TYPE_INPUT_NOTIFY, + MEDIA_TYPE_IMAGE, + MEDIA_TYPE_VIDEO, + MEDIA_TYPE_VOICE, + MEDIA_TYPE_FILE, +) +from gateway.platforms.qqbot.utils import ( + coerce_list as 
_coerce_list_impl, + build_user_agent, +) def check_qq_requirements() -> bool: @@ -124,24 +128,30 @@ def check_qq_requirements() -> bool: def _coerce_list(value: Any) -> List[str]: """Coerce config values into a trimmed string list.""" - if value is None: - return [] - if isinstance(value, str): - return [item.strip() for item in value.split(",") if item.strip()] - if isinstance(value, (list, tuple, set)): - return [str(item).strip() for item in value if str(item).strip()] - return [str(value).strip()] if str(value).strip() else [] + return _coerce_list_impl(value) # --------------------------------------------------------------------------- # QQAdapter # --------------------------------------------------------------------------- + class QQAdapter(BasePlatformAdapter): """QQ Bot adapter backed by the official QQ Bot WebSocket Gateway + REST API.""" # QQ Bot API does not support editing sent messages. SUPPORTS_MESSAGE_EDITING = False + MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH + _TYPING_INPUT_SECONDS = 60 # input_notify duration reported to QQ + _TYPING_DEBOUNCE_SECONDS = 50 # refresh before it expires + + @property + def _log_tag(self) -> str: + """Log prefix including app_id for multi-instance disambiguation.""" + app_id = getattr(self, "_app_id", None) + if app_id: + return f"QQBot:{app_id}" + return "QQBot" def _fail_pending(self, reason: str) -> None: """Fail all pending response futures.""" @@ -150,21 +160,25 @@ class QQAdapter(BasePlatformAdapter): fut.set_exception(RuntimeError(reason)) self._pending_responses.clear() - MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH - def __init__(self, config: PlatformConfig): super().__init__(config, Platform.QQBOT) extra = config.extra or {} self._app_id = str(extra.get("app_id") or os.getenv("QQ_APP_ID", "")).strip() - self._client_secret = str(extra.get("client_secret") or os.getenv("QQ_CLIENT_SECRET", "")).strip() + self._client_secret = str( + extra.get("client_secret") or os.getenv("QQ_CLIENT_SECRET", "") + ).strip() 
self._markdown_support = bool(extra.get("markdown_support", True)) # Auth/ACL policies self._dm_policy = str(extra.get("dm_policy", "open")).strip().lower() - self._allow_from = _coerce_list(extra.get("allow_from") or extra.get("allowFrom")) + self._allow_from = _coerce_list( + extra.get("allow_from") or extra.get("allowFrom") + ) self._group_policy = str(extra.get("group_policy", "open")).strip().lower() - self._group_allow_from = _coerce_list(extra.get("group_allow_from") or extra.get("groupAllowFrom")) + self._group_allow_from = _coerce_list( + extra.get("group_allow_from") or extra.get("groupAllowFrom") + ) # Connection state self._session: Optional[aiohttp.ClientSession] = None @@ -181,6 +195,11 @@ class QQAdapter(BasePlatformAdapter): self._pending_responses: Dict[str, asyncio.Future] = {} self._seen_messages: Dict[str, float] = {} + # Last inbound message ID per chat — used by send_typing + self._last_msg_id: Dict[str, str] = {} + # Typing debounce: chat_id → last send_typing timestamp + self._typing_sent_at: Dict[str, float] = {} + # Token cache self._access_token: Optional[str] = None self._token_expires_at: float = 0.0 @@ -206,34 +225,36 @@ class QQAdapter(BasePlatformAdapter): if not AIOHTTP_AVAILABLE: message = "QQ startup failed: aiohttp not installed" self._set_fatal_error("qq_missing_dependency", message, retryable=True) - logger.warning("[%s] %s. Run: pip install aiohttp", self.name, message) + logger.warning("[%s] %s. Run: pip install aiohttp", self._log_tag, message) return False if not HTTPX_AVAILABLE: message = "QQ startup failed: httpx not installed" self._set_fatal_error("qq_missing_dependency", message, retryable=True) - logger.warning("[%s] %s. Run: pip install httpx", self.name, message) + logger.warning("[%s] %s. 
Run: pip install httpx", self._log_tag, message) return False if not self._app_id or not self._client_secret: message = "QQ startup failed: QQ_APP_ID and QQ_CLIENT_SECRET are required" self._set_fatal_error("qq_missing_credentials", message, retryable=True) - logger.warning("[%s] %s", self.name, message) + logger.warning("[%s] %s", self._log_tag, message) return False # Prevent duplicate connections with the same credentials - if not self._acquire_platform_lock( - "qqbot-appid", self._app_id, "QQBot app ID" - ): + if not self._acquire_platform_lock("qqbot-appid", self._app_id, "QQBot app ID"): return False try: - self._http_client = httpx.AsyncClient(timeout=30.0, follow_redirects=True) + self._http_client = httpx.AsyncClient( + timeout=30.0, + follow_redirects=True, + event_hooks={"response": [_ssrf_redirect_guard]}, + ) # 1. Get access token await self._ensure_token() # 2. Get WebSocket gateway URL gateway_url = await self._get_gateway_url() - logger.info("[%s] Gateway URL: %s", self.name, gateway_url) + logger.info("[%s] Gateway URL: %s", self._log_tag, gateway_url) # 3. 
Open WebSocket await self._open_ws(gateway_url) @@ -242,12 +263,12 @@ class QQAdapter(BasePlatformAdapter): self._listen_task = asyncio.create_task(self._listen_loop()) self._heartbeat_task = asyncio.create_task(self._heartbeat_loop()) self._mark_connected() - logger.info("[%s] Connected", self.name) + logger.info("[%s] Connected", self._log_tag) return True except Exception as exc: message = f"QQ startup failed: {exc}" self._set_fatal_error("qq_connect_error", message, retryable=True) - logger.error("[%s] %s", self.name, message, exc_info=True) + logger.error("[%s] %s", self._log_tag, message, exc_info=True) await self._cleanup() self._release_platform_lock() return False @@ -275,7 +296,7 @@ class QQAdapter(BasePlatformAdapter): await self._cleanup() self._release_platform_lock() - logger.info("[%s] Disconnected", self.name) + logger.info("[%s] Disconnected", self._log_tag) async def _cleanup(self) -> None: """Close WebSocket, HTTP session, and client.""" @@ -324,12 +345,16 @@ class QQAdapter(BasePlatformAdapter): token = data.get("access_token") if not token: - raise RuntimeError(f"QQ Bot token response missing access_token: {data}") + raise RuntimeError( + f"QQ Bot token response missing access_token: {data}" + ) expires_in = int(data.get("expires_in", 7200)) self._access_token = token self._token_expires_at = time.time() + expires_in - logger.info("[%s] Access token refreshed, expires in %ds", self.name, expires_in) + logger.info( + "[%s] Access token refreshed, expires in %ds", self._log_tag, expires_in + ) return self._access_token async def _get_gateway_url(self) -> str: @@ -338,7 +363,10 @@ class QQAdapter(BasePlatformAdapter): try: resp = await self._http_client.get( f"{API_BASE}{GATEWAY_URL_PATH}", - headers={"Authorization": f"QQBot {token}"}, + headers={ + "Authorization": f"QQBot {token}", + "User-Agent": build_user_agent(), + }, timeout=DEFAULT_API_TIMEOUT, ) resp.raise_for_status() @@ -368,9 +396,12 @@ class QQAdapter(BasePlatformAdapter): 
self._session = aiohttp.ClientSession() self._ws = await self._session.ws_connect( gateway_url, + headers={ + "User-Agent": build_user_agent(), + }, timeout=CONNECT_TIMEOUT_SECONDS, ) - logger.info("[%s] WebSocket connected to %s", self.name, gateway_url) + logger.info("[%s] WebSocket connected to %s", self._log_tag, gateway_url) async def _listen_loop(self) -> None: """Read WebSocket events and reconnect on errors. @@ -399,23 +430,34 @@ class QQAdapter(BasePlatformAdapter): return code = exc.code - logger.warning("[%s] WebSocket closed: code=%s reason=%s", - self.name, code, exc.reason) + logger.warning( + "[%s] WebSocket closed: code=%s reason=%s", + self._log_tag, + code, + exc.reason, + ) # Quick disconnect detection (permission issues, misconfiguration) duration = time.monotonic() - connect_time if duration < QUICK_DISCONNECT_THRESHOLD and connect_time > 0: quick_disconnect_count += 1 - logger.info("[%s] Quick disconnect (%.1fs), count: %d", - self.name, duration, quick_disconnect_count) + logger.info( + "[%s] Quick disconnect (%.1fs), count: %d", + self._log_tag, + duration, + quick_disconnect_count, + ) if quick_disconnect_count >= MAX_QUICK_DISCONNECT_COUNT: logger.error( "[%s] Too many quick disconnects. " "Check: 1) AppID/Secret correct 2) Bot permissions on QQ Open Platform", - self.name, + self._log_tag, + ) + self._set_fatal_error( + "qq_quick_disconnect", + "Too many quick disconnects — check bot permissions", + retryable=True, ) - self._set_fatal_error("qq_quick_disconnect", - "Too many quick disconnects — check bot permissions", retryable=True) return else: quick_disconnect_count = 0 @@ -426,13 +468,21 @@ class QQAdapter(BasePlatformAdapter): # Stop reconnecting for fatal codes if code in (4914, 4915): desc = "offline/sandbox-only" if code == 4914 else "banned" - logger.error("[%s] Bot is %s. Check QQ Open Platform.", self.name, desc) - self._set_fatal_error(f"qq_{desc}", f"Bot is {desc}", retryable=False) + logger.error( + "[%s] Bot is %s. 
Check QQ Open Platform.", self._log_tag, desc + ) + self._set_fatal_error( + f"qq_{desc}", f"Bot is {desc}", retryable=False + ) return # Rate limited if code == 4008: - logger.info("[%s] Rate limited (4008), waiting %ds", self.name, RATE_LIMIT_DELAY) + logger.info( + "[%s] Rate limited (4008), waiting %ds", + self._log_tag, + RATE_LIMIT_DELAY, + ) if backoff_idx >= MAX_RECONNECT_ATTEMPTS: return await asyncio.sleep(RATE_LIMIT_DELAY) @@ -445,14 +495,38 @@ class QQAdapter(BasePlatformAdapter): # Token invalid → clear cached token so _ensure_token() refreshes if code == 4004: - logger.info("[%s] Invalid token (4004), will refresh and reconnect", self.name) + logger.info( + "[%s] Invalid token (4004), will refresh and reconnect", + self._log_tag, + ) self._access_token = None self._token_expires_at = 0.0 # Session invalid → clear session, will re-identify on next Hello - if code in (4006, 4007, 4009, 4900, 4901, 4902, 4903, 4904, 4905, - 4906, 4907, 4908, 4909, 4910, 4911, 4912, 4913): - logger.info("[%s] Session error (%d), clearing session for re-identify", self.name, code) + if code in ( + 4006, + 4007, + 4009, + 4900, + 4901, + 4902, + 4903, + 4904, + 4905, + 4906, + 4907, + 4908, + 4909, + 4910, + 4911, + 4912, + 4913, + ): + logger.info( + "[%s] Session error (%d), clearing session for re-identify", + self._log_tag, + code, + ) self._session_id = None self._last_seq = None @@ -465,12 +539,12 @@ class QQAdapter(BasePlatformAdapter): except Exception as exc: if not self._running: return - logger.warning("[%s] WebSocket error: %s", self.name, exc) + logger.warning("[%s] WebSocket error: %s", self._log_tag, exc) self._mark_disconnected() self._fail_pending("Connection interrupted") if backoff_idx >= MAX_RECONNECT_ATTEMPTS: - logger.error("[%s] Max reconnect attempts reached", self.name) + logger.error("[%s] Max reconnect attempts reached", self._log_tag) return if await self._reconnect(backoff_idx): @@ -482,7 +556,12 @@ class QQAdapter(BasePlatformAdapter): async 
def _reconnect(self, backoff_idx: int) -> bool: """Attempt to reconnect the WebSocket. Returns True on success.""" delay = RECONNECT_BACKOFF[min(backoff_idx, len(RECONNECT_BACKOFF) - 1)] - logger.info("[%s] Reconnecting in %ds (attempt %d)...", self.name, delay, backoff_idx + 1) + logger.info( + "[%s] Reconnecting in %ds (attempt %d)...", + self._log_tag, + delay, + backoff_idx + 1, + ) await asyncio.sleep(delay) self._heartbeat_interval = 30.0 # reset until Hello @@ -491,10 +570,10 @@ class QQAdapter(BasePlatformAdapter): gateway_url = await self._get_gateway_url() await self._open_ws(gateway_url) self._mark_connected() - logger.info("[%s] Reconnected", self.name) + logger.info("[%s] Reconnected", self._log_tag) return True except Exception as exc: - logger.warning("[%s] Reconnect failed: %s", self.name, exc) + logger.warning("[%s] Reconnect failed: %s", self._log_tag, exc) return False async def _read_events(self) -> None: @@ -531,7 +610,7 @@ class QQAdapter(BasePlatformAdapter): # d should be the latest sequence number received, or null await self._ws.send_json({"op": 1, "d": self._last_seq}) except Exception as exc: - logger.debug("[%s] Heartbeat failed: %s", self.name, exc) + logger.debug("[%s] Heartbeat failed: %s", self._log_tag, exc) except asyncio.CancelledError: pass @@ -549,7 +628,11 @@ class QQAdapter(BasePlatformAdapter): "op": 2, "d": { "token": f"QQBot {token}", - "intents": (1 << 25) | (1 << 30) | (1 << 12), # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE + "intents": (1 << 25) + | (1 << 30) + | ( + 1 << 12 + ), # C2C_GROUP_AT_MESSAGES + PUBLIC_GUILD_MESSAGES + DIRECT_MESSAGE "shard": [0, 1], "properties": { "$os": "macOS", @@ -561,11 +644,13 @@ class QQAdapter(BasePlatformAdapter): try: if self._ws and not self._ws.closed: await self._ws.send_json(identify_payload) - logger.info("[%s] Identify sent", self.name) + logger.info("[%s] Identify sent", self._log_tag) else: - logger.warning("[%s] Cannot send Identify: WebSocket not 
connected", self.name) + logger.warning( + "[%s] Cannot send Identify: WebSocket not connected", self._log_tag + ) except Exception as exc: - logger.error("[%s] Failed to send Identify: %s", self.name, exc) + logger.error("[%s] Failed to send Identify: %s", self._log_tag, exc) async def _send_resume(self) -> None: """Send op 6 Resume to re-authenticate after a reconnection. @@ -584,12 +669,18 @@ class QQAdapter(BasePlatformAdapter): try: if self._ws and not self._ws.closed: await self._ws.send_json(resume_payload) - logger.info("[%s] Resume sent (session_id=%s, seq=%s)", - self.name, self._session_id, self._last_seq) + logger.info( + "[%s] Resume sent (session_id=%s, seq=%s)", + self._log_tag, + self._session_id, + self._last_seq, + ) else: - logger.warning("[%s] Cannot send Resume: WebSocket not connected", self.name) + logger.warning( + "[%s] Cannot send Resume: WebSocket not connected", self._log_tag + ) except Exception as exc: - logger.error("[%s] Failed to send Resume: %s", self.name, exc) + logger.error("[%s] Failed to send Resume: %s", self._log_tag, exc) # If resume fails, clear session and fall back to identify on next Hello self._session_id = None self._last_seq = None @@ -622,8 +713,12 @@ class QQAdapter(BasePlatformAdapter): interval_ms = d_data.get("heartbeat_interval", 30000) # Send heartbeats at 80% of the server interval to stay safe self._heartbeat_interval = interval_ms / 1000.0 * 0.8 - logger.debug("[%s] Hello received, heartbeat_interval=%dms (sending every %.1fs)", - self.name, interval_ms, self._heartbeat_interval) + logger.debug( + "[%s] Hello received, heartbeat_interval=%dms (sending every %.1fs)", + self._log_tag, + interval_ms, + self._heartbeat_interval, + ) # Authenticate: send Resume if we have a session, else Identify. # Use _create_task which is safe when no event loop is running (tests). 
if self._session_id and self._last_seq is not None: @@ -637,26 +732,30 @@ class QQAdapter(BasePlatformAdapter): if t == "READY": self._handle_ready(d) elif t == "RESUMED": - logger.info("[%s] Session resumed", self.name) - elif t in ("C2C_MESSAGE_CREATE", "GROUP_AT_MESSAGE_CREATE", - "DIRECT_MESSAGE_CREATE", "GUILD_MESSAGE_CREATE", - "GUILD_AT_MESSAGE_CREATE"): + logger.info("[%s] Session resumed", self._log_tag) + elif t in ( + "C2C_MESSAGE_CREATE", + "GROUP_AT_MESSAGE_CREATE", + "DIRECT_MESSAGE_CREATE", + "GUILD_MESSAGE_CREATE", + "GUILD_AT_MESSAGE_CREATE", + ): asyncio.create_task(self._on_message(t, d)) else: - logger.debug("[%s] Unhandled dispatch: %s", self.name, t) + logger.debug("[%s] Unhandled dispatch: %s", self._log_tag, t) return # op 11 = Heartbeat ACK if op == 11: return - logger.debug("[%s] Unknown op: %s", self.name, op) + logger.debug("[%s] Unknown op: %s", self._log_tag, op) def _handle_ready(self, d: Any) -> None: """Handle the READY event — store session_id for resume.""" if isinstance(d, dict): self._session_id = d.get("session_id") - logger.info("[%s] Ready, session_id=%s", self.name, self._session_id) + logger.info("[%s] Ready, session_id=%s", self._log_tag, self._session_id) # ------------------------------------------------------------------ # JSON helpers @@ -667,7 +766,7 @@ class QQAdapter(BasePlatformAdapter): try: payload = json.loads(raw) except Exception: - logger.debug("[%s] Failed to parse JSON: %r", "QQBot", raw) + logger.warning("[QQBot] Failed to parse JSON: %r", raw) return None return payload if isinstance(payload, dict) else None @@ -682,6 +781,12 @@ class QQAdapter(BasePlatformAdapter): # Inbound message handling # ------------------------------------------------------------------ + async def handle_message(self, event: MessageEvent) -> None: + """Cache the last message ID per chat, then delegate to base.""" + if event.message_id and event.source.chat_id: + self._last_msg_id[event.source.chat_id] = event.message_id + await 
super().handle_message(event) + async def _on_message(self, event_type: str, d: Any) -> None: """Process an inbound QQ Bot message event.""" if not isinstance(d, dict): @@ -690,7 +795,9 @@ class QQAdapter(BasePlatformAdapter): # Extract common fields msg_id = str(d.get("id", "")) if not msg_id or self._is_duplicate(msg_id): - logger.debug("[%s] Duplicate or missing message id: %s", self.name, msg_id) + logger.debug( + "[%s] Duplicate or missing message id: %s", self._log_tag, msg_id + ) return timestamp = str(d.get("timestamp", "")) @@ -708,7 +815,12 @@ class QQAdapter(BasePlatformAdapter): await self._handle_dm_message(d, msg_id, content, author, timestamp) async def _handle_c2c_message( - self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str + self, + d: Dict[str, Any], + msg_id: str, + content: str, + author: Dict[str, Any], + timestamp: str, ) -> None: """Handle a C2C (private) message event.""" user_openid = str(author.get("user_openid", "")) @@ -719,17 +831,28 @@ class QQAdapter(BasePlatformAdapter): text = content attachments_raw = d.get("attachments") - logger.info("[QQ] C2C message: id=%s content=%r attachments=%s", - msg_id, content[:50] if content else "", - f"{len(attachments_raw) if isinstance(attachments_raw, list) else 0} items" - if attachments_raw else "None") + logger.info( + "[%s] C2C message: id=%s content=%r attachments=%s", + self._log_tag, + msg_id, + content[:50] if content else "", + ( + f"{len(attachments_raw) if isinstance(attachments_raw, list) else 0} items" + if attachments_raw + else "None" + ), + ) if attachments_raw and isinstance(attachments_raw, list): for _i, _att in enumerate(attachments_raw): if isinstance(_att, dict): - logger.info("[QQ] attachment[%d]: content_type=%s url=%s filename=%s", - _i, _att.get("content_type", ""), - str(_att.get("url", ""))[:80], - _att.get("filename", "")) + logger.info( + "[%s] attachment[%d]: content_type=%s url=%s filename=%s", + self._log_tag, + _i, + 
_att.get("content_type", ""), + str(_att.get("url", ""))[:80], + _att.get("filename", ""), + ) # Process all attachments uniformly (images, voice, files) att_result = await self._process_attachments(attachments_raw) @@ -741,13 +864,23 @@ class QQAdapter(BasePlatformAdapter): # Append voice transcripts to the text body if voice_transcripts: voice_block = "\n".join(voice_transcripts) - text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + text = ( + (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + ) # Append non-media attachment info if attachment_info: - text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info + text = ( + (text + "\n\n" + attachment_info).strip() + if text.strip() + else attachment_info + ) - logger.info("[QQ] After processing: images=%d, voice=%d", - len(image_urls), len(voice_transcripts)) + logger.info( + "[%s] After processing: images=%d, voice=%d", + self._log_tag, + len(image_urls), + len(voice_transcripts), + ) if not text.strip() and not image_urls: return @@ -770,13 +903,20 @@ class QQAdapter(BasePlatformAdapter): await self.handle_message(event) async def _handle_group_message( - self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str + self, + d: Dict[str, Any], + msg_id: str, + content: str, + author: Dict[str, Any], + timestamp: str, ) -> None: """Handle a group @-message event.""" group_openid = str(d.get("group_openid", "")) if not group_openid: return - if not self._is_group_allowed(group_openid, str(author.get("member_openid", ""))): + if not self._is_group_allowed( + group_openid, str(author.get("member_openid", "")) + ): return # Strip the @bot mention prefix from content @@ -790,9 +930,15 @@ class QQAdapter(BasePlatformAdapter): # Append voice transcripts if voice_transcripts: voice_block = "\n".join(voice_transcripts) - text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + text = ( + (text 
+ "\n\n" + voice_block).strip() if text.strip() else voice_block + ) if attachment_info: - text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info + text = ( + (text + "\n\n" + attachment_info).strip() + if text.strip() + else attachment_info + ) if not text.strip() and not image_urls: return @@ -815,7 +961,12 @@ class QQAdapter(BasePlatformAdapter): await self.handle_message(event) async def _handle_guild_message( - self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str + self, + d: Dict[str, Any], + msg_id: str, + content: str, + author: Dict[str, Any], + timestamp: str, ) -> None: """Handle a guild/channel message event.""" channel_id = str(d.get("channel_id", "")) @@ -834,9 +985,15 @@ class QQAdapter(BasePlatformAdapter): if voice_transcripts: voice_block = "\n".join(voice_transcripts) - text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + text = ( + (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + ) if attachment_info: - text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info + text = ( + (text + "\n\n" + attachment_info).strip() + if text.strip() + else attachment_info + ) if not text.strip() and not image_urls: return @@ -860,7 +1017,12 @@ class QQAdapter(BasePlatformAdapter): await self.handle_message(event) async def _handle_dm_message( - self, d: Dict[str, Any], msg_id: str, content: str, author: Dict[str, Any], timestamp: str + self, + d: Dict[str, Any], + msg_id: str, + content: str, + author: Dict[str, Any], + timestamp: str, ) -> None: """Handle a guild DM message event.""" guild_id = str(d.get("guild_id", "")) @@ -876,9 +1038,15 @@ class QQAdapter(BasePlatformAdapter): if voice_transcripts: voice_block = "\n".join(voice_transcripts) - text = (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + text = ( + (text + "\n\n" + voice_block).strip() if text.strip() else voice_block + ) if 
attachment_info: - text = (text + "\n\n" + attachment_info).strip() if text.strip() else attachment_info + text = ( + (text + "\n\n" + attachment_info).strip() + if text.strip() + else attachment_info + ) if not text.strip() and not image_urls: return @@ -904,7 +1072,6 @@ class QQAdapter(BasePlatformAdapter): # Attachment processing # ------------------------------------------------------------------ - @staticmethod def _detect_message_type(media_urls: list, media_types: list): """Determine MessageType from attachment content types.""" @@ -921,11 +1088,16 @@ class QQAdapter(BasePlatformAdapter): return MessageType.PHOTO # Unknown content type with an attachment — don't assume PHOTO # to prevent non-image files from being sent to vision analysis. - logger.debug("[QQ] Unknown media content_type '%s', defaulting to TEXT", first_type) + logger.debug( + "[%s] Unknown media content_type '%s', defaulting to TEXT", + self._log_tag, + first_type, + ) return MessageType.TEXT async def _process_attachments( - self, attachments: Any, + self, + attachments: Any, ) -> Dict[str, Any]: """Process inbound attachments (all message types). 
@@ -939,8 +1111,12 @@ class QQAdapter(BasePlatformAdapter): - attachment_info: str — text description of non-image, non-voice attachments """ if not isinstance(attachments, list): - return {"image_urls": [], "image_media_types": [], - "voice_transcripts": [], "attachment_info": ""} + return { + "image_urls": [], + "image_media_types": [], + "voice_transcripts": [], + "attachment_info": "", + } image_urls: List[str] = [] image_media_types: List[str] = [] @@ -962,30 +1138,39 @@ class QQAdapter(BasePlatformAdapter): url = "" continue - logger.debug("[QQ] Processing attachment: content_type=%s, url=%s, filename=%s", - ct, url[:80], filename) + logger.debug( + "[%s] Processing attachment: content_type=%s, url=%s, filename=%s", + self._log_tag, + ct, + url[:80], + filename, + ) if self._is_voice_content_type(ct, filename): # Voice: use QQ's asr_refer_text first, then voice_wav_url, then STT. asr_refer = ( str(att.get("asr_refer_text", "")).strip() - if isinstance(att.get("asr_refer_text"), str) else "" + if isinstance(att.get("asr_refer_text"), str) + else "" ) voice_wav_url = ( str(att.get("voice_wav_url", "")).strip() - if isinstance(att.get("voice_wav_url"), str) else "" + if isinstance(att.get("voice_wav_url"), str) + else "" ) transcript = await self._stt_voice_attachment( - url, ct, filename, + url, + ct, + filename, asr_refer_text=asr_refer or None, voice_wav_url=voice_wav_url or None, ) if transcript: voice_transcripts.append(f"[Voice] {transcript}") - logger.info("[QQ] Voice transcript: %s", transcript) + logger.debug("[%s] Voice transcript: %s", self._log_tag, transcript) else: - logger.warning("[QQ] Voice STT failed for %s", url[:60]) + logger.warning("[%s] Voice STT failed for %s", self._log_tag, url[:60]) voice_transcripts.append("[Voice] [语音识别失败]") elif ct.startswith("image/"): # Image: download and cache locally. 
@@ -995,9 +1180,13 @@ class QQAdapter(BasePlatformAdapter): image_urls.append(cached_path) image_media_types.append(ct or "image/jpeg") elif cached_path: - logger.warning("[QQ] Cached image path does not exist: %s", cached_path) + logger.warning( + "[%s] Cached image path does not exist: %s", + self._log_tag, + cached_path, + ) except Exception as exc: - logger.debug("[QQ] Failed to cache image: %s", exc) + logger.debug("[%s] Failed to cache image: %s", self._log_tag, exc) else: # Other attachments (video, file, etc.): record as text. try: @@ -1005,7 +1194,7 @@ class QQAdapter(BasePlatformAdapter): if cached_path: other_attachments.append(f"[Attachment: {filename or ct}]") except Exception as exc: - logger.debug("[QQ] Failed to cache attachment: %s", exc) + logger.debug("[%s] Failed to cache attachment: %s", self._log_tag, exc) attachment_info = "\n".join(other_attachments) if other_attachments else "" return { @@ -1018,6 +1207,7 @@ class QQAdapter(BasePlatformAdapter): async def _download_and_cache(self, url: str, content_type: str) -> Optional[str]: """Download a URL and cache it locally.""" from tools.url_safety import is_safe_url + if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL: {url[:80]}") @@ -1026,12 +1216,16 @@ class QQAdapter(BasePlatformAdapter): try: resp = await self._http_client.get( - url, timeout=30.0, headers=self._qq_media_headers(), + url, + timeout=30.0, + headers=self._qq_media_headers(), ) resp.raise_for_status() data = resp.content except Exception as exc: - logger.debug("[%s] Download failed for %s: %s", self.name, url[:80], exc) + logger.debug( + "[%s] Download failed for %s: %s", self._log_tag, url[:80], exc + ) return None if content_type.startswith("image/"): @@ -1052,7 +1246,17 @@ class QQAdapter(BasePlatformAdapter): fn = filename.strip().lower() if ct == "voice" or ct.startswith("audio/"): return True - _VOICE_EXTENSIONS = (".silk", ".amr", ".mp3", ".wav", ".ogg", ".m4a", ".aac", ".speex", ".flac") + _VOICE_EXTENSIONS = 
( + ".silk", + ".amr", + ".mp3", + ".wav", + ".ogg", + ".m4a", + ".aac", + ".speex", + ".flac", + ) if any(fn.endswith(ext) for ext in _VOICE_EXTENSIONS): return True return False @@ -1069,13 +1273,13 @@ class QQAdapter(BasePlatformAdapter): return {} async def _stt_voice_attachment( - self, - url: str, - content_type: str, - filename: str, - *, - asr_refer_text: Optional[str] = None, - voice_wav_url: Optional[str] = None, + self, + url: str, + content_type: str, + filename: str, + *, + asr_refer_text: Optional[str] = None, + voice_wav_url: Optional[str] = None, ) -> Optional[str]: """Download a voice attachment, convert to wav, and transcribe. @@ -1088,7 +1292,9 @@ class QQAdapter(BasePlatformAdapter): """ # 1. Use QQ's built-in ASR text if available if asr_refer_text: - logger.info("[QQ] STT: using QQ asr_refer_text: %r", asr_refer_text[:100]) + logger.debug( + "[%s] STT: using QQ asr_refer_text: %r", self._log_tag, asr_refer_text[:100] + ) return asr_refer_text # Determine which URL to download (prefer voice_wav_url — already WAV) @@ -1099,45 +1305,75 @@ class QQAdapter(BasePlatformAdapter): voice_wav_url = f"https:{voice_wav_url}" download_url = voice_wav_url is_pre_wav = True - logger.info("[QQ] STT: using voice_wav_url (pre-converted WAV)") + logger.debug("[%s] STT: using voice_wav_url (pre-converted WAV)", self._log_tag) + + from tools.url_safety import is_safe_url + if not is_safe_url(download_url): + logger.warning("[QQ] STT blocked unsafe URL: %s", download_url[:80]) + return None try: # 2. 
Download audio (QQ CDN requires Authorization header) if not self._http_client: - logger.warning("[QQ] STT: no HTTP client") + logger.warning("[%s] STT: no HTTP client", self._log_tag) return None download_headers = self._qq_media_headers() - logger.info("[QQ] STT: downloading voice from %s (pre_wav=%s, headers=%s)", - download_url[:80], is_pre_wav, bool(download_headers)) + logger.debug( + "[%s] STT: downloading voice from %s (pre_wav=%s, headers=%s)", + self._log_tag, + download_url[:80], + is_pre_wav, + bool(download_headers), + ) resp = await self._http_client.get( - download_url, timeout=30.0, headers=download_headers, follow_redirects=True, + download_url, + timeout=30.0, + headers=download_headers, + follow_redirects=True, ) resp.raise_for_status() audio_data = resp.content - logger.info("[QQ] STT: downloaded %d bytes, content_type=%s", - len(audio_data), resp.headers.get("content-type", "unknown")) + logger.debug( + "[%s] STT: downloaded %d bytes, content_type=%s", + self._log_tag, + len(audio_data), + resp.headers.get("content-type", "unknown"), + ) if len(audio_data) < 10: - logger.warning("[QQ] STT: downloaded data too small (%d bytes), skipping", len(audio_data)) + logger.warning( + "[%s] STT: downloaded data too small (%d bytes), skipping", + self._log_tag, + len(audio_data), + ) return None # 3. 
Convert to wav (skip if we already have a pre-converted WAV) if is_pre_wav: import tempfile + with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp: tmp.write(audio_data) wav_path = tmp.name - logger.info("[QQ] STT: using pre-converted WAV directly (%d bytes)", len(audio_data)) + logger.debug( + "[%s] STT: using pre-converted WAV directly (%d bytes)", + self._log_tag, + len(audio_data), + ) else: - logger.info("[QQ] STT: converting to wav, filename=%r", filename) + logger.debug( + "[%s] STT: converting to wav, filename=%r", self._log_tag, filename + ) wav_path = await self._convert_audio_to_wav_file(audio_data, filename) if not wav_path or not Path(wav_path).exists(): - logger.warning("[QQ] STT: ffmpeg conversion produced no output") + logger.warning( + "[%s] STT: ffmpeg conversion produced no output", self._log_tag + ) return None # 4. Call STT API - logger.info("[QQ] STT: calling ASR on %s", wav_path) + logger.debug("[%s] STT: calling ASR on %s", self._log_tag, wav_path) transcript = await self._call_stt(wav_path) # 5. 
Cleanup temp file @@ -1147,15 +1383,22 @@ class QQAdapter(BasePlatformAdapter): pass if transcript: - logger.info("[QQ] STT success: %r", transcript[:100]) + logger.debug("[%s] STT success: %r", self._log_tag, transcript[:100]) else: - logger.warning("[QQ] STT: ASR returned empty transcript") + logger.warning("[%s] STT: ASR returned empty transcript", self._log_tag) return transcript except (httpx.HTTPStatusError, httpx.TransportError, IOError) as exc: - logger.warning("[QQ] STT failed for voice attachment: %s: %s", type(exc).__name__, exc) + logger.warning( + "[%s] STT failed for voice attachment: %s: %s", + self._log_tag, + type(exc).__name__, + exc, + ) return None - async def _convert_audio_to_wav_file(self, audio_data: bytes, filename: str) -> Optional[str]: + async def _convert_audio_to_wav_file( + self, audio_data: bytes, filename: str + ) -> Optional[str]: """Convert audio bytes to a temp .wav file using pilk (SILK) or ffmpeg. QQ voice messages are typically SILK format which ffmpeg cannot decode. @@ -1165,9 +1408,18 @@ class QQAdapter(BasePlatformAdapter): """ import tempfile - ext = Path(filename).suffix.lower() if Path(filename).suffix else self._guess_ext_from_data(audio_data) - logger.info("[QQ] STT: audio_data size=%d, ext=%r, first_20_bytes=%r", - len(audio_data), ext, audio_data[:20]) + ext = ( + Path(filename).suffix.lower() + if Path(filename).suffix + else self._guess_ext_from_data(audio_data) + ) + logger.info( + "[%s] STT: audio_data size=%d, ext=%r, first_20_bytes=%r", + self._log_tag, + len(audio_data), + ext, + audio_data[:20], + ) with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_src: tmp_src.write(audio_data) @@ -1219,8 +1471,7 @@ class QQAdapter(BasePlatformAdapter): """Check if bytes look like a SILK audio file.""" return data[:4] == b"#!SILK" or data[:2] == b"\x02!" 
or data[:9] == b"#!SILK_V3" - @staticmethod - async def _convert_silk_to_wav(src_path: str, wav_path: str) -> Optional[str]: + async def _convert_silk_to_wav(self, src_path: str, wav_path: str) -> Optional[str]: """Convert audio file to WAV using the pilk library. Tries the file as-is first, then as .silk if the extension differs. @@ -1229,31 +1480,43 @@ class QQAdapter(BasePlatformAdapter): try: import pilk except ImportError: - logger.warning("[QQ] pilk not installed — cannot decode SILK audio. Run: pip install pilk") + logger.warning( + "[%s] pilk not installed — cannot decode SILK audio. Run: pip install pilk", + self._log_tag, + ) return None # Try converting the file as-is try: pilk.silk_to_wav(src_path, wav_path, rate=16000) if Path(wav_path).exists() and Path(wav_path).stat().st_size > 44: - logger.info("[QQ] pilk converted %s to wav (%d bytes)", - Path(src_path).name, Path(wav_path).stat().st_size) + logger.debug( + "[%s] pilk converted %s to wav (%d bytes)", + self._log_tag, + Path(src_path).name, + Path(wav_path).stat().st_size, + ) return wav_path except Exception as exc: - logger.debug("[QQ] pilk direct conversion failed: %s", exc) + logger.debug("[%s] pilk direct conversion failed: %s", self._log_tag, exc) # Try renaming to .silk and converting (pilk checks the extension) silk_path = src_path.rsplit(".", 1)[0] + ".silk" try: import shutil + shutil.copy2(src_path, silk_path) pilk.silk_to_wav(silk_path, wav_path, rate=16000) if Path(wav_path).exists() and Path(wav_path).stat().st_size > 44: - logger.info("[QQ] pilk converted %s (as .silk) to wav (%d bytes)", - Path(src_path).name, Path(wav_path).stat().st_size) + logger.debug( + "[%s] pilk converted %s (as .silk) to wav (%d bytes)", + self._log_tag, + Path(src_path).name, + Path(wav_path).stat().st_size, + ) return wav_path except Exception as exc: - logger.debug("[QQ] pilk .silk conversion failed: %s", exc) + logger.debug("[%s] pilk .silk conversion failed: %s", self._log_tag, exc) finally: try: 
os.unlink(silk_path) @@ -1262,8 +1525,7 @@ class QQAdapter(BasePlatformAdapter): return None - @staticmethod - async def _convert_raw_to_wav(audio_data: bytes, wav_path: str) -> Optional[str]: + async def _convert_raw_to_wav(self, audio_data: bytes, wav_path: str) -> Optional[str]: """Last resort: try writing audio data as raw PCM 16-bit mono 16kHz WAV. This will produce garbage if the data isn't raw PCM, but at least @@ -1271,6 +1533,7 @@ class QQAdapter(BasePlatformAdapter): """ try: import wave + with wave.open(wav_path, "w") as wf: wf.setnchannels(1) wf.setsampwidth(2) @@ -1278,33 +1541,52 @@ class QQAdapter(BasePlatformAdapter): wf.writeframes(audio_data) return wav_path except Exception as exc: - logger.debug("[QQ] raw PCM fallback failed: %s", exc) + logger.debug("[%s] raw PCM fallback failed: %s", self._log_tag, exc) return None - @staticmethod - async def _convert_ffmpeg_to_wav(src_path: str, wav_path: str) -> Optional[str]: + async def _convert_ffmpeg_to_wav(self, src_path: str, wav_path: str) -> Optional[str]: """Convert audio file to WAV using ffmpeg.""" try: proc = await asyncio.create_subprocess_exec( - "ffmpeg", "-y", "-i", src_path, "-ar", "16000", "-ac", "1", wav_path, + "ffmpeg", + "-y", + "-i", + src_path, + "-ar", + "16000", + "-ac", + "1", + wav_path, stdout=asyncio.subprocess.DEVNULL, stderr=asyncio.subprocess.PIPE, ) await asyncio.wait_for(proc.wait(), timeout=30) if proc.returncode != 0: stderr = await proc.stderr.read() if proc.stderr else b"" - logger.warning("[QQ] ffmpeg failed for %s: %s", - Path(src_path).name, stderr[:200].decode(errors="replace")) + logger.warning( + "[%s] ffmpeg failed for %s: %s", + self._log_tag, + Path(src_path).name, + stderr[:200].decode(errors="replace"), + ) return None except (asyncio.TimeoutError, FileNotFoundError) as exc: - logger.warning("[QQ] ffmpeg conversion error: %s", exc) + logger.warning("[%s] ffmpeg conversion error: %s", self._log_tag, exc) return None if not Path(wav_path).exists() or 
Path(wav_path).stat().st_size <= 44: - logger.warning("[QQ] ffmpeg produced no/small output for %s", Path(src_path).name) + logger.warning( + "[%s] ffmpeg produced no/small output for %s", + self._log_tag, + Path(src_path).name, + ) return None - logger.info("[QQ] ffmpeg converted %s to wav (%d bytes)", - Path(src_path).name, Path(wav_path).stat().st_size) + logger.debug( + "[%s] ffmpeg converted %s to wav (%d bytes)", + self._log_tag, + Path(src_path).name, + Path(wav_path).stat().st_size, + ) return wav_path def _resolve_stt_config(self) -> Optional[Dict[str, str]]: @@ -1343,7 +1625,8 @@ class QQAdapter(BasePlatformAdapter): return { "base_url": base_url, "api_key": api_key, - "model": model or ("glm-asr" if provider in ("zai", "glm") else "whisper-1"), + "model": model + or ("glm-asr" if provider in ("zai", "glm") else "whisper-1"), } # 2. QQ-specific env vars (set by `hermes setup gateway` / `hermes gateway`) @@ -1371,7 +1654,10 @@ class QQAdapter(BasePlatformAdapter): """ stt_cfg = self._resolve_stt_config() if not stt_cfg: - logger.warning("[QQ] STT not configured (no stt config or QQ_STT_API_KEY)") + logger.warning( + "[%s] STT not configured (no stt config or QQ_STT_API_KEY)", + self._log_tag, + ) return None base_url = stt_cfg["base_url"] @@ -1401,17 +1687,37 @@ class QQAdapter(BasePlatformAdapter): return text.strip() return None except (httpx.HTTPStatusError, IOError) as exc: - logger.warning("[QQ] STT API call failed (model=%s, base=%s): %s", - model, base_url[:50], exc) + logger.warning( + "[%s] STT API call failed (model=%s, base=%s): %s", + self._log_tag, + model, + base_url[:50], + exc, + ) return None - async def _convert_audio_to_wav(self, audio_data: bytes, source_url: str) -> Optional[str]: + async def _convert_audio_to_wav( + self, audio_data: bytes, source_url: str + ) -> Optional[str]: """Convert audio bytes to .wav using pilk (SILK) or ffmpeg, caching the result.""" import tempfile # Determine source format from magic bytes or URL - ext = 
Path(urlparse(source_url).path).suffix.lower() if urlparse(source_url).path else "" - if not ext or ext not in (".silk", ".amr", ".mp3", ".wav", ".ogg", ".m4a", ".aac", ".flac"): + ext = ( + Path(urlparse(source_url).path).suffix.lower() + if urlparse(source_url).path + else "" + ) + if not ext or ext not in ( + ".silk", + ".amr", + ".mp3", + ".wav", + ".ogg", + ".m4a", + ".aac", + ".flac", + ): ext = self._guess_ext_from_data(audio_data) with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_src: @@ -1427,8 +1733,12 @@ class QQAdapter(BasePlatformAdapter): result = await self._convert_ffmpeg_to_wav(src_path, wav_path) if not result: - logger.warning("[%s] audio conversion failed for %s (format=%s)", - self.name, source_url[:60], ext) + logger.warning( + "[%s] audio conversion failed for %s (format=%s)", + self._log_tag, + source_url[:60], + ext, + ) return cache_document_from_bytes(audio_data, f"qq_voice{ext}") except Exception: return cache_document_from_bytes(audio_data, f"qq_voice{ext}") @@ -1444,7 +1754,7 @@ class QQAdapter(BasePlatformAdapter): os.unlink(wav_path) return cache_document_from_bytes(wav_data, "qq_voice.wav") except Exception as exc: - logger.debug("[%s] Failed to read converted wav: %s", self.name, exc) + logger.debug("[%s] Failed to read converted wav: %s", self._log_tag, exc) return None # ------------------------------------------------------------------ @@ -1452,11 +1762,11 @@ class QQAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ async def _api_request( - self, - method: str, - path: str, - body: Optional[Dict[str, Any]] = None, - timeout: float = DEFAULT_API_TIMEOUT, + self, + method: str, + path: str, + body: Optional[Dict[str, Any]] = None, + timeout: float = DEFAULT_API_TIMEOUT, ) -> Dict[str, Any]: """Make an authenticated REST API request to QQ Bot API.""" if not self._http_client: @@ -1466,6 +1776,7 @@ class QQAdapter(BasePlatformAdapter): headers = { "Authorization": 
f"QQBot {token}", "Content-Type": "application/json", + "User-Agent": build_user_agent(), } try: @@ -1487,17 +1798,21 @@ class QQAdapter(BasePlatformAdapter): raise RuntimeError(f"QQ Bot API timeout [{path}]: {exc}") from exc async def _upload_media( - self, - target_type: str, - target_id: str, - file_type: int, - url: Optional[str] = None, - file_data: Optional[str] = None, - srv_send_msg: bool = False, - file_name: Optional[str] = None, + self, + target_type: str, + target_id: str, + file_type: int, + url: Optional[str] = None, + file_data: Optional[str] = None, + srv_send_msg: bool = False, + file_name: Optional[str] = None, ) -> Dict[str, Any]: """Upload media and return file_info.""" - path = f"/v2/users/{target_id}/files" if target_type == "c2c" else f"/v2/groups/{target_id}/files" + path = ( + f"/v2/users/{target_id}/files" + if target_type == "c2c" + else f"/v2/groups/{target_id}/files" + ) body: Dict[str, Any] = { "file_type": file_type, @@ -1514,23 +1829,55 @@ class QQAdapter(BasePlatformAdapter): last_exc = None for attempt in range(3): try: - return await self._api_request("POST", path, body, timeout=FILE_UPLOAD_TIMEOUT) + return await self._api_request( + "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT + ) except RuntimeError as exc: last_exc = exc err_msg = str(exc) - if any(kw in err_msg for kw in ("400", "401", "Invalid", "timeout", "Timeout")): + if any( + kw in err_msg + for kw in ("400", "401", "Invalid", "timeout", "Timeout") + ): raise if attempt < 2: await asyncio.sleep(1.5 * (attempt + 1)) raise last_exc # type: ignore[misc] + # Maximum time (seconds) to wait for reconnection before giving up on send. + _RECONNECT_WAIT_SECONDS = 15.0 + # How often (seconds) to poll is_connected while waiting. + _RECONNECT_POLL_INTERVAL = 0.5 + + async def _wait_for_reconnection(self) -> bool: + """Wait for the WebSocket listener to reconnect. 
+ + The listener loop (_listen_loop) auto-reconnects on disconnect, but + there is a race window where send() is called right after a disconnect + and before the reconnect completes. This method polls is_connected + for up to _RECONNECT_WAIT_SECONDS. + + Returns True if reconnected, False if still disconnected. + """ + logger.info("[%s] Not connected — waiting for reconnection (up to %.0fs)", + self._log_tag, self._RECONNECT_WAIT_SECONDS) + waited = 0.0 + while waited < self._RECONNECT_WAIT_SECONDS: + await asyncio.sleep(self._RECONNECT_POLL_INTERVAL) + waited += self._RECONNECT_POLL_INTERVAL + if self.is_connected: + logger.info("[%s] Reconnected after %.1fs", self._log_tag, waited) + return True + logger.warning("[%s] Still not connected after %.0fs", self._log_tag, self._RECONNECT_WAIT_SECONDS) + return False + async def send( - self, - chat_id: str, - content: str, - reply_to: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, + self, + chat_id: str, + content: str, + reply_to: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: """Send a text or markdown message to a QQ user or group. 
@@ -1540,7 +1887,8 @@ class QQAdapter(BasePlatformAdapter): del metadata if not self.is_connected: - return SendResult(success=False, error="Not connected") + if not await self._wait_for_reconnection(): + return SendResult(success=False, error="Not connected", retryable=True) if not content or not content.strip(): return SendResult(success=True) @@ -1558,7 +1906,10 @@ class QQAdapter(BasePlatformAdapter): return last_result async def _send_chunk( - self, chat_id: str, content: str, reply_to: Optional[str] = None, + self, + chat_id: str, + content: str, + reply_to: Optional[str] = None, ) -> SendResult: """Send a single chunk with retry + exponential backoff.""" last_exc: Optional[Exception] = None @@ -1573,28 +1924,39 @@ class QQAdapter(BasePlatformAdapter): elif chat_type == "guild": return await self._send_guild_text(chat_id, content, reply_to) else: - return SendResult(success=False, error=f"Unknown chat type for {chat_id}") + return SendResult( + success=False, error=f"Unknown chat type for {chat_id}" + ) except Exception as exc: last_exc = exc err = str(exc).lower() # Permanent errors — don't retry - if any(k in err for k in ("invalid", "forbidden", "not found", "bad request")): + if any( + k in err + for k in ("invalid", "forbidden", "not found", "bad request") + ): break # Transient — back off and retry if attempt < 2: delay = 1.0 * (2 ** attempt) - logger.warning("[%s] send retry %d/3 after %.1fs: %s", - self.name, attempt + 1, delay, exc) + logger.warning( + "[%s] send retry %d/3 after %.1fs: %s", + self._log_tag, + attempt + 1, + delay, + exc, + ) await asyncio.sleep(delay) error_msg = str(last_exc) if last_exc else "Unknown error" - logger.error("[%s] Send failed: %s", self.name, error_msg) - retryable = not any(k in error_msg.lower() - for k in ("invalid", "forbidden", "not found")) + logger.error("[%s] Send failed: %s", self._log_tag, error_msg) + retryable = not any( + k in error_msg.lower() for k in ("invalid", "forbidden", "not found") + ) return 
SendResult(success=False, error=error_msg, retryable=retryable) async def _send_c2c_text( - self, openid: str, content: str, reply_to: Optional[str] = None + self, openid: str, content: str, reply_to: Optional[str] = None ) -> SendResult: """Send text to a C2C user via REST API.""" msg_seq = self._next_msg_seq(reply_to or openid) @@ -1607,7 +1969,7 @@ class QQAdapter(BasePlatformAdapter): return SendResult(success=True, message_id=msg_id, raw_response=data) async def _send_group_text( - self, group_openid: str, content: str, reply_to: Optional[str] = None + self, group_openid: str, content: str, reply_to: Optional[str] = None ) -> SendResult: """Send text to a group via REST API.""" msg_seq = self._next_msg_seq(reply_to or group_openid) @@ -1615,15 +1977,17 @@ class QQAdapter(BasePlatformAdapter): if reply_to: body["msg_id"] = reply_to - data = await self._api_request("POST", f"/v2/groups/{group_openid}/messages", body) + data = await self._api_request( + "POST", f"/v2/groups/{group_openid}/messages", body + ) msg_id = str(data.get("id", uuid.uuid4().hex[:12])) return SendResult(success=True, message_id=msg_id, raw_response=data) async def _send_guild_text( - self, channel_id: str, content: str, reply_to: Optional[str] = None + self, channel_id: str, content: str, reply_to: Optional[str] = None ) -> SendResult: """Send text to a guild channel via REST API.""" - body: Dict[str, Any] = {"content": content[:self.MAX_MESSAGE_LENGTH]} + body: Dict[str, Any] = {"content": content[: self.MAX_MESSAGE_LENGTH]} if reply_to: body["msg_id"] = reply_to @@ -1631,19 +1995,21 @@ class QQAdapter(BasePlatformAdapter): msg_id = str(data.get("id", uuid.uuid4().hex[:12])) return SendResult(success=True, message_id=msg_id, raw_response=data) - def _build_text_body(self, content: str, reply_to: Optional[str] = None) -> Dict[str, Any]: + def _build_text_body( + self, content: str, reply_to: Optional[str] = None + ) -> Dict[str, Any]: """Build the message body for C2C/group text 
sending.""" msg_seq = self._next_msg_seq(reply_to or "default") if self._markdown_support: body: Dict[str, Any] = { - "markdown": {"content": content[:self.MAX_MESSAGE_LENGTH]}, + "markdown": {"content": content[: self.MAX_MESSAGE_LENGTH]}, "msg_type": MSG_TYPE_MARKDOWN, "msg_seq": msg_seq, } else: body = { - "content": content[:self.MAX_MESSAGE_LENGTH], + "content": content[: self.MAX_MESSAGE_LENGTH], "msg_type": MSG_TYPE_TEXT, "msg_seq": msg_seq, } @@ -1660,105 +2026,135 @@ class QQAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ async def send_image( - self, - chat_id: str, - image_url: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, + self, + chat_id: str, + image_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: """Send an image natively via QQ Bot API upload.""" del metadata - result = await self._send_media(chat_id, image_url, MEDIA_TYPE_IMAGE, "image", caption, reply_to) + result = await self._send_media( + chat_id, image_url, MEDIA_TYPE_IMAGE, "image", caption, reply_to + ) if result.success or not self._is_url(image_url): return result # Fallback to text URL - logger.warning("[%s] Image send failed, falling back to text: %s", self.name, result.error) + logger.warning( + "[%s] Image send failed, falling back to text: %s", + self._log_tag, + result.error, + ) fallback = f"{caption}\n{image_url}" if caption else image_url return await self.send(chat_id=chat_id, content=fallback, reply_to=reply_to) async def send_image_file( - self, - chat_id: str, - image_path: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, + self, + chat_id: str, + image_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, ) -> SendResult: """Send a local image file natively.""" del kwargs - return await 
self._send_media(chat_id, image_path, MEDIA_TYPE_IMAGE, "image", caption, reply_to) + return await self._send_media( + chat_id, image_path, MEDIA_TYPE_IMAGE, "image", caption, reply_to + ) async def send_voice( - self, - chat_id: str, - audio_path: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, + self, + chat_id: str, + audio_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, ) -> SendResult: """Send a voice message natively.""" del kwargs - return await self._send_media(chat_id, audio_path, MEDIA_TYPE_VOICE, "voice", caption, reply_to) + return await self._send_media( + chat_id, audio_path, MEDIA_TYPE_VOICE, "voice", caption, reply_to + ) async def send_video( - self, - chat_id: str, - video_path: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, + self, + chat_id: str, + video_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, ) -> SendResult: """Send a video natively.""" del kwargs - return await self._send_media(chat_id, video_path, MEDIA_TYPE_VIDEO, "video", caption, reply_to) + return await self._send_media( + chat_id, video_path, MEDIA_TYPE_VIDEO, "video", caption, reply_to + ) async def send_document( - self, - chat_id: str, - file_path: str, - caption: Optional[str] = None, - file_name: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, + self, + chat_id: str, + file_path: str, + caption: Optional[str] = None, + file_name: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, ) -> SendResult: """Send a file/document natively.""" del kwargs - return await self._send_media(chat_id, file_path, MEDIA_TYPE_FILE, "file", caption, reply_to, - file_name=file_name) + return await self._send_media( + chat_id, + file_path, + MEDIA_TYPE_FILE, + "file", + caption, + reply_to, + file_name=file_name, + ) async def _send_media( - self, - chat_id: str, - media_source: str, - file_type: int, - 
kind: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - file_name: Optional[str] = None, + self, + chat_id: str, + media_source: str, + file_type: int, + kind: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + file_name: Optional[str] = None, ) -> SendResult: """Upload media and send as a native message.""" if not self.is_connected: - return SendResult(success=False, error="Not connected") + if not await self._wait_for_reconnection(): + return SendResult(success=False, error="Not connected", retryable=True) try: # Resolve media source - data, content_type, resolved_name = await self._load_media(media_source, file_name) + data, content_type, resolved_name = await self._load_media( + media_source, file_name + ) # Route chat_type = self._guess_chat_type(chat_id) - target_path = f"/v2/users/{chat_id}/files" if chat_type == "c2c" else f"/v2/groups/{chat_id}/files" + target_path = ( + f"/v2/users/{chat_id}/files" + if chat_type == "c2c" + else f"/v2/groups/{chat_id}/files" + ) if chat_type == "guild": # Guild channels don't support native media upload in the same way # Send as URL fallback - return SendResult(success=False, error="Guild media send not supported via this path") + return SendResult( + success=False, error="Guild media send not supported via this path" + ) # Upload upload = await self._upload_media( - chat_type, chat_id, file_type, + chat_type, + chat_id, + file_type, file_data=data if not self._is_url(media_source) else None, url=media_source if self._is_url(media_source) else None, srv_send_msg=False, @@ -1767,7 +2163,9 @@ class QQAdapter(BasePlatformAdapter): file_info = upload.get("file_info") if not file_info: - return SendResult(success=False, error=f"Upload returned no file_info: {upload}") + return SendResult( + success=False, error=f"Upload returned no file_info: {upload}" + ) # Send media message msg_seq = self._next_msg_seq(chat_id) @@ -1777,13 +2175,17 @@ class QQAdapter(BasePlatformAdapter): 
"msg_seq": msg_seq, } if caption: - body["content"] = caption[:self.MAX_MESSAGE_LENGTH] + body["content"] = caption[: self.MAX_MESSAGE_LENGTH] if reply_to: body["msg_id"] = reply_to send_data = await self._api_request( "POST", - f"/v2/users/{chat_id}/messages" if chat_type == "c2c" else f"/v2/groups/{chat_id}/messages", + ( + f"/v2/users/{chat_id}/messages" + if chat_type == "c2c" + else f"/v2/groups/{chat_id}/messages" + ), body, ) return SendResult( @@ -1792,11 +2194,11 @@ class QQAdapter(BasePlatformAdapter): raw_response=send_data, ) except Exception as exc: - logger.error("[%s] Media send failed: %s", self.name, exc) + logger.error("[%s] Media send failed: %s", self._log_tag, exc) return SendResult(success=False, error=str(exc)) async def _load_media( - self, source: str, file_name: Optional[str] = None + self, source: str, file_name: Optional[str] = None ) -> Tuple[str, str, str]: """Load media from URL or local path. Returns (base64_or_url, content_type, filename).""" source = str(source).strip() @@ -1827,7 +2229,9 @@ class QQAdapter(BasePlatformAdapter): raw = local_path.read_bytes() resolved_name = file_name or local_path.name - content_type = mimetypes.guess_type(str(local_path))[0] or "application/octet-stream" + content_type = ( + mimetypes.guess_type(str(local_path))[0] or "application/octet-stream" + ) b64 = base64.b64encode(raw).decode("ascii") return b64, content_type, resolved_name @@ -1836,27 +2240,44 @@ class QQAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ async def send_typing(self, chat_id: str, metadata=None) -> None: - """Send an input notify to a C2C user (only supported for C2C).""" - del metadata + """Send an input notify to a C2C user (only supported for C2C). + Debounced to one request per ~50s (the API sets a 60s indicator). + The QQ API requires the originating message ID — retrieved from + ``_last_msg_id`` which is populated by ``_on_message``. 
+ """ if not self.is_connected: return - # Only C2C supports input notify chat_type = self._guess_chat_type(chat_id) if chat_type != "c2c": return + msg_id = self._last_msg_id.get(chat_id) + if not msg_id: + return + + # Debounce — skip if we sent recently + now = time.time() + last_sent = self._typing_sent_at.get(chat_id, 0.0) + if now - last_sent < self._TYPING_DEBOUNCE_SECONDS: + return + try: msg_seq = self._next_msg_seq(chat_id) body = { "msg_type": MSG_TYPE_INPUT_NOTIFY, - "input_notify": {"input_type": 1, "input_second": 60}, + "msg_id": msg_id, + "input_notify": { + "input_type": 1, + "input_second": self._TYPING_INPUT_SECONDS, + }, "msg_seq": msg_seq, } await self._api_request("POST", f"/v2/users/{chat_id}/messages", body) + self._typing_sent_at[chat_id] = now except Exception as exc: - logger.debug("[%s] send_typing failed: %s", self.name, exc) + logger.debug("[%s] send_typing failed: %s", self._log_tag, exc) # ------------------------------------------------------------------ # Format @@ -1903,7 +2324,8 @@ class QQAdapter(BasePlatformAdapter): """Strip the @bot mention prefix from group message content.""" # QQ group @-messages may have the bot's QQ/ID as prefix import re - stripped = re.sub(r'^@\S+\s*', '', content.strip()) + + stripped = re.sub(r"^@\S+\s*", "", content.strip()) return stripped def _is_dm_allowed(self, user_id: str) -> bool: diff --git a/gateway/platforms/qqbot/constants.py b/gateway/platforms/qqbot/constants.py new file mode 100644 index 0000000000..ddae3c133e --- /dev/null +++ b/gateway/platforms/qqbot/constants.py @@ -0,0 +1,74 @@ +"""QQBot package-level constants shared across adapter, onboard, and other modules.""" + +from __future__ import annotations + +import os + +# --------------------------------------------------------------------------- +# QQBot adapter version — bump on functional changes to the adapter package. 
+# --------------------------------------------------------------------------- + +QQBOT_VERSION = "1.1.0" + +# --------------------------------------------------------------------------- +# API endpoints +# --------------------------------------------------------------------------- + +# The portal domain is configurable via QQ_API_HOST for corporate proxies +# or test environments. Default: q.qq.com (production). +PORTAL_HOST = os.getenv("QQ_PORTAL_HOST", "q.qq.com") + +API_BASE = "https://api.sgroup.qq.com" +TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken" +GATEWAY_URL_PATH = "/gateway" + +# QR-code onboard endpoints (on the portal host) +ONBOARD_CREATE_PATH = "/lite/create_bind_task" +ONBOARD_POLL_PATH = "/lite/poll_bind_result" +QR_URL_TEMPLATE = ( + "https://q.qq.com/qqbot/openclaw/connect.html" + "?task_id={task_id}&_wv=2&source=hermes" +) + +# --------------------------------------------------------------------------- +# Timeouts & retry +# --------------------------------------------------------------------------- + +DEFAULT_API_TIMEOUT = 30.0 +FILE_UPLOAD_TIMEOUT = 120.0 +CONNECT_TIMEOUT_SECONDS = 20.0 + +RECONNECT_BACKOFF = [2, 5, 10, 30, 60] +MAX_RECONNECT_ATTEMPTS = 100 +RATE_LIMIT_DELAY = 60 # seconds +QUICK_DISCONNECT_THRESHOLD = 5.0 # seconds +MAX_QUICK_DISCONNECT_COUNT = 3 + +ONBOARD_POLL_INTERVAL = 2.0 # seconds between poll_bind_result calls +ONBOARD_API_TIMEOUT = 10.0 + +# --------------------------------------------------------------------------- +# Message limits +# --------------------------------------------------------------------------- + +MAX_MESSAGE_LENGTH = 4000 +DEDUP_WINDOW_SECONDS = 300 +DEDUP_MAX_SIZE = 1000 + +# --------------------------------------------------------------------------- +# QQ Bot message types +# --------------------------------------------------------------------------- + +MSG_TYPE_TEXT = 0 +MSG_TYPE_MARKDOWN = 2 +MSG_TYPE_MEDIA = 7 +MSG_TYPE_INPUT_NOTIFY = 6 + +# 
--------------------------------------------------------------------------- +# QQ Bot file media types +# --------------------------------------------------------------------------- + +MEDIA_TYPE_IMAGE = 1 +MEDIA_TYPE_VIDEO = 2 +MEDIA_TYPE_VOICE = 3 +MEDIA_TYPE_FILE = 4 diff --git a/gateway/platforms/qqbot/crypto.py b/gateway/platforms/qqbot/crypto.py new file mode 100644 index 0000000000..426bd29de5 --- /dev/null +++ b/gateway/platforms/qqbot/crypto.py @@ -0,0 +1,45 @@ +"""AES-256-GCM utilities for QQBot scan-to-configure credential decryption.""" + +from __future__ import annotations + +import base64 +import os + + +def generate_bind_key() -> str: + """Generate a 256-bit random AES key and return it as base64. + + The key is passed to ``create_bind_task`` so the server can encrypt + the bot's *client_secret* before returning it. Only this CLI holds + the key, ensuring the secret never travels in plaintext. + """ + return base64.b64encode(os.urandom(32)).decode() + + +def decrypt_secret(encrypted_base64: str, key_base64: str) -> str: + """Decrypt a base64-encoded AES-256-GCM ciphertext. + + Ciphertext layout (after base64-decoding):: + + IV (12 bytes) ‖ ciphertext (N bytes) ‖ AuthTag (16 bytes) + + Args: + encrypted_base64: The ``bot_encrypt_secret`` value from + ``poll_bind_result``. + key_base64: The base64 AES key generated by + :func:`generate_bind_key`. + + Returns: + The decrypted *client_secret* as a UTF-8 string. 
+ """ + from cryptography.hazmat.primitives.ciphers.aead import AESGCM + + key = base64.b64decode(key_base64) + raw = base64.b64decode(encrypted_base64) + + iv = raw[:12] + ciphertext_with_tag = raw[12:] # AESGCM expects ciphertext + tag concatenated + + aesgcm = AESGCM(key) + plaintext = aesgcm.decrypt(iv, ciphertext_with_tag, None) + return plaintext.decode("utf-8") diff --git a/gateway/platforms/qqbot/onboard.py b/gateway/platforms/qqbot/onboard.py new file mode 100644 index 0000000000..65750b3f10 --- /dev/null +++ b/gateway/platforms/qqbot/onboard.py @@ -0,0 +1,124 @@ +""" +QQBot scan-to-configure (QR code onboard) module. + +Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to +generate a QR-code URL and poll for scan completion. On success the caller +receives the bot's *app_id*, *client_secret* (decrypted locally), and the +scanner's *user_openid* — enough to fully configure the QQBot gateway. + +Reference: https://bot.q.qq.com/wiki/develop/api-v2/ +""" + +from __future__ import annotations + +import logging +from enum import IntEnum +from typing import Tuple +from urllib.parse import quote + +from .constants import ( + ONBOARD_API_TIMEOUT, + ONBOARD_CREATE_PATH, + ONBOARD_POLL_PATH, + PORTAL_HOST, + QR_URL_TEMPLATE, +) +from .crypto import generate_bind_key +from .utils import get_api_headers + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Bind status +# --------------------------------------------------------------------------- + + +class BindStatus(IntEnum): + """Status codes returned by ``poll_bind_result``.""" + + NONE = 0 + PENDING = 1 + COMPLETED = 2 + EXPIRED = 3 + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +async def create_bind_task( + timeout: float = ONBOARD_API_TIMEOUT, +) -> Tuple[str, str]: + """Create a bind 
task and return *(task_id, aes_key_base64)*. + + The AES key is generated locally and sent to the server so it can + encrypt the bot credentials before returning them. + + Raises: + RuntimeError: If the API returns a non-zero ``retcode``. + """ + import httpx + + url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}" + key = generate_bind_key() + + async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: + resp = await client.post(url, json={"key": key}, headers=get_api_headers()) + resp.raise_for_status() + data = resp.json() + + if data.get("retcode") != 0: + raise RuntimeError(data.get("msg", "create_bind_task failed")) + + task_id = data.get("data", {}).get("task_id") + if not task_id: + raise RuntimeError("create_bind_task: missing task_id in response") + + logger.debug("create_bind_task ok: task_id=%s", task_id) + return task_id, key + + +async def poll_bind_result( + task_id: str, + timeout: float = ONBOARD_API_TIMEOUT, +) -> Tuple[BindStatus, str, str, str]: + """Poll the bind result for *task_id*. + + Returns: + A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``. + + * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with + :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the + key from :func:`create_bind_task`. + * ``user_openid`` is the OpenID of the person who scanned the code + (available when ``status == COMPLETED``). + + Raises: + RuntimeError: If the API returns a non-zero ``retcode``. 
+ """ + import httpx + + url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}" + + async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: + resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers()) + resp.raise_for_status() + data = resp.json() + + if data.get("retcode") != 0: + raise RuntimeError(data.get("msg", "poll_bind_result failed")) + + d = data.get("data", {}) + return ( + BindStatus(d.get("status", 0)), + str(d.get("bot_appid", "")), + d.get("bot_encrypt_secret", ""), + d.get("user_openid", ""), + ) + + +def build_connect_url(task_id: str) -> str: + """Build the QR-code target URL for a given *task_id*.""" + return QR_URL_TEMPLATE.format(task_id=quote(task_id)) diff --git a/gateway/platforms/qqbot/utils.py b/gateway/platforms/qqbot/utils.py new file mode 100644 index 0000000000..873e58d2a5 --- /dev/null +++ b/gateway/platforms/qqbot/utils.py @@ -0,0 +1,71 @@ +"""QQBot shared utilities — User-Agent, HTTP helpers, config coercion.""" + +from __future__ import annotations + +import platform +import sys +from typing import Any, Dict, List + +from .constants import QQBOT_VERSION + + +# --------------------------------------------------------------------------- +# User-Agent +# --------------------------------------------------------------------------- + +def _get_hermes_version() -> str: + """Return the hermes-agent package version, or 'dev' if unavailable.""" + try: + from importlib.metadata import version + return version("hermes-agent") + except Exception: + return "dev" + + +def build_user_agent() -> str: + """Build a descriptive User-Agent string. 
+ + Format:: + + QQBotAdapter/ (Python/; ; Hermes/) + + Example:: + + QQBotAdapter/1.0.0 (Python/3.11.15; darwin; Hermes/0.9.0) + """ + py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + os_name = platform.system().lower() + hermes_version = _get_hermes_version() + return f"QQBotAdapter/{QQBOT_VERSION} (Python/{py_version}; {os_name}; Hermes/{hermes_version})" + + +def get_api_headers() -> Dict[str, str]: + """Return standard HTTP headers for QQBot API requests. + + Includes ``Content-Type``, ``Accept``, and a dynamic ``User-Agent``. + ``q.qq.com`` requires ``Accept: application/json`` — without it, + the server returns a JavaScript anti-bot challenge page. + """ + return { + "Content-Type": "application/json", + "Accept": "application/json", + "User-Agent": build_user_agent(), + } + + +# --------------------------------------------------------------------------- +# Config helpers +# --------------------------------------------------------------------------- + +def coerce_list(value: Any) -> List[str]: + """Coerce config values into a trimmed string list. + + Accepts comma-separated strings, lists, tuples, sets, or single values. 
+ """ + if value is None: + return [] + if isinstance(value, str): + return [item.strip() for item in value.split(",") if item.strip()] + if isinstance(value, (list, tuple, set)): + return [str(item).strip() for item in value if str(item).strip()] + return [str(value).strip()] if str(value).strip() else [] diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index 617713ad90..9a0a6256a4 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -18,6 +18,7 @@ import logging import os import random import time +import uuid from datetime import datetime, timezone from pathlib import Path from typing import Dict, List, Optional, Any @@ -127,6 +128,27 @@ def _render_mentions(text: str, mentions: list) -> str: return text +def _is_signal_service_id(value: str) -> bool: + """Return True if *value* already looks like a Signal service identifier.""" + if not value: + return False + if value.startswith("PNI:") or value.startswith("u:"): + return True + try: + uuid.UUID(value) + return True + except (ValueError, AttributeError, TypeError): + return False + + +def _looks_like_e164_number(value: str) -> bool: + """Return True for a plausible E.164 phone number.""" + if not value or not value.startswith("+"): + return False + digits = value[1:] + return digits.isdigit() and 7 <= len(digits) <= 15 + + def check_signal_requirements() -> bool: """Check if Signal is configured (has URL and account).""" return bool(os.getenv("SIGNAL_HTTP_URL") and os.getenv("SIGNAL_ACCOUNT")) @@ -160,6 +182,14 @@ class SignalAdapter(BasePlatformAdapter): self._sse_task: Optional[asyncio.Task] = None self._health_monitor_task: Optional[asyncio.Task] = None self._typing_tasks: Dict[str, asyncio.Task] = {} + # Per-chat typing-indicator backoff. 
When signal-cli reports + # NETWORK_FAILURE (recipient offline / unroutable), base.py's + # _keep_typing refresh loop would otherwise hammer sendTyping every + # ~2s indefinitely, producing WARNING-level log spam and pointless + # RPC traffic. We track consecutive failures per chat and skip the + # RPC during a cooldown window instead. + self._typing_failures: Dict[str, int] = {} + self._typing_skip_until: Dict[str, float] = {} self._running = False self._last_sse_activity = 0.0 self._sse_response: Optional[httpx.Response] = None @@ -171,6 +201,12 @@ class SignalAdapter(BasePlatformAdapter): # in Note to Self / self-chat mode (mirrors WhatsApp recentlySentIds) self._recent_sent_timestamps: set = set() self._max_recent_timestamps = 50 + # Signal increasingly exposes ACI/PNI UUIDs as stable recipient IDs. + # Keep a best-effort mapping so outbound sends can upgrade from a + # phone number to the corresponding UUID when signal-cli prefers it. + self._recipient_uuid_by_number: Dict[str, str] = {} + self._recipient_number_by_uuid: Dict[str, str] = {} + self._recipient_cache_lock = asyncio.Lock() logger.info("Signal adapter initialized: url=%s account=%s groups=%s", self.http_url, redact_phone(self.account), @@ -187,31 +223,40 @@ class SignalAdapter(BasePlatformAdapter): return False # Acquire scoped lock to prevent duplicate Signal listeners for the same phone + lock_acquired = False try: if not self._acquire_platform_lock('signal-phone', self.account, 'Signal account'): return False + lock_acquired = True except Exception as e: logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e) self.client = httpx.AsyncClient(timeout=30.0) - - # Health check — verify signal-cli daemon is reachable try: - resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0) - if resp.status_code != 200: - logger.error("Signal: health check failed (status %d)", resp.status_code) + # Health check — verify signal-cli daemon is reachable + try: + resp = await 
self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0) + if resp.status_code != 200: + logger.error("Signal: health check failed (status %d)", resp.status_code) + return False + except Exception as e: + logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e) return False - except Exception as e: - logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e) - return False - self._running = True - self._last_sse_activity = time.time() - self._sse_task = asyncio.create_task(self._sse_listener()) - self._health_monitor_task = asyncio.create_task(self._health_monitor()) + self._running = True + self._last_sse_activity = time.time() + self._sse_task = asyncio.create_task(self._sse_listener()) + self._health_monitor_task = asyncio.create_task(self._health_monitor()) - logger.info("Signal: connected to %s", self.http_url) - return True + logger.info("Signal: connected to %s", self.http_url) + return True + finally: + if not self._running: + if self.client: + await self.client.aclose() + self.client = None + if lock_acquired: + self._release_platform_lock() async def disconnect(self) -> None: """Stop SSE listener and clean up.""" @@ -392,6 +437,7 @@ class SignalAdapter(BasePlatformAdapter): ) sender_name = envelope_data.get("sourceName", "") sender_uuid = envelope_data.get("sourceUuid", "") + self._remember_recipient_identifiers(sender, sender_uuid) if not sender: logger.debug("Signal: ignoring envelope with no sender") @@ -510,6 +556,64 @@ class SignalAdapter(BasePlatformAdapter): await self.handle_message(event) + def _remember_recipient_identifiers(self, number: Optional[str], service_id: Optional[str]) -> None: + """Cache any number↔UUID mapping observed from Signal envelopes.""" + if not number or not service_id or not _is_signal_service_id(service_id): + return + self._recipient_uuid_by_number[number] = service_id + self._recipient_number_by_uuid[service_id] = number + + def _extract_contact_uuid(self, contact: Any, 
phone_number: str) -> Optional[str]: + """Best-effort extraction of a Signal service ID from listContacts output.""" + if not isinstance(contact, dict): + return None + + number = contact.get("number") + recipient = contact.get("recipient") + service_id = contact.get("uuid") or contact.get("serviceId") + if not service_id: + profile = contact.get("profile") + if isinstance(profile, dict): + service_id = profile.get("serviceId") or profile.get("uuid") + + if service_id and _is_signal_service_id(service_id): + matches_number = number == phone_number or recipient == phone_number + if matches_number: + return service_id + return None + + async def _resolve_recipient(self, chat_id: str) -> str: + """Return the preferred Signal recipient identifier for a direct chat.""" + if ( + not chat_id + or chat_id.startswith("group:") + or _is_signal_service_id(chat_id) + or not _looks_like_e164_number(chat_id) + ): + return chat_id + + cached = self._recipient_uuid_by_number.get(chat_id) + if cached: + return cached + + async with self._recipient_cache_lock: + cached = self._recipient_uuid_by_number.get(chat_id) + if cached: + return cached + + contacts = await self._rpc("listContacts", { + "account": self.account, + "allRecipients": True, + }) + if isinstance(contacts, list): + for contact in contacts: + number = contact.get("number") if isinstance(contact, dict) else None + service_id = self._extract_contact_uuid(contact, chat_id) + if number and service_id: + self._remember_recipient_identifiers(number, service_id) + + return self._recipient_uuid_by_number.get(chat_id, chat_id) + # ------------------------------------------------------------------ # Attachment Handling # ------------------------------------------------------------------ @@ -548,8 +652,22 @@ class SignalAdapter(BasePlatformAdapter): # JSON-RPC Communication # ------------------------------------------------------------------ - async def _rpc(self, method: str, params: dict, rpc_id: str = None) -> Any: - """Send 
a JSON-RPC 2.0 request to signal-cli daemon.""" + async def _rpc( + self, + method: str, + params: dict, + rpc_id: str = None, + *, + log_failures: bool = True, + ) -> Any: + """Send a JSON-RPC 2.0 request to signal-cli daemon. + + When ``log_failures=False``, error and exception paths log at DEBUG + instead of WARNING — used by the typing-indicator path to silence + repeated NETWORK_FAILURE spam for unreachable recipients while + still preserving visibility for the first occurrence and for + unrelated RPCs. + """ if not self.client: logger.warning("Signal: RPC called but client not connected") return None @@ -574,13 +692,19 @@ class SignalAdapter(BasePlatformAdapter): data = resp.json() if "error" in data: - logger.warning("Signal RPC error (%s): %s", method, data["error"]) + if log_failures: + logger.warning("Signal RPC error (%s): %s", method, data["error"]) + else: + logger.debug("Signal RPC error (%s): %s", method, data["error"]) return None return data.get("result") except Exception as e: - logger.warning("Signal RPC %s failed: %s", method, e) + if log_failures: + logger.warning("Signal RPC %s failed: %s", method, e) + else: + logger.debug("Signal RPC %s failed: %s", method, e) return None # ------------------------------------------------------------------ @@ -605,7 +729,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] result = await self._rpc("send", params) @@ -627,7 +751,28 @@ class SignalAdapter(BasePlatformAdapter): self._recent_sent_timestamps.pop() async def send_typing(self, chat_id: str, metadata=None) -> None: - """Send a typing indicator.""" + """Send a typing indicator. + + base.py's ``_keep_typing`` refresh loop calls this every ~2s while + the agent is processing. If signal-cli returns NETWORK_FAILURE for + this recipient (offline, unroutable, group membership lost, etc.) 
+ the unmitigated behaviour is: a WARNING log every 2 seconds for as + long as the agent keeps running. Instead we: + + - silence the WARNING after the first consecutive failure (subsequent + attempts log at DEBUG) so transport issues are still visible once + but don't flood the log, + - skip the RPC entirely during an exponential cooldown window once + three consecutive failures have happened, so we stop hammering + signal-cli with requests it can't deliver. + + A successful sendTyping clears the counters. + """ + now = time.monotonic() + skip_until = self._typing_skip_until.get(chat_id, 0.0) + if now < skip_until: + return + params: Dict[str, Any] = { "account": self.account, } @@ -635,9 +780,28 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] - await self._rpc("sendTyping", params, rpc_id="typing") + fails = self._typing_failures.get(chat_id, 0) + result = await self._rpc( + "sendTyping", + params, + rpc_id="typing", + log_failures=(fails == 0), + ) + + if result is None: + fails += 1 + self._typing_failures[chat_id] = fails + # After 3 consecutive failures, back off exponentially (16s, + # 32s, 60s cap) to stop spamming signal-cli for a recipient + # that clearly isn't reachable right now. 
+ if fails >= 3: + backoff = min(60.0, 16.0 * (2 ** (fails - 3))) + self._typing_skip_until[chat_id] = now + backoff + else: + self._typing_failures.pop(chat_id, None) + self._typing_skip_until.pop(chat_id, None) async def send_image( self, @@ -677,7 +841,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] result = await self._rpc("send", params) if result is not None: @@ -716,7 +880,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] result = await self._rpc("send", params) if result is not None: @@ -789,6 +953,10 @@ class SignalAdapter(BasePlatformAdapter): await task except asyncio.CancelledError: pass + # Reset per-chat typing backoff state so the next agent turn starts + # fresh rather than inheriting a cooldown from a prior conversation. 
+ self._typing_failures.pop(chat_id, None) + self._typing_skip_until.pop(chat_id, None) async def stop_typing(self, chat_id: str) -> None: """Public interface for stopping typing — called by base adapter's diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index 8f9934cf7a..d3d2187948 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -150,9 +150,11 @@ class SlackAdapter(BasePlatformAdapter): except Exception as e: logger.warning("[Slack] Failed to read %s: %s", tokens_file, e) + lock_acquired = False try: if not self._acquire_platform_lock('slack-app-token', app_token, 'Slack app token'): return False + lock_acquired = True # First token is the primary — used for AsyncApp / Socket Mode primary_token = bot_tokens[0] @@ -228,6 +230,9 @@ class SlackAdapter(BasePlatformAdapter): except Exception as e: # pragma: no cover - defensive logging logger.error("[Slack] Connection failed: %s", e, exc_info=True) return False + finally: + if lock_acquired and not self._running: + self._release_platform_lock() async def disconnect(self) -> None: """Disconnect from Slack.""" @@ -316,6 +321,8 @@ class SlackAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Slack message.""" if not self._app: @@ -366,6 +373,20 @@ class SlackAdapter(BasePlatformAdapter): # in an assistant-enabled context. Falls back to reactions. logger.debug("[Slack] assistant.threads.setStatus failed: %s", e) + def _dm_top_level_threads_as_sessions(self) -> bool: + """Whether top-level Slack DMs get per-message session threads. + + Defaults to ``True`` so each visible DM reply thread is isolated as its + own Hermes session — matching the per-thread behavior channels already + have. Set ``platforms.slack.extra.dm_top_level_threads_as_sessions`` + to ``false`` in config.yaml to revert to the legacy behavior where all + top-level DMs share one continuous session. 
+ """ + raw = self.config.extra.get("dm_top_level_threads_as_sessions") + if raw is None: + return True # default: each DM thread is its own session + return str(raw).strip().lower() in ("1", "true", "yes", "on") + def _resolve_thread_ts( self, reply_to: Optional[str] = None, @@ -996,10 +1017,14 @@ class SlackAdapter(BasePlatformAdapter): # Build thread_ts for session keying. # In channels: fall back to ts so each top-level @mention starts a # new thread/session (the bot always replies in a thread). - # In DMs: only use the real thread_ts — top-level DMs should share - # one continuous session, threaded DMs get their own session. + # In DMs: fall back to ts so each top-level DM reply thread gets + # its own session key (matching channel behavior). Set + # dm_top_level_threads_as_sessions: false in config to revert to + # legacy single-session-per-DM-channel behavior. if is_dm: - thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts") # None for top-level DMs + thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts") + if not thread_ts and self._dm_top_level_threads_as_sessions(): + thread_ts = ts else: thread_ts = event.get("thread_ts") or ts # ts fallback for channels @@ -1167,6 +1192,12 @@ class SlackAdapter(BasePlatformAdapter): thread_id=thread_ts, ) + # Per-channel ephemeral prompt + from gateway.platforms.base import resolve_channel_prompt + _channel_prompt = resolve_channel_prompt( + self.config.extra, channel_id, None, + ) + msg_event = MessageEvent( text=text, message_type=msg_type, @@ -1176,6 +1207,7 @@ class SlackAdapter(BasePlatformAdapter): media_urls=media_urls, media_types=media_types, reply_to_message_id=thread_ts if thread_ts != ts else None, + channel_prompt=_channel_prompt, ) # Only react when bot is directly addressed (DM or @mention). 
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 112b232d0a..e849a03c77 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -11,6 +11,8 @@ import asyncio import json import logging import os +import tempfile +import html as _html import re from typing import Dict, List, Optional, Any @@ -18,6 +20,10 @@ logger = logging.getLogger(__name__) try: from telegram import Update, Bot, Message, InlineKeyboardButton, InlineKeyboardMarkup + try: + from telegram import LinkPreviewOptions + except ImportError: + LinkPreviewOptions = None from telegram.ext import ( Application, CommandHandler, @@ -36,6 +42,7 @@ except ImportError: Message = Any InlineKeyboardButton = Any InlineKeyboardMarkup = Any + LinkPreviewOptions = None Application = Any CommandHandler = Any CallbackQueryHandler = Any @@ -64,8 +71,10 @@ from gateway.platforms.base import ( SendResult, cache_image_from_bytes, cache_audio_from_bytes, + cache_video_from_bytes, cache_document_from_bytes, resolve_proxy_url, + SUPPORTED_VIDEO_TYPES, SUPPORTED_DOCUMENT_TYPES, utf16_len, _prefix_within_utf16_limit, @@ -112,6 +121,84 @@ def _strip_mdv2(text: str) -> str: return cleaned +# --------------------------------------------------------------------------- +# Markdown table → code block conversion +# --------------------------------------------------------------------------- +# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal, +# so pipe tables render as noisy backslash-pipe text with no alignment. +# Wrapping the table in a fenced code block makes Telegram render it as +# monospace preformatted text with columns intact. + +# Matches a GFM table delimiter row: optional outer pipes, cells containing +# only dashes (with optional leading/trailing colons for alignment) separated +# by '|'. Requires at least one internal '|' so lone '---' horizontal rules +# are NOT matched. 
+_TABLE_SEPARATOR_RE = re.compile( + r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$' +) + + +def _is_table_row(line: str) -> bool: + """Return True if *line* could plausibly be a table data row.""" + stripped = line.strip() + return bool(stripped) and '|' in stripped + + +def _wrap_markdown_tables(text: str) -> str: + """Wrap GFM-style pipe tables in ``` fences so Telegram renders them. + + Detected by a row containing '|' immediately followed by a delimiter + row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing + non-blank lines are consumed as the table body and included in the + wrapped block. Tables inside existing fenced code blocks are left + alone. + """ + if '|' not in text or '-' not in text: + return text + + lines = text.split('\n') + out: list[str] = [] + in_fence = False + i = 0 + while i < len(lines): + line = lines[i] + stripped = line.lstrip() + + # Track existing fenced code blocks — never touch content inside. + if stripped.startswith('```'): + in_fence = not in_fence + out.append(line) + i += 1 + continue + if in_fence: + out.append(line) + i += 1 + continue + + # Look for a header row (contains '|') immediately followed by a + # delimiter row. + if ( + '|' in line + and i + 1 < len(lines) + and _TABLE_SEPARATOR_RE.match(lines[i + 1]) + ): + table_block = [line, lines[i + 1]] + j = i + 2 + while j < len(lines) and _is_table_row(lines[j]): + table_block.append(lines[j]) + j += 1 + out.append('```') + out.extend(table_block) + out.append('```') + i = j + continue + + out.append(line) + i += 1 + + return '\n'.join(out) + + class TelegramAdapter(BasePlatformAdapter): """ Telegram bot adapter. @@ -129,6 +216,7 @@ class TelegramAdapter(BasePlatformAdapter): # When a chunk is near this limit, a continuation is almost certain. 
_SPLIT_THRESHOLD = 4000 MEDIA_GROUP_WAIT_SECONDS = 0.8 + _GENERAL_TOPIC_THREAD_ID = "1" def __init__(self, config: PlatformConfig): super().__init__(config, Platform.TELEGRAM) @@ -137,6 +225,7 @@ class TelegramAdapter(BasePlatformAdapter): self._webhook_mode: bool = False self._mention_patterns = self._compile_mention_patterns() self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first' + self._disable_link_previews: bool = self._coerce_bool_extra("disable_link_previews", False) # Buffer rapid/album photo updates so Telegram image bursts are handled # as a single MessageEvent instead of self-interrupting multiple turns. self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8")) @@ -163,6 +252,38 @@ class TelegramAdapter(BasePlatformAdapter): # Approval button state: message_id → session_key self._approval_state: Dict[int, str] = {} + @staticmethod + def _is_callback_user_authorized(user_id: str) -> bool: + """Return whether a Telegram inline-button caller may perform gated actions.""" + allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip() + if not allowed_csv: + return True + allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()} + return "*" in allowed_ids or user_id in allowed_ids + + @classmethod + def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]: + if not metadata: + return None + thread_id = metadata.get("thread_id") or metadata.get("message_thread_id") + return str(thread_id) if thread_id is not None else None + + @classmethod + def _message_thread_id_for_send(cls, thread_id: Optional[str]) -> Optional[int]: + if not thread_id or str(thread_id) == cls._GENERAL_TOPIC_THREAD_ID: + return None + return int(thread_id) + + @classmethod + def _message_thread_id_for_typing(cls, thread_id: Optional[str]) -> Optional[int]: + if not thread_id: + return None + return int(thread_id) + + @staticmethod + def _is_thread_not_found_error(error: 
Exception) -> bool: + return "thread not found" in str(error).lower() + def _fallback_ips(self) -> list[str]: """Return validated fallback IPs from config (populated by _apply_env_overrides).""" configured = self.config.extra.get("fallback_ips", []) if getattr(self.config, "extra", None) else [] @@ -193,6 +314,26 @@ class TelegramAdapter(BasePlatformAdapter): pass return isinstance(error, OSError) + def _coerce_bool_extra(self, key: str, default: bool = False) -> bool: + value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None + if value is None: + return default + if isinstance(value, str): + lowered = value.strip().lower() + if lowered in ("true", "1", "yes", "on"): + return True + if lowered in ("false", "0", "no", "off"): + return False + return default + return bool(value) + + def _link_preview_kwargs(self) -> Dict[str, Any]: + if not getattr(self, "_disable_link_previews", False): + return {} + if LinkPreviewOptions is not None: + return {"link_preview_options": LinkPreviewOptions(is_disabled=True)} + return {"disable_web_page_preview": True} + async def _handle_polling_network_error(self, error: Exception) -> None: """Reconnect polling after a transient network interruption. 
@@ -396,8 +537,23 @@ class TelegramAdapter(BasePlatformAdapter): break if changed: - with open(config_path, "w") as f: - _yaml.dump(config, f, default_flow_style=False, sort_keys=False) + fd, tmp_path = tempfile.mkstemp( + dir=str(config_path.parent), + suffix=".tmp", + prefix=".config_", + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + _yaml.dump(config, f, default_flow_style=False, sort_keys=False) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp_path, config_path) + except BaseException: + try: + os.unlink(tmp_path) + except OSError: + pass + raise logger.info( "[%s] Persisted thread_id=%s for topic '%s' in config.yaml", self.name, thread_id, topic_name, @@ -540,7 +696,7 @@ class TelegramAdapter(BasePlatformAdapter): "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0), } - proxy_url = resolve_proxy_url() + proxy_url = resolve_proxy_url("TELEGRAM_PROXY") disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on")) fallback_ips = self._fallback_ips() if not fallback_ips: @@ -606,14 +762,14 @@ class TelegramAdapter(BasePlatformAdapter): from telegram.error import NetworkError, TimedOut except ImportError: NetworkError = TimedOut = OSError # type: ignore[misc,assignment] - _max_connect = 3 + _max_connect = 8 for _attempt in range(_max_connect): try: await self._app.initialize() break except (NetworkError, TimedOut, OSError) as init_err: if _attempt < _max_connect - 1: - wait = 2 ** _attempt + wait = min(2 ** _attempt, 15) logger.warning( "[%s] Connect attempt %d/%d failed: %s — retrying in %ds", self.name, _attempt + 1, _max_connect, init_err, wait, @@ -814,7 +970,7 @@ class TelegramAdapter(BasePlatformAdapter): ] message_ids = [] - thread_id = metadata.get("thread_id") if metadata else None + thread_id = self._metadata_thread_id(metadata) try: from telegram.error import NetworkError as _NetErr @@ -834,7 +990,7 @@ class TelegramAdapter(BasePlatformAdapter): for i, chunk 
in enumerate(chunks): should_thread = self._should_thread_reply(reply_to, i) reply_to_id = int(reply_to) if should_thread else None - effective_thread_id = int(thread_id) if thread_id else None + effective_thread_id = self._message_thread_id_for_send(thread_id) msg = None for _send_attempt in range(3): @@ -847,6 +1003,7 @@ class TelegramAdapter(BasePlatformAdapter): parse_mode=ParseMode.MARKDOWN_V2, reply_to_message_id=reply_to_id, message_thread_id=effective_thread_id, + **self._link_preview_kwargs(), ) except Exception as md_error: # Markdown parsing failed, try plain text @@ -859,6 +1016,7 @@ class TelegramAdapter(BasePlatformAdapter): parse_mode=None, reply_to_message_id=reply_to_id, message_thread_id=effective_thread_id, + **self._link_preview_kwargs(), ) else: raise @@ -869,8 +1027,7 @@ class TelegramAdapter(BasePlatformAdapter): # (not transient network issues). Detect and handle # specific cases instead of blindly retrying. if _BadReq and isinstance(send_err, _BadReq): - err_lower = str(send_err).lower() - if "thread not found" in err_lower and effective_thread_id is not None: + if self._is_thread_not_found_error(send_err) and effective_thread_id is not None: # Thread doesn't exist — retry without # message_thread_id so the message still # reaches the chat. 
@@ -880,6 +1037,7 @@ class TelegramAdapter(BasePlatformAdapter): ) effective_thread_id = None continue + err_lower = str(send_err).lower() if "message to be replied not found" in err_lower and reply_to_id is not None: # Original message was deleted before we # could reply — clear reply target and retry @@ -941,6 +1099,8 @@ class TelegramAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Telegram message.""" if not self._bot: @@ -1046,6 +1206,7 @@ class TelegramAdapter(BasePlatformAdapter): text=text, parse_mode=ParseMode.MARKDOWN, reply_markup=keyboard, + **self._link_preview_kwargs(), ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: @@ -1068,15 +1229,13 @@ class TelegramAdapter(BasePlatformAdapter): try: cmd_preview = command[:3800] + "..." if len(command) > 3800 else command text = ( - f"⚠️ *Command Approval Required*\n\n" - f"`{cmd_preview}`\n\n" - f"Reason: {description}" + f"⚠️ Command Approval Required\n\n" + f"
<pre>{_html.escape(cmd_preview)}</pre>
\n\n" + f"Reason: {_html.escape(description)}" ) # Resolve thread context for thread replies - thread_id = None - if metadata: - thread_id = metadata.get("thread_id") or metadata.get("message_thread_id") + thread_id = self._metadata_thread_id(metadata) # We'll use the message_id as part of callback_data to look up session_key # Send a placeholder first, then update — or use a counter. @@ -1100,11 +1259,13 @@ class TelegramAdapter(BasePlatformAdapter): kwargs: Dict[str, Any] = { "chat_id": int(chat_id), "text": text, - "parse_mode": ParseMode.MARKDOWN, + "parse_mode": ParseMode.HTML, "reply_markup": keyboard, + **self._link_preview_kwargs(), } - if thread_id: - kwargs["message_thread_id"] = int(thread_id) + message_thread_id = self._message_thread_id_for_send(thread_id) + if message_thread_id is not None: + kwargs["message_thread_id"] = message_thread_id msg = await self._bot.send_message(**kwargs) @@ -1172,6 +1333,7 @@ class TelegramAdapter(BasePlatformAdapter): parse_mode=ParseMode.MARKDOWN, reply_markup=keyboard, message_thread_id=int(thread_id) if thread_id else None, + **self._link_preview_kwargs(), ) # Store picker state keyed by chat_id @@ -1440,12 +1602,9 @@ class TelegramAdapter(BasePlatformAdapter): # Only authorized users may click approval buttons. 
caller_id = str(getattr(query.from_user, "id", "")) - allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip() - if allowed_csv: - allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()} - if "*" not in allowed_ids and caller_id not in allowed_ids: - await query.answer(text="⛔ You are not authorized to approve commands.") - return + if not self._is_callback_user_authorized(caller_id): + await query.answer(text="⛔ You are not authorized to approve commands.") + return session_key = self._approval_state.pop(approval_id, None) if not session_key: @@ -1490,6 +1649,10 @@ class TelegramAdapter(BasePlatformAdapter): if not data.startswith("update_prompt:"): return answer = data.split(":", 1)[1] # "y" or "n" + caller_id = str(getattr(query.from_user, "id", "")) + if not self._is_callback_user_authorized(caller_id): + await query.answer(text="⛔ You are not authorized to answer update prompts.") + return await query.answer(text=f"Sent '{answer}' to the update process.") # Edit the message to show the choice and remove buttons label = "Yes" if answer == "y" else "No" @@ -1514,6 +1677,21 @@ class TelegramAdapter(BasePlatformAdapter): except Exception as exc: logger.error("Failed to write update response from callback: %s", exc) + def _missing_media_path_error(self, label: str, path: str) -> str: + """Build an actionable file-not-found error for gateway MEDIA delivery. + + Paths like /workspace/... or /output/... often only exist inside the + Docker sandbox, while the gateway process runs on the host. + """ + error = f"{label} file not found: {path}" + if path.startswith(("/workspace/", "/output/", "/outputs/")): + error += ( + " (path may only exist inside the Docker sandbox. 
" + "Bind-mount a host directory and emit the host-visible " + "path in MEDIA: for gateway file delivery.)" + ) + return error + async def send_voice( self, chat_id: str, @@ -1530,28 +1708,28 @@ class TelegramAdapter(BasePlatformAdapter): try: import os if not os.path.exists(audio_path): - return SendResult(success=False, error=f"Audio file not found: {audio_path}") + return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path)) with open(audio_path, "rb") as audio_file: # .ogg files -> send as voice (round playable bubble) if audio_path.endswith((".ogg", ".opus")): - _voice_thread = metadata.get("thread_id") if metadata else None + _voice_thread = self._metadata_thread_id(metadata) msg = await self._bot.send_voice( chat_id=int(chat_id), voice=audio_file, caption=caption[:1024] if caption else None, reply_to_message_id=int(reply_to) if reply_to else None, - message_thread_id=int(_voice_thread) if _voice_thread else None, + message_thread_id=self._message_thread_id_for_send(_voice_thread), ) else: # .mp3 and others -> send as audio file - _audio_thread = metadata.get("thread_id") if metadata else None + _audio_thread = self._metadata_thread_id(metadata) msg = await self._bot.send_audio( chat_id=int(chat_id), audio=audio_file, caption=caption[:1024] if caption else None, reply_to_message_id=int(reply_to) if reply_to else None, - message_thread_id=int(_audio_thread) if _audio_thread else None, + message_thread_id=self._message_thread_id_for_send(_audio_thread), ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: @@ -1579,16 +1757,16 @@ class TelegramAdapter(BasePlatformAdapter): try: import os if not os.path.exists(image_path): - return SendResult(success=False, error=f"Image file not found: {image_path}") + return SendResult(success=False, error=self._missing_media_path_error("Image", image_path)) - _thread = metadata.get("thread_id") if metadata else None + _thread = 
self._metadata_thread_id(metadata) with open(image_path, "rb") as image_file: msg = await self._bot.send_photo( chat_id=int(chat_id), photo=image_file, caption=caption[:1024] if caption else None, reply_to_message_id=int(reply_to) if reply_to else None, - message_thread_id=int(_thread) if _thread else None, + message_thread_id=self._message_thread_id_for_send(_thread), ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: @@ -1616,10 +1794,10 @@ class TelegramAdapter(BasePlatformAdapter): try: if not os.path.exists(file_path): - return SendResult(success=False, error=f"File not found: {file_path}") + return SendResult(success=False, error=self._missing_media_path_error("File", file_path)) display_name = file_name or os.path.basename(file_path) - _thread = metadata.get("thread_id") if metadata else None + _thread = self._metadata_thread_id(metadata) with open(file_path, "rb") as f: msg = await self._bot.send_document( @@ -1628,7 +1806,7 @@ class TelegramAdapter(BasePlatformAdapter): filename=display_name, caption=caption[:1024] if caption else None, reply_to_message_id=int(reply_to) if reply_to else None, - message_thread_id=int(_thread) if _thread else None, + message_thread_id=self._message_thread_id_for_send(_thread), ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: @@ -1650,16 +1828,16 @@ class TelegramAdapter(BasePlatformAdapter): try: if not os.path.exists(video_path): - return SendResult(success=False, error=f"Video file not found: {video_path}") + return SendResult(success=False, error=self._missing_media_path_error("Video", video_path)) - _thread = metadata.get("thread_id") if metadata else None + _thread = self._metadata_thread_id(metadata) with open(video_path, "rb") as f: msg = await self._bot.send_video( chat_id=int(chat_id), video=f, caption=caption[:1024] if caption else None, reply_to_message_id=int(reply_to) if reply_to else None, - message_thread_id=int(_thread) if _thread 
else None, + message_thread_id=self._message_thread_id_for_send(_thread), ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: @@ -1689,13 +1867,13 @@ class TelegramAdapter(BasePlatformAdapter): try: # Telegram can send photos directly from URLs (up to ~5MB) - _photo_thread = metadata.get("thread_id") if metadata else None + _photo_thread = self._metadata_thread_id(metadata) msg = await self._bot.send_photo( chat_id=int(chat_id), photo=image_url, caption=caption[:1024] if caption else None, # Telegram caption limit reply_to_message_id=int(reply_to) if reply_to else None, - message_thread_id=int(_photo_thread) if _photo_thread else None, + message_thread_id=self._message_thread_id_for_send(_photo_thread), ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: @@ -1718,6 +1896,7 @@ class TelegramAdapter(BasePlatformAdapter): photo=image_data, caption=caption[:1024] if caption else None, reply_to_message_id=int(reply_to) if reply_to else None, + message_thread_id=self._message_thread_id_for_send(_photo_thread), ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e2: @@ -1743,13 +1922,13 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error="Not connected") try: - _anim_thread = metadata.get("thread_id") if metadata else None + _anim_thread = self._metadata_thread_id(metadata) msg = await self._bot.send_animation( chat_id=int(chat_id), animation=animation_url, caption=caption[:1024] if caption else None, reply_to_message_id=int(reply_to) if reply_to else None, - message_thread_id=int(_anim_thread) if _anim_thread else None, + message_thread_id=self._message_thread_id_for_send(_anim_thread), ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: @@ -1766,12 +1945,23 @@ class TelegramAdapter(BasePlatformAdapter): """Send typing indicator.""" if self._bot: try: - _typing_thread = metadata.get("thread_id") 
if metadata else None - await self._bot.send_chat_action( - chat_id=int(chat_id), - action="typing", - message_thread_id=int(_typing_thread) if _typing_thread else None, - ) + _typing_thread = self._metadata_thread_id(metadata) + message_thread_id = self._message_thread_id_for_typing(_typing_thread) + try: + await self._bot.send_chat_action( + chat_id=int(chat_id), + action="typing", + message_thread_id=message_thread_id, + ) + except Exception as e: + if message_thread_id is not None and self._is_thread_not_found_error(e): + await self._bot.send_chat_action( + chat_id=int(chat_id), + action="typing", + message_thread_id=None, + ) + else: + raise except Exception as e: # Typing failures are non-fatal; log at debug level only. logger.debug( @@ -1839,6 +2029,12 @@ class TelegramAdapter(BasePlatformAdapter): text = content + # 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't + # render tables natively, but fenced code blocks render as + # monospace preformatted text with columns intact. The wrapped + # tables then flow through step (1) below as protected regions. + text = _wrap_markdown_tables(text) + # 1) Protect fenced code blocks (``` ... ```) # Per MarkdownV2 spec, \ and ` inside pre/code must be escaped. def _protect_fenced(m): @@ -2080,22 +2276,27 @@ class TelegramAdapter(BasePlatformAdapter): bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower() bot_id = getattr(self._bot, "id", None) + expected = f"@{bot_username}" if bot_username else None def _iter_sources(): yield getattr(message, "text", None) or "", getattr(message, "entities", None) or [] yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or [] + # Telegram parses mentions server-side and emits MessageEntity objects + # (type=mention for @username, type=text_mention for @FirstName targeting + # a user without a public username). 
Only those entities are authoritative — + # raw substring matches like "foo@hermes_bot.example" are not mentions + # (bug #12545). Entities also correctly handle @handles inside URLs, code + # blocks, and quoted text, where a regex scan would over-match. for source_text, entities in _iter_sources(): - if bot_username and f"@{bot_username}" in source_text.lower(): - return True for entity in entities: entity_type = str(getattr(entity, "type", "")).split(".")[-1].lower() - if entity_type == "mention" and bot_username: + if entity_type == "mention" and expected: offset = int(getattr(entity, "offset", -1)) length = int(getattr(entity, "length", 0)) if offset < 0 or length <= 0: continue - if source_text[offset:offset + length].strip().lower() == f"@{bot_username}": + if source_text[offset:offset + length].strip().lower() == expected: return True elif entity_type == "text_mention": user = getattr(entity, "user", None) @@ -2165,7 +2366,7 @@ class TelegramAdapter(BasePlatformAdapter): if not self._should_process_message(update.message): return - event = self._build_message_event(update.message, MessageType.TEXT) + event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id) event.text = self._clean_bot_trigger_text(event.text) self._enqueue_text_event(event) @@ -2176,7 +2377,7 @@ class TelegramAdapter(BasePlatformAdapter): if not self._should_process_message(update.message, is_command=True): return - event = self._build_message_event(update.message, MessageType.COMMAND) + event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id) await self.handle_message(event) async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: @@ -2212,7 +2413,7 @@ class TelegramAdapter(BasePlatformAdapter): parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}") parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) 
and any preferences.") - event = self._build_message_event(msg, MessageType.LOCATION) + event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id) event.text = "\n".join(parts) await self.handle_message(event) @@ -2363,7 +2564,7 @@ class TelegramAdapter(BasePlatformAdapter): else: msg_type = MessageType.DOCUMENT - event = self._build_message_event(msg, msg_type) + event = self._build_message_event(msg, msg_type, update_id=update.update_id) # Add caption as text if msg.caption: @@ -2429,6 +2630,23 @@ class TelegramAdapter(BasePlatformAdapter): except Exception as e: logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True) + elif msg.video: + try: + file_obj = await msg.video.get_file() + video_bytes = await file_obj.download_as_bytearray() + ext = ".mp4" + if getattr(file_obj, "file_path", None): + for candidate in SUPPORTED_VIDEO_TYPES: + if file_obj.file_path.lower().endswith(candidate): + ext = candidate + break + cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext) + event.media_urls = [cached_path] + event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")] + logger.info("[Telegram] Cached user video at %s", cached_path) + except Exception as e: + logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True) + # Download document files to cache for agent processing elif msg.document: doc = msg.document @@ -2445,6 +2663,21 @@ class TelegramAdapter(BasePlatformAdapter): mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()} ext = mime_to_ext.get(doc.mime_type, "") + if not ext and doc.mime_type: + video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()} + ext = video_mime_to_ext.get(doc.mime_type, "") + + if ext in SUPPORTED_VIDEO_TYPES: + file_obj = await doc.get_file() + video_bytes = await file_obj.download_as_bytearray() + cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext) + event.media_urls = [cached_path] + event.media_types = 
[SUPPORTED_VIDEO_TYPES[ext]] + event.message_type = MessageType.VIDEO + logger.info("[Telegram] Cached user video document at %s", cached_path) + await self.handle_message(event) + return + # Check if supported if ext not in SUPPORTED_DOCUMENT_TYPES: supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys())) @@ -2702,8 +2935,19 @@ class TelegramAdapter(BasePlatformAdapter): self.name, cache_key, thread_id, ) - def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent: - """Build a MessageEvent from a Telegram message.""" + def _build_message_event( + self, + message: Message, + msg_type: MessageType, + update_id: Optional[int] = None, + ) -> MessageEvent: + """Build a MessageEvent from a Telegram message. + + ``update_id`` is the ``Update.update_id`` from PTB; passing it through + lets ``/restart`` record the triggering offset so the new gateway + process can advance past it (prevents ``/restart`` being re-delivered + when PTB's graceful-shutdown ACK fails). 
+ """ chat = message.chat user = message.from_user @@ -2716,7 +2960,9 @@ class TelegramAdapter(BasePlatformAdapter): # Resolve DM topic name and skill binding thread_id_raw = message.message_thread_id - thread_id_str = str(thread_id_raw) if thread_id_raw else None + thread_id_str = str(thread_id_raw) if thread_id_raw is not None else None + if chat_type == "group" and thread_id_str is None and getattr(chat, "is_forum", False): + thread_id_str = self._GENERAL_TOPIC_THREAD_ID chat_topic = None topic_skill = None @@ -2752,8 +2998,8 @@ class TelegramAdapter(BasePlatformAdapter): chat_id=str(chat.id), chat_name=chat.title or (chat.full_name if hasattr(chat, "full_name") else None), chat_type=chat_type, - user_id=str(user.id) if user else None, - user_name=user.full_name if user else None, + user_id=str(user.id) if user else (str(chat.id) if chat_type == "dm" else None), + user_name=user.full_name if user else (chat.full_name if hasattr(chat, "full_name") and chat_type == "dm" else None), thread_id=thread_id_str, chat_topic=chat_topic, ) @@ -2765,15 +3011,26 @@ class TelegramAdapter(BasePlatformAdapter): reply_to_id = str(message.reply_to_message.message_id) reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None + # Per-channel/topic ephemeral prompt + from gateway.platforms.base import resolve_channel_prompt + _chat_id_str = str(chat.id) + _channel_prompt = resolve_channel_prompt( + self.config.extra, + thread_id_str or _chat_id_str, + _chat_id_str if thread_id_str else None, + ) + return MessageEvent( text=message.text or "", message_type=msg_type, source=source, raw_message=message, message_id=str(message.message_id), + platform_update_id=update_id, reply_to_message_id=reply_to_id, reply_to_text=reply_to_text, auto_skill=topic_skill, + channel_prompt=_channel_prompt, timestamp=message.date, ) diff --git a/gateway/platforms/telegram_network.py b/gateway/platforms/telegram_network.py index 4fca934ef8..ed2d60d797 100644 --- 
a/gateway/platforms/telegram_network.py +++ b/gateway/platforms/telegram_network.py @@ -46,7 +46,7 @@ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"] def _resolve_proxy_url() -> str | None: # Delegate to shared implementation (env vars + macOS system proxy detection) from gateway.platforms.base import resolve_proxy_url - return resolve_proxy_url() + return resolve_proxy_url("TELEGRAM_PROXY") class TelegramFallbackTransport(httpx.AsyncBaseTransport): diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py index c37445b17e..e3a736a451 100644 --- a/gateway/platforms/webhook.py +++ b/gateway/platforms/webhook.py @@ -13,6 +13,10 @@ Each route defines: - skills: optional list of skills to load for the agent - deliver: where to send the response (github_comment, telegram, etc.) - deliver_extra: additional delivery config (repo, pr_number, chat_id) + - deliver_only: if true, skip the agent — the rendered prompt IS the + message that gets delivered. Use for external push notifications + (Supabase, monitoring alerts, inter-agent pings) where zero LLM cost + and sub-second delivery matter more than agent reasoning. Security: - HMAC secret is required per route (validated at startup) @@ -122,6 +126,19 @@ class WebhookAdapter(BasePlatformAdapter): f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'." ) + # deliver_only routes bypass the agent — the POST body becomes a + # direct push notification via the configured delivery target. + # Validate up-front so misconfiguration surfaces at startup rather + # than on the first webhook POST. + if route.get("deliver_only"): + deliver = route.get("deliver", "log") + if not deliver or deliver == "log": + raise ValueError( + f"[webhook] Route '{name}' has deliver_only=true but " + f"deliver is '{deliver}'. Direct delivery requires a " + f"real target (telegram, discord, slack, github_comment, etc.)." 
+ ) + app = web.Application() app.router.add_get("/health", self._handle_health) app.router.add_post("/webhooks/{route_name}", self._handle_webhook) @@ -296,24 +313,14 @@ class WebhookAdapter(BasePlatformAdapter): {"error": "Payload too large"}, status=413 ) - # ── Rate limiting ──────────────────────────────────────── - now = time.time() - window = self._rate_counts.setdefault(route_name, []) - window[:] = [t for t in window if now - t < 60] - if len(window) >= self._rate_limit: - return web.json_response( - {"error": "Rate limit exceeded"}, status=429 - ) - window.append(now) - - # Read body + # Read body (must be done before any validation) try: raw_body = await request.read() except Exception as e: logger.error("[webhook] Failed to read body: %s", e) return web.json_response({"error": "Bad request"}, status=400) - # Validate HMAC signature (skip for INSECURE_NO_AUTH testing mode) + # Validate HMAC signature FIRST (skip for INSECURE_NO_AUTH testing mode) secret = route_config.get("secret", self._global_secret) if secret and secret != _INSECURE_NO_AUTH: if not self._validate_signature(request, raw_body, secret): @@ -324,6 +331,16 @@ class WebhookAdapter(BasePlatformAdapter): {"error": "Invalid signature"}, status=401 ) + # ── Rate limiting (after auth) ─────────────────────────── + now = time.time() + window = self._rate_counts.setdefault(route_name, []) + window[:] = [t for t in window if now - t < 60] + if len(window) >= self._rate_limit: + return web.json_response( + {"error": "Rate limit exceeded"}, status=429 + ) + window.append(now) + # Parse payload try: payload = json.loads(raw_body) @@ -419,6 +436,64 @@ class WebhookAdapter(BasePlatformAdapter): ) self._seen_deliveries[delivery_id] = now + # ── Direct delivery mode (deliver_only) ───────────────── + # Skip the agent entirely — the rendered prompt IS the message we + # deliver. 
Use case: external services (Supabase, monitoring, + # cron jobs, other agents) that need to push a plain notification + # to a user's chat with zero LLM cost. Reuses the same HMAC auth, + # rate limiting, idempotency, and template rendering as agent mode. + if route_config.get("deliver_only"): + delivery = { + "deliver": route_config.get("deliver", "log"), + "deliver_extra": self._render_delivery_extra( + route_config.get("deliver_extra", {}), payload + ), + "payload": payload, + } + logger.info( + "[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s", + event_type, + route_name, + delivery["deliver"], + len(prompt), + delivery_id, + ) + try: + result = await self._direct_deliver(prompt, delivery) + except Exception: + logger.exception( + "[webhook] direct-deliver failed route=%s delivery=%s", + route_name, + delivery_id, + ) + return web.json_response( + {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id}, + status=502, + ) + + if result.success: + return web.json_response( + { + "status": "delivered", + "route": route_name, + "target": delivery["deliver"], + "delivery_id": delivery_id, + }, + status=200, + ) + # Delivery attempted but target rejected it — surface as 502 + # with a generic error (don't leak adapter-level detail). + logger.warning( + "[webhook] direct-deliver target rejected route=%s target=%s error=%s", + route_name, + delivery["deliver"], + result.error, + ) + return web.json_response( + {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id}, + status=502, + ) + # Use delivery_id in session key so concurrent webhooks on the # same route get independent agent runs (not queued/interrupted). 
session_chat_id = f"webhook:{route_name}:{delivery_id}" @@ -572,6 +647,34 @@ class WebhookAdapter(BasePlatformAdapter): # Response delivery # ------------------------------------------------------------------ + async def _direct_deliver( + self, content: str, delivery: dict + ) -> SendResult: + """Deliver *content* directly without invoking the agent. + + Used by ``deliver_only`` routes: the rendered template becomes the + literal message body, and we dispatch to the same delivery helpers + that the agent-mode ``send()`` flow uses. All target types that + work in agent mode work here — Telegram, Discord, Slack, GitHub + PR comments, etc. + """ + deliver_type = delivery.get("deliver", "log") + + if deliver_type == "log": + # Shouldn't reach here — startup validation rejects deliver_only + # with deliver=log — but guard defensively. + logger.info("[webhook] direct-deliver log-only: %s", content[:200]) + return SendResult(success=True) + + if deliver_type == "github_comment": + return await self._deliver_github_comment(content, delivery) + + # Fall through to the cross-platform dispatcher, which validates the + # target name and routes via the gateway runner. 
+ return await self._deliver_cross_platform( + deliver_type, content, delivery + ) + async def _deliver_github_comment( self, content: str, delivery: dict ) -> SendResult: diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index d43fca6126..9e5dd04e0d 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -180,6 +180,8 @@ class WeComAdapter(BasePlatformAdapter): self._text_batch_split_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")) self._pending_text_batches: Dict[str, MessageEvent] = {} self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {} + self._device_id = uuid.uuid4().hex + self._last_chat_req_ids: Dict[str, str] = {} # ------------------------------------------------------------------ # Connection lifecycle @@ -277,7 +279,11 @@ class WeComAdapter(BasePlatformAdapter): { "cmd": APP_CMD_SUBSCRIBE, "headers": {"req_id": req_id}, - "body": {"bot_id": self._bot_id, "secret": self._secret}, + "body": { + "bot_id": self._bot_id, + "secret": self._secret, + "device_id": self._device_id, + }, } ) @@ -496,6 +502,11 @@ class WeComAdapter(BasePlatformAdapter): logger.debug("[%s] DM sender %s blocked by policy", self.name, sender_id) return + # Cache the inbound req_id after policy checks so proactive sends to + # this chat can fall back to APP_CMD_RESPONSE (required for groups — + # WeCom AI Bots cannot initiate APP_CMD_SEND in group chats). + self._remember_chat_req_id(chat_id, self._payload_req_id(payload)) + text, reply_text = self._extract_text(body) media_urls, media_types = await self._extract_media(body) message_type = self._derive_message_type(body, text, media_types) @@ -847,6 +858,23 @@ class WeComAdapter(BasePlatformAdapter): while len(self._reply_req_ids) > DEDUP_MAX_SIZE: self._reply_req_ids.pop(next(iter(self._reply_req_ids))) + def _remember_chat_req_id(self, chat_id: str, req_id: str) -> None: + """Cache the most recent inbound req_id per chat. 
+ + Used as a fallback reply target when we need to send into a group + without an explicit ``reply_to`` — WeCom AI Bots are blocked from + APP_CMD_SEND in groups and must use APP_CMD_RESPONSE bound to some + prior req_id. Bounded like _reply_req_ids so long-running gateways + don't leak memory across many chats. + """ + normalized_chat_id = str(chat_id or "").strip() + normalized_req_id = str(req_id or "").strip() + if not normalized_chat_id or not normalized_req_id: + return + self._last_chat_req_ids[normalized_chat_id] = normalized_req_id + while len(self._last_chat_req_ids) > DEDUP_MAX_SIZE: + self._last_chat_req_ids.pop(next(iter(self._last_chat_req_ids))) + def _reply_req_id_for_message(self, reply_to: Optional[str]) -> Optional[str]: normalized = str(reply_to or "").strip() if not normalized or normalized.startswith("quote:"): @@ -1163,19 +1191,15 @@ class WeComAdapter(BasePlatformAdapter): self._raise_for_wecom_error(response, "send media message") return response - async def _send_reply_stream(self, reply_req_id: str, content: str) -> Dict[str, Any]: + async def _send_reply_markdown(self, reply_req_id: str, content: str) -> Dict[str, Any]: response = await self._send_reply_request( reply_req_id, { - "msgtype": "stream", - "stream": { - "id": self._new_req_id("stream"), - "finish": True, - "content": content[:self.MAX_MESSAGE_LENGTH], - }, + "msgtype": "markdown", + "markdown": {"content": content[:self.MAX_MESSAGE_LENGTH]}, }, ) - self._raise_for_wecom_error(response, "send reply stream") + self._raise_for_wecom_error(response, "send reply markdown") return response async def _send_reply_media_message( @@ -1235,6 +1259,9 @@ class WeComAdapter(BasePlatformAdapter): return SendResult(success=False, error=prepared["reject_reason"]) reply_req_id = self._reply_req_id_for_message(reply_to) + if not reply_req_id and chat_id in self._last_chat_req_ids: + reply_req_id = self._last_chat_req_ids[chat_id] + try: upload_result = await self._upload_media_bytes( 
prepared["data"], @@ -1302,8 +1329,12 @@ class WeComAdapter(BasePlatformAdapter): try: reply_req_id = self._reply_req_id_for_message(reply_to) + + if not reply_req_id and chat_id in self._last_chat_req_ids: + reply_req_id = self._last_chat_req_ids[chat_id] + if reply_req_id: - response = await self._send_reply_stream(reply_req_id, content) + response = await self._send_reply_markdown(reply_req_id, content) else: response = await self._send_request( APP_CMD_SEND, diff --git a/gateway/platforms/wecom_callback.py b/gateway/platforms/wecom_callback.py index 4bb67d5cfa..5440792dea 100644 --- a/gateway/platforms/wecom_callback.py +++ b/gateway/platforms/wecom_callback.py @@ -258,6 +258,20 @@ class WecomCallbackAdapter(BasePlatformAdapter): ) event = self._build_event(app, decrypted) if event is not None: + # Deduplicate: WeCom retries callbacks on timeout, + # producing duplicate inbound messages (#10305). + if event.message_id: + now = time.time() + if event.message_id in self._seen_messages: + if now - self._seen_messages[event.message_id] < MESSAGE_DEDUP_TTL_SECONDS: + logger.debug("[WecomCallback] Duplicate MsgId %s, skipping", event.message_id) + return web.Response(text="success", content_type="text/plain") + del self._seen_messages[event.message_id] + self._seen_messages[event.message_id] = now + # Prune expired entries when cache grows large + if len(self._seen_messages) > 2000: + cutoff = now - MESSAGE_DEDUP_TTL_SECONDS + self._seen_messages = {k: v for k, v in self._seen_messages.items() if v > cutoff} # Record which app this user belongs to. 
if event.source and event.source.user_id: map_key = self._user_app_key( diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py index e5859e41a4..958e71da17 100644 --- a/gateway/platforms/weixin.py +++ b/gateway/platforms/weixin.py @@ -28,7 +28,7 @@ import uuid from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional, Tuple -from urllib.parse import quote +from urllib.parse import quote, urlparse logger = logging.getLogger(__name__) @@ -96,6 +96,28 @@ MEDIA_VIDEO = 2 MEDIA_FILE = 3 MEDIA_VOICE = 4 +_LIVE_ADAPTERS: Dict[str, Any] = {} + + +def _make_ssl_connector() -> Optional["aiohttp.TCPConnector"]: + """Return a TCPConnector with a certifi CA bundle, or None if certifi is unavailable. + + Tencent's iLink server (``ilinkai.weixin.qq.com``) is not verifiable against + some system CA stores (notably Homebrew's OpenSSL on macOS Apple Silicon). + When ``certifi`` is installed, use its Mozilla CA bundle to guarantee + verification. Otherwise fall back to aiohttp's default (which honors + ``SSL_CERT_FILE`` env var via ``trust_env=True``). + """ + try: + import ssl + import certifi + except ImportError: + return None + if not AIOHTTP_AVAILABLE: + return None + ssl_ctx = ssl.create_default_context(cafile=certifi.where()) + return aiohttp.TCPConnector(ssl=ssl_ctx) + ITEM_TEXT = 1 ITEM_IMAGE = 2 ITEM_VOICE = 3 @@ -398,7 +420,12 @@ async def _send_message( text: str, context_token: Optional[str], client_id: str, -) -> None: +) -> Dict[str, Any]: + """Send a text message via iLink sendmessage API. + + Returns the raw API response dict (may contain error codes like + ``errcode: -14`` for session expiry that the caller can inspect). 
+ """ if not text or not text.strip(): raise ValueError("_send_message: text must not be empty") message: Dict[str, Any] = { @@ -411,7 +438,7 @@ async def _send_message( } if context_token: message["context_token"] = context_token - await _api_post( + return await _api_post( session, base_url=base_url, endpoint=EP_SEND_MESSAGE, @@ -533,6 +560,39 @@ async def _download_bytes( return await response.read() +_WEIXIN_CDN_ALLOWLIST: frozenset[str] = frozenset( + { + "novac2c.cdn.weixin.qq.com", + "ilinkai.weixin.qq.com", + "wx.qlogo.cn", + "thirdwx.qlogo.cn", + "res.wx.qq.com", + "mmbiz.qpic.cn", + "mmbiz.qlogo.cn", + } +) + + +def _assert_weixin_cdn_url(url: str) -> None: + """Raise ValueError if *url* does not point at a known WeChat CDN host.""" + try: + parsed = urlparse(url) + scheme = parsed.scheme.lower() + host = parsed.hostname or "" + except Exception as exc: # noqa: BLE001 + raise ValueError(f"Unparseable media URL: {url!r}") from exc + + if scheme not in ("http", "https"): + raise ValueError( + f"Media URL has disallowed scheme {scheme!r}; only http/https are permitted." + ) + if host not in _WEIXIN_CDN_ALLOWLIST: + raise ValueError( + f"Media URL host {host!r} is not in the WeChat CDN allowlist. " + "Refusing to fetch to prevent SSRF." 
+ ) + + def _media_reference(item: Dict[str, Any], key: str) -> Dict[str, Any]: return (item.get(key) or {}).get("media") or {} @@ -553,6 +613,7 @@ async def _download_and_decrypt_media( timeout_seconds=timeout_seconds, ) elif full_url: + _assert_weixin_cdn_url(full_url) raw = await _download_bytes(session, url=full_url, timeout_seconds=timeout_seconds) else: raise RuntimeError("media item had neither encrypt_query_param nor full_url") @@ -623,42 +684,31 @@ def _rewrite_table_block_for_weixin(lines: List[str]) -> str: def _normalize_markdown_blocks(content: str) -> str: lines = content.splitlines() result: List[str] = [] - i = 0 in_code_block = False + blank_run = 0 - while i < len(lines): - line = lines[i].rstrip() - fence_match = _FENCE_RE.match(line.strip()) - if fence_match: + for raw_line in lines: + line = raw_line.rstrip() + if _FENCE_RE.match(line.strip()): in_code_block = not in_code_block result.append(line) - i += 1 + blank_run = 0 continue if in_code_block: result.append(line) - i += 1 continue - if ( - i + 1 < len(lines) - and "|" in lines[i] - and _TABLE_RULE_RE.match(lines[i + 1].rstrip()) - ): - table_lines = [lines[i].rstrip(), lines[i + 1].rstrip()] - i += 2 - while i < len(lines) and "|" in lines[i]: - table_lines.append(lines[i].rstrip()) - i += 1 - result.append(_rewrite_table_block_for_weixin(table_lines)) + if not line.strip(): + blank_run += 1 + if blank_run <= 1: + result.append("") continue - result.append(_MARKDOWN_LINK_RE.sub(r"\1 (\2)", _rewrite_headers_for_weixin(line))) - i += 1 + blank_run = 0 + result.append(line) - normalized = "\n".join(item.rstrip() for item in result) - normalized = re.sub(r"\n{3,}", "\n\n", normalized) - return normalized.strip() + return "\n".join(result).strip() def _split_markdown_blocks(content: str) -> List[str]: @@ -704,8 +754,8 @@ def _split_delivery_units_for_weixin(content: str) -> List[str]: Weixin can render Markdown, but chat readability is better when top-level line breaks become separate messages. 
Keep fenced code blocks intact and - attach indented continuation lines to the previous top-level line so - transformed tables/lists do not get torn apart. + attach indented continuation lines to the previous top-level line so nested + list items do not get torn apart. """ units: List[str] = [] @@ -747,7 +797,9 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool: return False if line.startswith((" ", "\t")): return False - if stripped.startswith((">", "-", "*", "【")): + if stripped.startswith((">", "-", "*", "【", "#", "|")): + return False + if _TABLE_RULE_RE.match(stripped): return False if re.match(r"^\*\*[^*]+\*\*$", stripped): return False @@ -757,10 +809,12 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool: def _looks_like_heading_line_for_weixin(line: str) -> bool: - """Return True when a short line behaves like a plain-text heading.""" + """Return True when a short line behaves like a heading.""" stripped = line.strip() if not stripped: return False + if _HEADER_RE.match(stripped): + return True return len(stripped) <= 24 and stripped.endswith((":", ":")) @@ -935,7 +989,7 @@ async def qr_login( if not AIOHTTP_AVAILABLE: raise RuntimeError("aiohttp is required for Weixin QR login") - async with aiohttp.ClientSession(trust_env=True) as session: + async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session: try: qr_resp = await _api_get( session, @@ -953,6 +1007,10 @@ async def qr_login( logger.error("weixin: QR response missing qrcode") return None + # qrcode_url is the full scannable liteapp URL; qrcode_value is just the hex token + # WeChat needs to scan the full URL, not the raw hex string + qr_scan_data = qrcode_url if qrcode_url else qrcode_value + print("\n请使用微信扫描以下二维码:") if qrcode_url: print(qrcode_url) @@ -960,11 +1018,11 @@ async def qr_login( import qrcode qr = qrcode.QRCode() - qr.add_data(qrcode_url or qrcode_value) + qr.add_data(qr_scan_data) qr.make(fit=True) qr.print_ascii(invert=True) - 
except Exception: - print("(终端二维码渲染失败,请直接打开上面的二维码链接)") + except Exception as _qr_exc: + print(f"(终端二维码渲染失败: {_qr_exc},请直接打开上面的二维码链接)") deadline = time.time() + timeout_seconds current_base_url = ILINK_BASE_URL @@ -1010,8 +1068,17 @@ async def qr_login( ) qrcode_value = str(qr_resp.get("qrcode") or "") qrcode_url = str(qr_resp.get("qrcode_img_content") or "") + qr_scan_data = qrcode_url if qrcode_url else qrcode_value if qrcode_url: print(qrcode_url) + try: + import qrcode as _qrcode + qr = _qrcode.QRCode() + qr.add_data(qr_scan_data) + qr.make(fit=True) + qr.print_ascii(invert=True) + except Exception: + pass except Exception as exc: logger.error("weixin: QR refresh failed: %s", exc) return None @@ -1059,7 +1126,8 @@ class WeixinAdapter(BasePlatformAdapter): self._hermes_home = hermes_home self._token_store = ContextTokenStore(hermes_home) self._typing_cache = TypingTicketCache() - self._session: Optional[aiohttp.ClientSession] = None + self._poll_session: Optional[aiohttp.ClientSession] = None + self._send_session: Optional[aiohttp.ClientSession] = None self._poll_task: Optional[asyncio.Task] = None self._dedup = MessageDeduplicator(ttl_seconds=MESSAGE_DEDUP_TTL_SECONDS) @@ -1134,14 +1202,17 @@ class WeixinAdapter(BasePlatformAdapter): except Exception as exc: logger.debug("[%s] Token lock unavailable (non-fatal): %s", self.name, exc) - self._session = aiohttp.ClientSession(trust_env=True) + self._poll_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) + self._send_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) self._token_store.restore(self._account_id) self._poll_task = asyncio.create_task(self._poll_loop(), name="weixin-poll") self._mark_connected() + _LIVE_ADAPTERS[self._token] = self logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url) return True async def disconnect(self) -> None: + _LIVE_ADAPTERS.pop(self._token, None) self._running = False if 
self._poll_task and not self._poll_task.done(): self._poll_task.cancel() @@ -1150,15 +1221,18 @@ class WeixinAdapter(BasePlatformAdapter): except asyncio.CancelledError: pass self._poll_task = None - if self._session and not self._session.closed: - await self._session.close() - self._session = None + if self._poll_session and not self._poll_session.closed: + await self._poll_session.close() + self._poll_session = None + if self._send_session and not self._send_session.closed: + await self._send_session.close() + self._send_session = None self._release_platform_lock() self._mark_disconnected() logger.info("[%s] Disconnected", self.name) async def _poll_loop(self) -> None: - assert self._session is not None + assert self._poll_session is not None sync_buf = _load_sync_buf(self._hermes_home, self._account_id) timeout_ms = LONG_POLL_TIMEOUT_MS consecutive_failures = 0 @@ -1166,7 +1240,7 @@ class WeixinAdapter(BasePlatformAdapter): while self._running: try: response = await _get_updates( - self._session, + self._poll_session, base_url=self._base_url, token=self._token, sync_buf=sync_buf, @@ -1223,7 +1297,7 @@ class WeixinAdapter(BasePlatformAdapter): logger.error("[%s] unhandled inbound error from=%s: %s", self.name, _safe_id(message.get("from_user_id")), exc, exc_info=True) async def _process_message(self, message: Dict[str, Any]) -> None: - assert self._session is not None + assert self._poll_session is not None sender_id = str(message.get("from_user_id") or "").strip() if not sender_id: return @@ -1316,7 +1390,7 @@ class WeixinAdapter(BasePlatformAdapter): media = _media_reference(item, "image_item") try: data = await _download_and_decrypt_media( - self._session, + self._poll_session, cdn_base_url=self._cdn_base_url, encrypted_query_param=media.get("encrypt_query_param"), aes_key_b64=(item.get("image_item") or {}).get("aeskey") @@ -1334,7 +1408,7 @@ class WeixinAdapter(BasePlatformAdapter): media = _media_reference(item, "video_item") try: data = await 
_download_and_decrypt_media( - self._session, + self._poll_session, cdn_base_url=self._cdn_base_url, encrypted_query_param=media.get("encrypt_query_param"), aes_key_b64=media.get("aes_key"), @@ -1353,7 +1427,7 @@ class WeixinAdapter(BasePlatformAdapter): mime = _mime_from_filename(filename) try: data = await _download_and_decrypt_media( - self._session, + self._poll_session, cdn_base_url=self._cdn_base_url, encrypted_query_param=media.get("encrypt_query_param"), aes_key_b64=media.get("aes_key"), @@ -1372,7 +1446,7 @@ class WeixinAdapter(BasePlatformAdapter): return None try: data = await _download_and_decrypt_media( - self._session, + self._poll_session, cdn_base_url=self._cdn_base_url, encrypted_query_param=media.get("encrypt_query_param"), aes_key_b64=media.get("aes_key"), @@ -1385,13 +1459,13 @@ class WeixinAdapter(BasePlatformAdapter): return None async def _maybe_fetch_typing_ticket(self, user_id: str, context_token: Optional[str]) -> None: - if not self._session or not self._token: + if not self._poll_session or not self._token: return if self._typing_cache.get(user_id): return try: response = await _get_config( - self._session, + self._poll_session, base_url=self._base_url, token=self._token, user_id=user_id, @@ -1416,12 +1490,19 @@ class WeixinAdapter(BasePlatformAdapter): context_token: Optional[str], client_id: str, ) -> None: - """Send a single text chunk with per-chunk retry and backoff.""" + """Send a single text chunk with per-chunk retry and backoff. + + On session-expired errors (errcode -14), automatically retries + *without* ``context_token`` — iLink accepts tokenless sends as a + degraded fallback, which keeps cron-initiated push messages working + even when no user message has refreshed the session recently. 
+ """ last_error: Optional[Exception] = None + retried_without_token = False for attempt in range(self._send_chunk_retries + 1): try: - await _send_message( - self._session, + resp = await _send_message( + self._send_session, base_url=self._base_url, token=self._token, to=chat_id, @@ -1429,6 +1510,31 @@ class WeixinAdapter(BasePlatformAdapter): context_token=context_token, client_id=client_id, ) + # Check iLink response for session-expired error + if resp and isinstance(resp, dict): + ret = resp.get("ret") + errcode = resp.get("errcode") + if (ret is not None and ret not in (0,)) or (errcode is not None and errcode not in (0,)): + is_session_expired = ( + ret == SESSION_EXPIRED_ERRCODE + or errcode == SESSION_EXPIRED_ERRCODE + ) + # Session expired — strip token and retry once + if is_session_expired and not retried_without_token and context_token: + retried_without_token = True + context_token = None + self._token_store._cache.pop( + self._token_store._key(self._account_id, chat_id), None + ) + logger.warning( + "[%s] session expired for %s; retrying without context_token", + self.name, _safe_id(chat_id), + ) + continue + errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error" + raise RuntimeError( + f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}" + ) return except Exception as exc: last_error = exc @@ -1456,12 +1562,48 @@ class WeixinAdapter(BasePlatformAdapter): reply_to: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: - if not self._session or not self._token: + if not self._send_session or not self._token: return SendResult(success=False, error="Not connected") context_token = self._token_store.get(self._account_id, chat_id) last_message_id: Optional[str] = None + + # Extract MEDIA: tags and bare local file paths before text delivery. 
+ media_files, cleaned_content = self.extract_media(content) + _, image_cleaned = self.extract_images(cleaned_content) + local_files, final_content = self.extract_local_files(image_cleaned) + + _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"} + _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"} + _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"} + + async def _deliver_media(path: str, is_voice: bool = False) -> None: + ext = Path(path).suffix.lower() + if is_voice or ext in _AUDIO_EXTS: + await self.send_voice(chat_id=chat_id, audio_path=path, metadata=metadata) + elif ext in _VIDEO_EXTS: + await self.send_video(chat_id=chat_id, video_path=path, metadata=metadata) + elif ext in _IMAGE_EXTS: + await self.send_image_file(chat_id=chat_id, image_path=path, metadata=metadata) + else: + await self.send_document(chat_id=chat_id, file_path=path, metadata=metadata) + try: - chunks = [c for c in self._split_text(self.format_message(content)) if c and c.strip()] + # Deliver extracted MEDIA: attachments first. + for media_path, is_voice in media_files: + try: + await _deliver_media(media_path, is_voice) + except Exception as exc: + logger.warning("[%s] media delivery failed for %s: %s", self.name, media_path, exc) + + # Deliver bare local file paths. + for file_path in local_files: + try: + await _deliver_media(file_path, is_voice=False) + except Exception as exc: + logger.warning("[%s] local file delivery failed for %s: %s", self.name, file_path, exc) + + # Deliver text content. 
+ chunks = [c for c in self._split_text(self.format_message(final_content)) if c and c.strip()] for idx, chunk in enumerate(chunks): client_id = f"hermes-weixin-{uuid.uuid4().hex}" await self._send_text_chunk( @@ -1479,14 +1621,14 @@ class WeixinAdapter(BasePlatformAdapter): return SendResult(success=False, error=str(exc)) async def send_typing(self, chat_id: str, metadata: Optional[Dict[str, Any]] = None) -> None: - if not self._session or not self._token: + if not self._send_session or not self._token: return typing_ticket = self._typing_cache.get(chat_id) if not typing_ticket: return try: await _send_typing( - self._session, + self._send_session, base_url=self._base_url, token=self._token, to_user_id=chat_id, @@ -1497,14 +1639,14 @@ class WeixinAdapter(BasePlatformAdapter): logger.debug("[%s] typing start failed for %s: %s", self.name, _safe_id(chat_id), exc) async def stop_typing(self, chat_id: str) -> None: - if not self._session or not self._token: + if not self._send_session or not self._token: return typing_ticket = self._typing_cache.get(chat_id) if not typing_ticket: return try: await _send_typing( - self._session, + self._send_session, base_url=self._base_url, token=self._token, to_user_id=chat_id, @@ -1542,24 +1684,35 @@ class WeixinAdapter(BasePlatformAdapter): async def send_image_file( self, chat_id: str, - path: str, - caption: str = "", + image_path: str, + caption: Optional[str] = None, reply_to: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, + **kwargs, ) -> SendResult: - return await self.send_document(chat_id, file_path=path, caption=caption, metadata=metadata) + del reply_to, kwargs + return await self.send_document( + chat_id=chat_id, + file_path=image_path, + caption=caption, + metadata=metadata, + ) async def send_document( self, chat_id: str, file_path: str, - caption: str = "", + caption: Optional[str] = None, + file_name: Optional[str] = None, + reply_to: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, 
+ **kwargs, ) -> SendResult: - if not self._session or not self._token: + del file_name, reply_to, metadata, kwargs + if not self._send_session or not self._token: return SendResult(success=False, error="Not connected") try: - message_id = await self._send_file(chat_id, file_path, caption) + message_id = await self._send_file(chat_id, file_path, caption or "") return SendResult(success=True, message_id=message_id) except Exception as exc: logger.error("[%s] send_document failed to=%s: %s", self.name, _safe_id(chat_id), exc) @@ -1573,7 +1726,7 @@ class WeixinAdapter(BasePlatformAdapter): reply_to: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: - if not self._session or not self._token: + if not self._send_session or not self._token: return SendResult(success=False, error="Not connected") try: message_id = await self._send_file(chat_id, video_path, caption or "") @@ -1590,7 +1743,24 @@ class WeixinAdapter(BasePlatformAdapter): reply_to: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: - return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata) + if not self._send_session or not self._token: + return SendResult(success=False, error="Not connected") + + # Native outbound Weixin voice bubbles are not proven-working in the + # upstream reference implementation. Prefer a reliable file attachment + # fallback so users at least receive playable audio, even for .silk. 
+ fallback_caption = caption or "[voice message as attachment]" + try: + message_id = await self._send_file( + chat_id, + audio_path, + fallback_caption, + force_file_attachment=True, + ) + return SendResult(success=True, message_id=message_id) + except Exception as exc: + logger.error("[%s] send_voice failed to=%s: %s", self.name, _safe_id(chat_id), exc) + return SendResult(success=False, error=str(exc)) async def _download_remote_media(self, url: str) -> str: from tools.url_safety import is_safe_url @@ -1598,8 +1768,8 @@ class WeixinAdapter(BasePlatformAdapter): if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL (SSRF protection): {url}") - assert self._session is not None - async with self._session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response: + assert self._send_session is not None + async with self._send_session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response: response.raise_for_status() data = await response.read() suffix = Path(url.split("?", 1)[0]).suffix or ".bin" @@ -1607,16 +1777,22 @@ class WeixinAdapter(BasePlatformAdapter): handle.write(data) return handle.name - async def _send_file(self, chat_id: str, path: str, caption: str) -> str: - assert self._session is not None and self._token is not None + async def _send_file( + self, + chat_id: str, + path: str, + caption: str, + force_file_attachment: bool = False, + ) -> str: + assert self._send_session is not None and self._token is not None plaintext = Path(path).read_bytes() - media_type, item_builder = self._outbound_media_builder(path) + media_type, item_builder = self._outbound_media_builder(path, force_file_attachment=force_file_attachment) filekey = secrets.token_hex(16) aes_key = secrets.token_bytes(16) rawsize = len(plaintext) rawfilemd5 = hashlib.md5(plaintext).hexdigest() upload_response = await _get_upload_url( - self._session, + self._send_session, base_url=self._base_url, token=self._token, to_user_id=chat_id, @@ -1642,30 +1818,34 @@ class 
WeixinAdapter(BasePlatformAdapter): raise RuntimeError(f"getUploadUrl returned neither upload_param nor upload_full_url: {upload_response}") encrypted_query_param = await _upload_ciphertext( - self._session, + self._send_session, ciphertext=ciphertext, upload_url=upload_url, ) - context_token = self._token_store.get(self._account_id, chat_id) # The iLink API expects aes_key as base64(hex_string), not base64(raw_bytes). # Sending base64(raw_bytes) causes images to show as grey boxes on the # receiver side because the decryption key doesn't match. aes_key_for_api = base64.b64encode(aes_key.hex().encode("ascii")).decode("ascii") - media_item = item_builder( - encrypt_query_param=encrypted_query_param, - aes_key_for_api=aes_key_for_api, - ciphertext_size=len(ciphertext), - plaintext_size=rawsize, - filename=Path(path).name, - rawfilemd5=rawfilemd5, - ) + item_kwargs = { + "encrypt_query_param": encrypted_query_param, + "aes_key_for_api": aes_key_for_api, + "ciphertext_size": len(ciphertext), + "plaintext_size": rawsize, + "filename": Path(path).name, + "rawfilemd5": rawfilemd5, + } + if media_type == MEDIA_VOICE and path.endswith(".silk"): + item_kwargs["encode_type"] = 6 + item_kwargs["sample_rate"] = 24000 + item_kwargs["bits_per_sample"] = 16 + media_item = item_builder(**item_kwargs) last_message_id = None if caption: last_message_id = f"hermes-weixin-{uuid.uuid4().hex}" await _send_message( - self._session, + self._send_session, base_url=self._base_url, token=self._token, to=chat_id, @@ -1676,7 +1856,7 @@ class WeixinAdapter(BasePlatformAdapter): last_message_id = f"hermes-weixin-{uuid.uuid4().hex}" await _api_post( - self._session, + self._send_session, base_url=self._base_url, endpoint=EP_SEND_MESSAGE, payload={ @@ -1695,7 +1875,7 @@ class WeixinAdapter(BasePlatformAdapter): ) return last_message_id - def _outbound_media_builder(self, path: str): + def _outbound_media_builder(self, path: str, force_file_attachment: bool = False): mime = 
mimetypes.guess_type(path)[0] or "application/octet-stream" if mime.startswith("image/"): return MEDIA_IMAGE, lambda **kw: { @@ -1723,7 +1903,7 @@ class WeixinAdapter(BasePlatformAdapter): "video_md5": kw.get("rawfilemd5", ""), }, } - if mime.startswith("audio/") or path.endswith(".silk"): + if path.endswith(".silk") and not force_file_attachment: return MEDIA_VOICE, lambda **kw: { "type": ITEM_VOICE, "voice_item": { @@ -1732,9 +1912,25 @@ class WeixinAdapter(BasePlatformAdapter): "aes_key": kw["aes_key_for_api"], "encrypt_type": 1, }, + "encode_type": kw.get("encode_type"), + "bits_per_sample": kw.get("bits_per_sample"), + "sample_rate": kw.get("sample_rate"), "playtime": kw.get("playtime", 0), }, } + if mime.startswith("audio/"): + return MEDIA_FILE, lambda **kw: { + "type": ITEM_FILE, + "file_item": { + "media": { + "encrypt_query_param": kw["encrypt_query_param"], + "aes_key": kw["aes_key_for_api"], + "encrypt_type": 1, + }, + "file_name": kw["filename"], + "len": str(kw["plaintext_size"]), + }, + } return MEDIA_FILE, lambda **kw: { "type": ITEM_FILE, "file_item": { @@ -1784,7 +1980,34 @@ async def send_weixin_direct( token_store.restore(account_id) context_token = token_store.get(account_id, chat_id) - async with aiohttp.ClientSession(trust_env=True) as session: + live_adapter = _LIVE_ADAPTERS.get(resolved_token) + send_session = getattr(live_adapter, '_send_session', None) + if live_adapter is not None and send_session is not None and not send_session.closed: + last_result: Optional[SendResult] = None + cleaned = live_adapter.format_message(message) + if cleaned: + last_result = await live_adapter.send(chat_id, cleaned) + if not last_result.success: + return {"error": f"Weixin send failed: {last_result.error}"} + + for media_path, _is_voice in media_files or []: + ext = Path(media_path).suffix.lower() + if ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}: + last_result = await live_adapter.send_image_file(chat_id, media_path) + else: + last_result = 
await live_adapter.send_document(chat_id, media_path) + if not last_result.success: + return {"error": f"Weixin media send failed: {last_result.error}"} + + return { + "success": True, + "platform": "weixin", + "chat_id": chat_id, + "message_id": last_result.message_id if last_result else None, + "context_token_used": bool(context_token), + } + + async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session: adapter = WeixinAdapter( PlatformConfig( enabled=True, @@ -1797,6 +2020,7 @@ async def send_weixin_direct( }, ) ) + adapter._send_session = session adapter._session = session adapter._token = resolved_token adapter._account_id = account_id diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index d1de5b8568..b998da345e 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -289,33 +289,35 @@ class WhatsAppAdapter(BasePlatformAdapter): logger.info("[%s] Bridge found at %s", self.name, bridge_path) # Acquire scoped lock to prevent duplicate sessions + lock_acquired = False try: if not self._acquire_platform_lock('whatsapp-session', str(self._session_path), 'WhatsApp session'): return False + lock_acquired = True except Exception as e: logger.warning("[%s] Could not acquire session lock (non-fatal): %s", self.name, e) - # Auto-install npm dependencies if node_modules doesn't exist - bridge_dir = bridge_path.parent - if not (bridge_dir / "node_modules").exists(): - print(f"[{self.name}] Installing WhatsApp bridge dependencies...") - try: - install_result = subprocess.run( - ["npm", "install", "--silent"], - cwd=str(bridge_dir), - capture_output=True, - text=True, - timeout=60, - ) - if install_result.returncode != 0: - print(f"[{self.name}] npm install failed: {install_result.stderr}") - return False - print(f"[{self.name}] Dependencies installed") - except Exception as e: - print(f"[{self.name}] Failed to install dependencies: {e}") - return False - try: + # Auto-install npm 
dependencies if node_modules doesn't exist + bridge_dir = bridge_path.parent + if not (bridge_dir / "node_modules").exists(): + print(f"[{self.name}] Installing WhatsApp bridge dependencies...") + try: + install_result = subprocess.run( + ["npm", "install", "--silent"], + cwd=str(bridge_dir), + capture_output=True, + text=True, + timeout=60, + ) + if install_result.returncode != 0: + print(f"[{self.name}] npm install failed: {install_result.stderr}") + return False + print(f"[{self.name}] Dependencies installed") + except Exception as e: + print(f"[{self.name}] Failed to install dependencies: {e}") + return False + # Ensure session directory exists self._session_path.mkdir(parents=True, exist_ok=True) @@ -452,10 +454,13 @@ class WhatsAppAdapter(BasePlatformAdapter): return True except Exception as e: - self._release_platform_lock() logger.error("[%s] Failed to start bridge: %s", self.name, e, exc_info=True) - self._close_bridge_log() return False + finally: + if not self._running: + if lock_acquired: + self._release_platform_lock() + self._close_bridge_log() def _close_bridge_log(self) -> None: """Close the bridge log file handle if open.""" @@ -655,6 +660,8 @@ class WhatsAppAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent message via the WhatsApp bridge.""" if not self._running or not self._http_session: @@ -766,6 +773,17 @@ class WhatsAppAdapter(BasePlatformAdapter): """Send a video natively via bridge — plays inline in WhatsApp.""" return await self._send_media_to_bridge(chat_id, video_path, "video", caption) + async def send_voice( + self, + chat_id: str, + audio_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, + ) -> SendResult: + """Send an audio file as a WhatsApp voice message via bridge.""" + return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption) + async def send_document( self, chat_id: str, 
diff --git a/gateway/run.py b/gateway/run.py index da3560cf74..eb0dfe237f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -24,10 +24,20 @@ import signal import tempfile import threading import time +from collections import OrderedDict +from contextvars import copy_context from pathlib import Path from datetime import datetime from typing import Dict, Optional, Any, List +# --- Agent cache tuning --------------------------------------------------- +# Bounds the per-session AIAgent cache to prevent unbounded growth in +# long-lived gateways (each AIAgent holds LLM clients, tool schemas, +# memory providers, etc.). LRU order + idle TTL eviction are enforced +# from _enforce_agent_cache_cap() and _session_expiry_watcher() below. +_AGENT_CACHE_MAX_SIZE = 128 +_AGENT_CACHE_IDLE_TTL_SECS = 3600.0 # evict agents idle for >1h + # --------------------------------------------------------------------------- # SSL certificate auto-detection for NixOS and other non-standard systems. # Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported. @@ -86,6 +96,10 @@ from hermes_cli.env_loader import load_hermes_dotenv _env_path = _hermes_home / '.env' load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env') + +_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P.+):(?P/[^:]+?)(?::(?P[^:]+))?$") +_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"} + # Bridge config.yaml values into the environment so os.getenv() picks them up. # config.yaml is authoritative for terminal settings — overrides .env. _config_path = _hermes_home / 'config.yaml' @@ -130,6 +144,12 @@ if _config_path.exists(): for _cfg_key, _env_var in _terminal_env_map.items(): if _cfg_key in _terminal_cfg: _val = _terminal_cfg[_cfg_key] + # Skip cwd placeholder values (".", "auto", "cwd") — the + # gateway resolves these to Path.home() later (line ~255). + # Writing the raw placeholder here would just be noise. 
+ # Only bridge explicit absolute paths from config.yaml. + if _cfg_key == "cwd" and str(_val) in (".", "auto", "cwd"): + continue if isinstance(_val, list): os.environ[_env_var] = json.dumps(_val) else: @@ -224,6 +244,13 @@ try: except Exception: pass +# Warn if user has deprecated MESSAGING_CWD / TERMINAL_CWD in .env +try: + from hermes_cli.config import warn_deprecated_cwd_env_vars + warn_deprecated_cwd_env_vars() +except Exception: + pass + # Gateway runs in quiet mode - suppress debug output and use cwd directly (no temp dirs) os.environ["HERMES_QUIET"] = "1" @@ -231,12 +258,14 @@ os.environ["HERMES_QUIET"] = "1" os.environ["HERMES_EXEC_ASK"] = "1" # Set terminal working directory for messaging platforms. -# If the user set an explicit path in config.yaml (not "." or "auto"), -# respect it. Otherwise use MESSAGING_CWD or default to home directory. +# config.yaml terminal.cwd is the canonical source (bridged to TERMINAL_CWD +# by the config bridge above). When it's unset or a placeholder, default +# to home directory. MESSAGING_CWD is accepted as a backward-compat +# fallback (deprecated — the warning above tells users to migrate). 
_configured_cwd = os.environ.get("TERMINAL_CWD", "") if not _configured_cwd or _configured_cwd in (".", "auto", "cwd"): - messaging_cwd = os.getenv("MESSAGING_CWD") or str(Path.home()) - os.environ["TERMINAL_CWD"] = messaging_cwd + _fallback = os.getenv("MESSAGING_CWD") or str(Path.home()) + os.environ["TERMINAL_CWD"] = _fallback from gateway.config import ( Platform, @@ -373,6 +402,33 @@ def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None: return adapter.get_pending_message(session_key) +_INTERRUPT_REASON_STOP = "Stop requested" +_INTERRUPT_REASON_RESET = "Session reset requested" +_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)" +_INTERRUPT_REASON_SSE_DISCONNECT = "SSE client disconnected" +_INTERRUPT_REASON_GATEWAY_SHUTDOWN = "Gateway shutting down" +_INTERRUPT_REASON_GATEWAY_RESTART = "Gateway restarting" + +_CONTROL_INTERRUPT_MESSAGES = frozenset( + { + _INTERRUPT_REASON_STOP.lower(), + _INTERRUPT_REASON_RESET.lower(), + _INTERRUPT_REASON_TIMEOUT.lower(), + _INTERRUPT_REASON_SSE_DISCONNECT.lower(), + _INTERRUPT_REASON_GATEWAY_SHUTDOWN.lower(), + _INTERRUPT_REASON_GATEWAY_RESTART.lower(), + } +) + + +def _is_control_interrupt_message(message: Optional[str]) -> bool: + """Return True when an interrupt message is internal control flow.""" + if not message: + return False + normalized = " ".join(str(message).strip().split()).lower() + return normalized in _CONTROL_INTERRUPT_MESSAGES + + def _check_unavailable_skill(command_name: str) -> str | None: """Check if a command matches a known-but-inactive skill. @@ -482,6 +538,32 @@ def _resolve_hermes_bin() -> Optional[list[str]]: return None +def _parse_session_key(session_key: str) -> "dict | None": + """Parse a session key into its component parts. + + Session keys follow the format + ``agent:main:{platform}:{chat_type}:{chat_id}[:{extra}...]``. + Returns a dict with ``platform``, ``chat_type``, ``chat_id``, and + optionally ``thread_id`` keys, or None if the key doesn't match. 
+ + The 6th element is only returned as ``thread_id`` for chat types where + it is unambiguous (``dm`` and ``thread``). For group/channel sessions + the suffix may be a user_id (per-user isolation) rather than a + thread_id, so we leave ``thread_id`` out to avoid mis-routing. + """ + parts = session_key.split(":") + if len(parts) >= 5 and parts[0] == "agent" and parts[1] == "main": + result = { + "platform": parts[2], + "chat_type": parts[3], + "chat_id": parts[4], + } + if len(parts) > 5 and parts[3] in ("dm", "thread"): + result["thread_id"] = parts[5] + return result + return None + + def _format_gateway_process_notification(evt: dict) -> "str | None": """Format a watch pattern event from completion_queue into a [SYSTEM:] message.""" evt_type = evt.get("type", "completion") @@ -534,6 +616,7 @@ class GatewayRunner: def __init__(self, config: Optional[GatewayConfig] = None): self.config = config or load_gateway_config() self.adapters: Dict[Platform, BasePlatformAdapter] = {} + self._warn_if_docker_media_delivery_is_risky() # Load ephemeral config from config.yaml / env vars. # Both are injected at API-call time only and never persisted. @@ -546,7 +629,6 @@ class GatewayRunner: self._restart_drain_timeout = self._load_restart_drain_timeout() self._provider_routing = self._load_provider_routing() self._fallback_model = self._load_fallback_model() - self._smart_model_routing = self._load_smart_model_routing() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -573,14 +655,21 @@ class GatewayRunner: self._running_agents: Dict[str, Any] = {} self._running_agents_ts: Dict[str, float] = {} # start timestamp per session self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt + self._busy_ack_ts: Dict[str, float] = {} # last busy-ack timestamp per session (debounce) + self._session_run_generation: Dict[str, int] = {} # Cache AIAgent instances per session to preserve prompt caching. 
# Without this, a new AIAgent is created per message, rebuilding the # system prompt (including memory) every turn — breaking prefix cache # and costing ~10x more on providers with prompt caching (Anthropic). # Key: session_key, Value: (AIAgent, config_signature_str) + # + # OrderedDict so _enforce_agent_cache_cap() can pop the least-recently- + # used entry (move_to_end() on cache hits, popitem(last=False) for + # eviction). Hard cap via _AGENT_CACHE_MAX_SIZE, idle TTL enforced + # from _session_expiry_watcher(). import threading as _threading - self._agent_cache: Dict[str, tuple] = {} + self._agent_cache: "OrderedDict[str, tuple]" = OrderedDict() self._agent_cache_lock = _threading.Lock() # Per-session model overrides from /model command. @@ -634,6 +723,53 @@ class GatewayRunner: self._background_tasks: set = set() + def _warn_if_docker_media_delivery_is_risky(self) -> None: + """Warn when Docker-backed gateways lack an explicit export mount. + + MEDIA delivery happens in the gateway process, so paths emitted by the model + must be readable from the host. A plain container-local path like + `/workspace/report.txt` or `/output/report.txt` often exists only inside + Docker, so users commonly need a dedicated export mount such as + `host-dir:/output`. 
+ """ + if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker": + return + + connected = self.config.get_connected_platforms() + messaging_platforms = [p for p in connected if p not in {Platform.LOCAL, Platform.API_SERVER, Platform.WEBHOOK}] + if not messaging_platforms: + return + + raw_volumes = os.getenv("TERMINAL_DOCKER_VOLUMES", "").strip() + volumes: List[str] = [] + if raw_volumes: + try: + parsed = json.loads(raw_volumes) + if isinstance(parsed, list): + volumes = [str(v) for v in parsed if isinstance(v, str)] + except Exception: + logger.debug("Could not parse TERMINAL_DOCKER_VOLUMES for gateway media warning", exc_info=True) + + has_explicit_output_mount = False + for spec in volumes: + match = _DOCKER_VOLUME_SPEC_RE.match(spec) + if not match: + continue + container_path = match.group("container") + if container_path in _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS: + has_explicit_output_mount = True + break + + if has_explicit_output_mount: + return + + logger.warning( + "Docker backend is enabled for the messaging gateway but no explicit host-visible " + "output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. " + "This is fine if the model already emits host-visible paths, but MEDIA file delivery can fail " + "for container-local paths like '/workspace/...' or '/output/...'." 
+ ) + # -- Setup skill availability ---------------------------------------- @@ -650,6 +786,10 @@ class GatewayRunner: _VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json" + def _voice_key(self, platform: Platform, chat_id: str) -> str: + """Return a platform-namespaced key for voice mode state.""" + return f"{platform.value}:{chat_id}" + def _load_voice_modes(self) -> Dict[str, str]: try: data = json.loads(self._VOICE_MODE_PATH.read_text()) @@ -660,11 +800,21 @@ class GatewayRunner: return {} valid_modes = {"off", "voice_only", "all"} - return { - str(chat_id): mode - for chat_id, mode in data.items() - if mode in valid_modes - } + result = {} + for chat_id, mode in data.items(): + if mode not in valid_modes: + continue + key = str(chat_id) + # Skip legacy unprefixed keys (warn and skip) + if ":" not in key: + logger.warning( + "Skipping legacy unprefixed voice mode key %r during migration. " + "Re-enable voice mode on that chat to rebuild the prefixed key.", + key, + ) + continue + result[key] = mode + return result def _save_voice_modes(self) -> None: try: @@ -690,11 +840,36 @@ class GatewayRunner: disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None) if not isinstance(disabled_chats, set): return + platform = getattr(adapter, "platform", None) + if not isinstance(platform, Platform): + return disabled_chats.clear() + prefix = f"{platform.value}:" disabled_chats.update( - chat_id for chat_id, mode in self._voice_mode.items() if mode == "off" + key[len(prefix):] for key, mode in self._voice_mode.items() + if mode == "off" and key.startswith(prefix) ) + async def _safe_adapter_disconnect(self, adapter, platform) -> None: + """Call adapter.disconnect() defensively, swallowing any error. + + Used when adapter.connect() failed or raised — the adapter may + have allocated partial resources (aiohttp.ClientSession, poll + tasks, child subprocesses) that would otherwise leak and surface + as "Unclosed client session" warnings at process exit. 
+ + Must tolerate partial-init state and never raise, since callers + use it inside error-handling blocks. + """ + try: + await adapter.disconnect() + except Exception as e: + logger.debug( + "Defensive %s disconnect after failed connect raised: %s", + platform.value if platform is not None else "adapter", + e, + ) + # ----------------------------------------------------------------- def _flush_memories_for_session( @@ -734,69 +909,72 @@ class GatewayRunner: enabled_toolsets=["memory", "skills"], session_id=old_session_id, ) - # Fully silence the flush agent — quiet_mode only suppresses init - # messages; tool call output still leaks to the terminal through - # _safe_print → _print_fn. Set a no-op to prevent that. - tmp_agent._print_fn = lambda *a, **kw: None - - # Build conversation history from transcript - msgs = [ - {"role": m.get("role"), "content": m.get("content")} - for m in history - if m.get("role") in ("user", "assistant") and m.get("content") - ] - - # Read live memory state from disk so the flush agent can see - # what's already saved and avoid overwriting newer entries. - _current_memory = "" try: - from tools.memory_tool import get_memory_dir - _mem_dir = get_memory_dir() - for fname, label in [ - ("MEMORY.md", "MEMORY (your personal notes)"), - ("USER.md", "USER PROFILE (who the user is)"), - ]: - fpath = _mem_dir / fname - if fpath.exists(): - content = fpath.read_text(encoding="utf-8").strip() - if content: - _current_memory += f"\n\n## Current {label}:\n{content}" - except Exception: - pass # Non-fatal — flush still works, just without the guard + # Fully silence the flush agent — quiet_mode only suppresses init + # messages; tool call output still leaks to the terminal through + # _safe_print → _print_fn. Set a no-op to prevent that. 
+ tmp_agent._print_fn = lambda *a, **kw: None - # Give the agent a real turn to think about what to save - flush_prompt = ( - "[System: This session is about to be automatically reset due to " - "inactivity or a scheduled daily reset. The conversation context " - "will be cleared after this turn.\n\n" - "Review the conversation above and:\n" - "1. Save any important facts, preferences, or decisions to memory " - "(user profile or your notes) that would be useful in future sessions.\n" - "2. If you discovered a reusable workflow or solved a non-trivial " - "problem, consider saving it as a skill.\n" - "3. If nothing is worth saving, that's fine — just skip.\n\n" - ) + # Build conversation history from transcript + msgs = [ + {"role": m.get("role"), "content": m.get("content")} + for m in history + if m.get("role") in ("user", "assistant") and m.get("content") + ] - if _current_memory: - flush_prompt += ( - "IMPORTANT — here is the current live state of memory. Other " - "sessions, cron jobs, or the user may have updated it since this " - "conversation ended. Do NOT overwrite or remove entries unless " - "the conversation above reveals something that genuinely " - "supersedes them. Only add new information that is not already " - "captured below." - f"{_current_memory}\n\n" + # Read live memory state from disk so the flush agent can see + # what's already saved and avoid overwriting newer entries. 
+ _current_memory = "" + try: + from tools.memory_tool import get_memory_dir + _mem_dir = get_memory_dir() + for fname, label in [ + ("MEMORY.md", "MEMORY (your personal notes)"), + ("USER.md", "USER PROFILE (who the user is)"), + ]: + fpath = _mem_dir / fname + if fpath.exists(): + content = fpath.read_text(encoding="utf-8").strip() + if content: + _current_memory += f"\n\n## Current {label}:\n{content}" + except Exception: + pass # Non-fatal — flush still works, just without the guard + + # Give the agent a real turn to think about what to save + flush_prompt = ( + "[System: This session is about to be automatically reset due to " + "inactivity or a scheduled daily reset. The conversation context " + "will be cleared after this turn.\n\n" + "Review the conversation above and:\n" + "1. Save any important facts, preferences, or decisions to memory " + "(user profile or your notes) that would be useful in future sessions.\n" + "2. If you discovered a reusable workflow or solved a non-trivial " + "problem, consider saving it as a skill.\n" + "3. If nothing is worth saving, that's fine — just skip.\n\n" ) - flush_prompt += ( - "Do NOT respond to the user. Just use the memory and skill_manage " - "tools if needed, then stop.]" - ) + if _current_memory: + flush_prompt += ( + "IMPORTANT — here is the current live state of memory. Other " + "sessions, cron jobs, or the user may have updated it since this " + "conversation ended. Do NOT overwrite or remove entries unless " + "the conversation above reveals something that genuinely " + "supersedes them. Only add new information that is not already " + "captured below." + f"{_current_memory}\n\n" + ) - tmp_agent.run_conversation( - user_message=flush_prompt, - conversation_history=msgs, - ) + flush_prompt += ( + "Do NOT respond to the user. 
Just use the memory and skill_manage " + "tools if needed, then stop.]" + ) + + tmp_agent.run_conversation( + user_message=flush_prompt, + conversation_history=msgs, + ) + finally: + self._cleanup_agent_resources(tmp_agent) logger.info("Pre-reset memory flush completed for session %s", old_session_id) except Exception as e: logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e) @@ -807,7 +985,7 @@ class GatewayRunner: session_key: Optional[str] = None, ): """Run the sync memory flush in a thread pool so it won't block the event loop.""" - loop = asyncio.get_event_loop() + loop = asyncio.get_running_loop() await loop.run_in_executor( None, self._flush_memories_for_session, @@ -922,11 +1100,16 @@ class GatewayRunner: return model, runtime_kwargs def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict: - from agent.smart_model_routing import resolve_turn_route + """Build the effective model/runtime config for a single turn. + + Always uses the session's primary model/provider. If `/fast` is + enabled and the model supports Priority Processing / Anthropic fast + mode, attach `request_overrides` so the API call is marked + accordingly. 
+ """ from hermes_cli.models import resolve_fast_mode_overrides - primary = { - "model": model, + runtime = { "api_key": runtime_kwargs.get("api_key"), "base_url": runtime_kwargs.get("base_url"), "provider": runtime_kwargs.get("provider"), @@ -935,7 +1118,18 @@ class GatewayRunner: "args": list(runtime_kwargs.get("args") or []), "credential_pool": runtime_kwargs.get("credential_pool"), } - route = resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary) + route = { + "model": model, + "runtime": runtime, + "signature": ( + model, + runtime["provider"], + runtime["base_url"], + runtime["api_mode"], + runtime["command"], + tuple(runtime["args"]), + ), + } service_tier = getattr(self, "_service_tier", None) if not service_tier: @@ -943,7 +1137,7 @@ class GatewayRunner: return route try: - overrides = resolve_fast_mode_overrides(route.get("model")) + overrides = resolve_fast_mode_overrides(route["model"]) except Exception: overrides = None route["request_overrides"] = overrides @@ -1301,20 +1495,6 @@ class GatewayRunner: pass return None - @staticmethod - def _load_smart_model_routing() -> dict: - """Load optional smart cheap-vs-strong model routing config.""" - try: - import yaml as _y - cfg_path = _hermes_home / "config.yaml" - if cfg_path.exists(): - with open(cfg_path, encoding="utf-8") as _f: - cfg = _y.safe_load(_f) or {} - return cfg.get("smart_model_routing", {}) or {} - except Exception: - pass - return {} - def _snapshot_running_agents(self) -> Dict[str, Any]: return { session_key: agent @@ -1329,26 +1509,100 @@ class GatewayRunner: merge_pending_message_event(adapter._pending_messages, session_key, event) async def _handle_active_session_busy_message(self, event: MessageEvent, session_key: str) -> bool: - if not self._draining: - return False + # --- Draining case (gateway restarting/stopping) --- + if self._draining: + adapter = self.adapters.get(event.source.platform) + if not adapter: + return True + + thread_meta = 
{"thread_id": event.source.thread_id} if event.source.thread_id else None + if self._queue_during_drain_enabled(): + self._queue_or_replace_pending_event(session_key, event) + message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back." + else: + message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now." + + await adapter._send_with_retry( + chat_id=event.source.chat_id, + content=message, + reply_to=event.message_id, + metadata=thread_meta, + ) + return True + + # --- Normal busy case (agent actively running a task) --- + # The user sent a message while the agent is working. Interrupt the + # agent immediately so it stops the current tool-calling loop and + # processes the new message. The pending message is stored in the + # adapter so the base adapter picks it up once the interrupted run + # returns. A brief ack tells the user what's happening (debounced + # to avoid spam when they fire multiple messages quickly). adapter = self.adapters.get(event.source.platform) if not adapter: - return True + return False # let default path handle it + + # Store the message so it's processed as the next turn after the + # interrupt causes the current run to exit. + from gateway.platforms.base import merge_pending_message_event + merge_pending_message_event(adapter._pending_messages, session_key, event) + + # Interrupt the running agent — this aborts in-flight tool calls and + # causes the agent loop to exit at the next check point. 
+ running_agent = self._running_agents.get(session_key) + if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + try: + running_agent.interrupt(event.text) + except Exception: + pass # don't let interrupt failure block the ack + + # Debounce: only send an acknowledgment once every 30 seconds per session + # to avoid spamming the user when they send multiple messages quickly + _BUSY_ACK_COOLDOWN = 30 + now = time.time() + last_ack = self._busy_ack_ts.get(session_key, 0) + if now - last_ack < _BUSY_ACK_COOLDOWN: + return True # interrupt sent, ack already delivered recently + + self._busy_ack_ts[session_key] = now + + # Build a status-rich acknowledgment + status_parts = [] + if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + try: + summary = running_agent.get_activity_summary() + iteration = summary.get("api_call_count", 0) + max_iter = summary.get("max_iterations", 0) + current_tool = summary.get("current_tool") + start_ts = self._running_agents_ts.get(session_key, 0) + if start_ts: + elapsed_min = int((now - start_ts) / 60) + if elapsed_min > 0: + status_parts.append(f"{elapsed_min} min elapsed") + if max_iter: + status_parts.append(f"iteration {iteration}/{max_iter}") + if current_tool: + status_parts.append(f"running: {current_tool}") + except Exception: + pass + + status_detail = f" ({', '.join(status_parts)})" if status_parts else "" + message = ( + f"⚡ Interrupting current task{status_detail}. " + f"I'll respond to your message shortly." + ) thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None - if self._queue_during_drain_enabled(): - self._queue_or_replace_pending_event(session_key, event) - message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back." - else: - message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now." 
+ try: + await adapter._send_with_retry( + chat_id=event.source.chat_id, + content=message, + reply_to=event.message_id, + metadata=thread_meta, + ) + except Exception as e: + logger.debug("Failed to send busy-ack: %s", e) - await adapter._send_with_retry( - chat_id=event.source.chat_id, - content=message, - reply_to=event.message_id, - metadata=thread_meta, - ) return True async def _drain_active_agents(self, timeout: float) -> tuple[Dict[str, Any], bool]: @@ -1405,7 +1659,7 @@ class GatewayRunner: action = "restarting" if self._restart_requested else "shutting down" hint = ( "Your current task will be interrupted. " - "Use /retry after restart to continue." + "Send any message after restart and I'll try to resume where you left off." if self._restart_requested else "Your current task will be interrupted." ) @@ -1413,13 +1667,32 @@ class GatewayRunner: notified: set = set() for session_key in active: - # Parse platform + chat_id from the session key. - # Format: agent:main:{platform}:{chat_type}:{chat_id}[:{extra}...] - parts = session_key.split(":") - if len(parts) < 5: - continue - platform_str = parts[2] - chat_id = parts[4] + source = None + try: + if getattr(self, "session_store", None) is not None: + self.session_store._ensure_loaded() + entry = self.session_store._entries.get(session_key) + source = getattr(entry, "origin", None) if entry else None + except Exception as e: + logger.debug( + "Failed to load session origin for shutdown notification %s: %s", + session_key, + e, + ) + + if source is not None: + platform_str = source.platform.value + chat_id = source.chat_id + thread_id = source.thread_id + else: + # Fall back to parsing the session key when no persisted + # origin is available (legacy sessions/tests). 
+ _parsed = _parse_session_key(session_key) + if not _parsed: + continue + platform_str = _parsed["platform"] + chat_id = _parsed["chat_id"] + thread_id = _parsed.get("thread_id") # Deduplicate: one notification per chat, even if multiple # sessions (different users/threads) share the same chat. @@ -1435,7 +1708,6 @@ class GatewayRunner: # Include thread_id if present so the message lands in the # correct forum topic / thread. - thread_id = parts[5] if len(parts) > 5 else None metadata = {"thread_id": thread_id} if thread_id else None await adapter.send(chat_id, msg, metadata=metadata) @@ -1461,20 +1733,126 @@ class GatewayRunner: ) except Exception: pass + self._cleanup_agent_resources(agent) + + def _cleanup_agent_resources(self, agent: Any) -> None: + """Best-effort cleanup for temporary or cached agent instances.""" + if agent is None: + return + try: + if hasattr(agent, "shutdown_memory_provider"): + agent.shutdown_memory_provider() + except Exception: + pass + # Close tool resources (terminal sandboxes, browser daemons, + # background processes, httpx clients) to prevent zombie + # process accumulation. + try: + if hasattr(agent, "close"): + agent.close() + except Exception: + pass + + _STUCK_LOOP_THRESHOLD = 3 # restarts while active before auto-suspend + _STUCK_LOOP_FILE = ".restart_failure_counts" + + def _increment_restart_failure_counts(self, active_session_keys: set) -> None: + """Increment restart-failure counters for sessions active at shutdown. + + Persists to a JSON file so counters survive across restarts. + Sessions NOT in active_session_keys are removed (they completed + successfully, so the loop is broken). 
+ """ + import json + + path = _hermes_home / self._STUCK_LOOP_FILE + try: + counts = json.loads(path.read_text()) if path.exists() else {} + except Exception: + counts = {} + + # Increment active sessions, remove inactive ones (loop broken) + new_counts = {} + for key in active_session_keys: + new_counts[key] = counts.get(key, 0) + 1 + # Keep any entries that are still above 0 even if not active now + # (they might become active again next restart) + + try: + path.write_text(json.dumps(new_counts)) + except Exception: + pass + + def _suspend_stuck_loop_sessions(self) -> int: + """Suspend sessions that have been active across too many restarts. + + Returns the number of sessions suspended. Called on gateway startup + AFTER suspend_recently_active() to catch the stuck-loop pattern: + session loads → agent gets stuck → gateway restarts → repeat. + """ + import json + + path = _hermes_home / self._STUCK_LOOP_FILE + if not path.exists(): + return 0 + + try: + counts = json.loads(path.read_text()) + except Exception: + return 0 + + suspended = 0 + stuck_keys = [k for k, v in counts.items() if v >= self._STUCK_LOOP_THRESHOLD] + + for session_key in stuck_keys: try: - if hasattr(agent, "shutdown_memory_provider"): - agent.shutdown_memory_provider() + entry = self.session_store._entries.get(session_key) + if entry and not entry.suspended: + entry.suspended = True + suspended += 1 + logger.warning( + "Auto-suspended stuck session %s (active across %d " + "consecutive restarts — likely a stuck loop)", + session_key[:30], counts[session_key], + ) except Exception: pass - # Close tool resources (terminal sandboxes, browser daemons, - # background processes, httpx clients) to prevent zombie - # process accumulation. 
+ + if suspended: try: - if hasattr(agent, 'close'): - agent.close() + self.session_store._save() except Exception: pass + # Clear the file — counters start fresh after suspension + try: + path.unlink(missing_ok=True) + except Exception: + pass + + return suspended + + def _clear_restart_failure_count(self, session_key: str) -> None: + """Clear the restart-failure counter for a session that completed OK. + + Called after a successful agent turn to signal the loop is broken. + """ + import json + + path = _hermes_home / self._STUCK_LOOP_FILE + if not path.exists(): + return + try: + counts = json.loads(path.read_text()) + if session_key in counts: + del counts[session_key] + if counts: + path.write_text(json.dumps(counts)) + else: + path.unlink(missing_ok=True) + except Exception: + pass + async def _launch_detached_restart_command(self) -> None: import shutil import subprocess @@ -1618,6 +1996,17 @@ class GatewayRunner: except Exception as e: logger.warning("Session suspension on startup failed: %s", e) + # Stuck-loop detection (#7536): if a session has been active across + # 3+ consecutive restarts, it's probably stuck in a loop (the same + # history keeps causing the agent to hang). Auto-suspend it so the + # user gets a clean slate on the next message. + try: + stuck = self._suspend_stuck_loop_sessions() + if stuck: + logger.warning("Auto-suspended %d stuck-loop session(s)", stuck) + except Exception as e: + logger.debug("Stuck-loop detection failed: %s", e) + connected_count = 0 enabled_platform_count = 0 startup_nonretryable_errors: list[str] = [] @@ -1663,6 +2052,15 @@ class GatewayRunner: logger.info("✓ %s connected", platform.value) else: logger.warning("✗ %s failed to connect", platform.value) + # Defensive cleanup: a failed connect() may have + # allocated resources (aiohttp.ClientSession, poll + # tasks, bridge subprocesses) before giving up. 
+ # Without this call, those resources are orphaned + # and Python logs "Unclosed client session" at + # process exit. Adapter disconnect() implementations + # are expected to be idempotent and tolerate + # partial-init state. + await self._safe_adapter_disconnect(adapter, platform) if adapter.has_fatal_error: self._update_platform_runtime_status( platform.value, @@ -1703,6 +2101,10 @@ class GatewayRunner: } except Exception as e: logger.error("✗ %s error: %s", platform.value, e) + # Same defensive cleanup path for exceptions — an adapter + # that raised mid-connect may still have a live + # aiohttp.ClientSession or child subprocess. + await self._safe_adapter_disconnect(adapter, platform) self._update_platform_runtime_status( platform.value, platform_state="retrying", @@ -1865,16 +2267,12 @@ class GatewayRunner: if _cached_agent is None: _cached_agent = self._running_agents.get(key) if _cached_agent and _cached_agent is not _AGENT_PENDING_SENTINEL: - try: - if hasattr(_cached_agent, 'shutdown_memory_provider'): - _cached_agent.shutdown_memory_provider() - except Exception: - pass - try: - if hasattr(_cached_agent, 'close'): - _cached_agent.close() - except Exception: - pass + self._cleanup_agent_resources(_cached_agent) + # Drop the cache entry so the AIAgent (and its LLM + # clients, tool schemas, memory provider refs) can + # be garbage-collected. Otherwise the cache grows + # unbounded across the gateway's lifetime. + self._evict_cached_agent(key) # Mark as flushed and persist to disk so the flag # survives gateway restarts. with self.session_store._lock: @@ -1918,6 +2316,44 @@ class GatewayRunner: logger.info( "Session expiry done: %d flushed", _flushed, ) + + # Sweep agents that have been idle beyond the TTL regardless + # of session reset policy. This catches sessions with very + # long / "never" reset windows, whose cached AIAgents would + # otherwise pin memory for the gateway's entire lifetime. 
+ try: + _idle_evicted = self._sweep_idle_cached_agents() + if _idle_evicted: + logger.info( + "Agent cache idle sweep: evicted %d agent(s)", + _idle_evicted, + ) + except Exception as _e: + logger.debug("Idle agent sweep failed: %s", _e) + + # Periodically prune stale SessionStore entries. The + # in-memory dict (and sessions.json) would otherwise grow + # unbounded in gateways serving many rotating chats / + # threads / users over long time windows. Pruning is + # invisible to users — a resumed session just gets a + # fresh session_id, exactly as if the reset policy fired. + _last_prune_ts = getattr(self, "_last_session_store_prune_ts", 0.0) + _prune_interval = 3600.0 # once per hour + if time.time() - _last_prune_ts > _prune_interval: + try: + _max_age = int( + getattr(self.config, "session_store_max_age_days", 0) or 0 + ) + if _max_age > 0: + _pruned = self.session_store.prune_old_entries(_max_age) + if _pruned: + logger.info( + "SessionStore prune: dropped %d stale entries", + _pruned, + ) + except Exception as _e: + logger.debug("SessionStore prune failed: %s", _e) + self._last_session_store_prune_ts = time.time() except Exception as e: logger.debug("Session expiry watcher error: %s", e) # Sleep in small increments so we can stop quickly @@ -2089,8 +2525,42 @@ class GatewayRunner: timeout, self._running_agent_count(), ) + # Mark forcibly-interrupted sessions as resume_pending BEFORE + # interrupting the agents. This preserves each session's + # session_id + transcript so the next message on the same + # session_key auto-resumes from the existing conversation + # instead of getting routed through suspend_recently_active() + # and converted into a fresh session. Terminal escalation + # for genuinely stuck sessions still flows through the + # existing ``.restart_failure_counts`` stuck-loop counter + # (incremented below, threshold 3), which sets + # ``suspended=True`` and overrides resume_pending. 
+ # + # Iterate self._running_agents (current) rather than the + # drain-start ``active_agents`` snapshot — the snapshot + # may include sessions that finished gracefully during + # the drain window, and marking those falsely would give + # them a stray restart-interruption system note on their + # next turn even though their previous turn completed + # cleanly. Skip pending sentinels for the same reason + # _interrupt_running_agents() does: their agent hasn't + # started yet, there's nothing to interrupt, and the + # session shouldn't carry a misleading resume flag. + _resume_reason = ( + "restart_timeout" if self._restart_requested else "shutdown_timeout" + ) + for _sk, _agent in list(self._running_agents.items()): + if _agent is _AGENT_PENDING_SENTINEL: + continue + try: + self.session_store.mark_resume_pending(_sk, _resume_reason) + except Exception as _e: + logger.debug( + "mark_resume_pending failed for %s: %s", + _sk[:20], _e, + ) self._interrupt_running_agents( - "Gateway restarting" if self._restart_requested else "Gateway shutting down" + _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN ) interrupt_deadline = asyncio.get_running_loop().time() + 5.0 while self._running_agents and asyncio.get_running_loop().time() < interrupt_deadline: @@ -2124,8 +2594,11 @@ class GatewayRunner: self.adapters.clear() self._running_agents.clear() + self._running_agents_ts.clear() self._pending_messages.clear() self._pending_approvals.clear() + if hasattr(self, '_busy_ack_ts'): + self._busy_ack_ts.clear() self._shutdown_event.set() # Global cleanup: kill any remaining tool subprocesses not tied @@ -2146,6 +2619,20 @@ class GatewayRunner: except Exception: pass + # Close SQLite session DBs so the WAL write lock is released. 
+ # Without this, --replace and similar restart flows leave the + # old gateway's connection holding the WAL lock until Python + # actually exits — causing 'database is locked' errors when + # the new gateway tries to open the same file. + for _db_holder in (self, getattr(self, "session_store", None)): + _db = getattr(_db_holder, "_db", None) if _db_holder else None + if _db is None or not hasattr(_db, "close"): + continue + try: + _db.close() + except Exception as _e: + logger.debug("SessionDB close error: %s", _e) + from gateway.status import remove_pid_file remove_pid_file() @@ -2169,6 +2656,14 @@ class GatewayRunner: "active sessions." ) + # Track sessions that were active at shutdown for stuck-loop + # detection (#7536). On each restart, the counter increments + # for sessions that were running. If a session hits the + # threshold (3 consecutive restarts while active), the next + # startup auto-suspends it — breaking the loop. + if active_agents: + self._increment_restart_failure_counts(set(active_agents.keys())) + if self._restart_requested and self._restart_via_service: self._exit_code = GATEWAY_SERVICE_RESTART_EXIT_CODE self._exit_reason = self._exit_reason or "Gateway restart requested" @@ -2381,6 +2876,9 @@ class GatewayRunner: Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS", Platform.QQBOT: "QQ_ALLOWED_USERS", } + platform_group_env_map = { + Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS", + } platform_allow_all_map = { Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS", Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS", @@ -2405,6 +2903,28 @@ class GatewayRunner: if platform_allow_all_var and os.getenv(platform_allow_all_var, "").lower() in ("true", "1", "yes"): return True + # Discord bot senders that passed the DISCORD_ALLOW_BOTS platform + # filter are already authorized at the platform level — skip the + # user allowlist. Without this, bot messages allowed by + # DISCORD_ALLOW_BOTS=mentions/all would be rejected here with + # "Unauthorized user" (fixes #4466). 
+ if source.platform == Platform.DISCORD and getattr(source, "is_bot", False): + allow_bots = os.getenv("DISCORD_ALLOW_BOTS", "none").lower().strip() + if allow_bots in ("mentions", "all"): + return True + + # Discord role-based access (DISCORD_ALLOWED_ROLES): the adapter's + # on_message pre-filter already verified role membership — if the + # message reached here, the user passed that check. Authorize + # directly to avoid the "no allowlists configured" branch below + # rejecting role-only setups where DISCORD_ALLOWED_USERS is empty + # (issue #7871). + if ( + source.platform == Platform.DISCORD + and os.getenv("DISCORD_ALLOWED_ROLES", "").strip() + ): + return True + # Check pairing store (always checked, regardless of allowlists) platform_name = source.platform.value if source.platform else "" if self.pairing_store.is_approved(platform_name, user_id): @@ -2412,12 +2932,23 @@ class GatewayRunner: # Check platform-specific and global allowlists platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip() + group_allowlist = "" + if source.chat_type == "group": + group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip() global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip() - if not platform_allowlist and not global_allowlist: + if not platform_allowlist and not group_allowlist and not global_allowlist: # No allowlists configured -- check global allow-all flag return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") + # Some platforms authorize group traffic by chat ID rather than sender ID. 
+ if group_allowlist and source.chat_type == "group" and source.chat_id: + allowed_group_ids = { + chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip() + } + if "*" in allowed_group_ids or source.chat_id in allowed_group_ids: + return True + # Check if user is in any allowlist allowed_ids = set() if platform_allowlist: @@ -2450,10 +2981,59 @@ class GatewayRunner: return bool(check_ids & allowed_ids) def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str: - """Return how unauthorized DMs should be handled for a platform.""" + """Return how unauthorized DMs should be handled for a platform. + + Resolution order: + 1. Explicit per-platform ``unauthorized_dm_behavior`` in config — always wins. + 2. Explicit global ``unauthorized_dm_behavior`` in config — wins when no per-platform. + 3. When an allowlist (``PLATFORM_ALLOWED_USERS`` or ``GATEWAY_ALLOWED_USERS``) is + configured, default to ``"ignore"`` — the allowlist signals that the owner has + deliberately restricted access; spamming unknown contacts with pairing codes + is both noisy and a potential info-leak. (#9337) + 4. No allowlist and no explicit config → ``"pair"`` (open-gateway default). + """ config = getattr(self, "config", None) - if config and hasattr(config, "get_unauthorized_dm_behavior"): - return config.get_unauthorized_dm_behavior(platform) + + # Check for an explicit per-platform override first. + if config and hasattr(config, "get_unauthorized_dm_behavior") and platform: + platform_cfg = config.platforms.get(platform) if hasattr(config, "platforms") else None + if platform_cfg and "unauthorized_dm_behavior" in getattr(platform_cfg, "extra", {}): + # Operator explicitly configured behavior for this platform — respect it. + return config.get_unauthorized_dm_behavior(platform) + + # Check for an explicit global config override. 
+ if config and hasattr(config, "unauthorized_dm_behavior"): + if config.unauthorized_dm_behavior != "pair": # non-default → explicit override + return config.unauthorized_dm_behavior + + # No explicit override. Fall back to allowlist-aware default: + # if any allowlist is configured for this platform, silently drop + # unauthorized messages instead of sending pairing codes. + if platform: + platform_env_map = { + Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS", + Platform.DISCORD: "DISCORD_ALLOWED_USERS", + Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS", + Platform.SLACK: "SLACK_ALLOWED_USERS", + Platform.SIGNAL: "SIGNAL_ALLOWED_USERS", + Platform.EMAIL: "EMAIL_ALLOWED_USERS", + Platform.SMS: "SMS_ALLOWED_USERS", + Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS", + Platform.MATRIX: "MATRIX_ALLOWED_USERS", + Platform.DINGTALK: "DINGTALK_ALLOWED_USERS", + Platform.FEISHU: "FEISHU_ALLOWED_USERS", + Platform.WECOM: "WECOM_ALLOWED_USERS", + Platform.WECOM_CALLBACK: "WECOM_CALLBACK_ALLOWED_USERS", + Platform.WEIXIN: "WEIXIN_ALLOWED_USERS", + Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS", + Platform.QQBOT: "QQ_ALLOWED_USERS", + } + if os.getenv(platform_env_map.get(platform, ""), "").strip(): + return "ignore" + + if os.getenv("GATEWAY_ALLOWED_USERS", "").strip(): + return "ignore" + return "pair" async def _handle_message(self, event: MessageEvent) -> Optional[str]: @@ -2600,15 +3180,21 @@ class GatewayRunner: _quick_key[:30], _stale_age, _stale_idle, _raw_stale_timeout, _stale_detail, ) - del self._running_agents[_quick_key] - self._running_agents_ts.pop(_quick_key, None) + self._invalidate_session_run_generation( + _quick_key, + reason="stale_running_agent_eviction", + ) + self._release_running_agent_state(_quick_key) if _quick_key in self._running_agents: if event.get_command() == "status": return await self._handle_status_command(event) # Resolve the command once for all early-intercept checks below. 
- from hermes_cli.commands import resolve_command as _resolve_cmd_inner + from hermes_cli.commands import ( + ACTIVE_SESSION_BYPASS_COMMANDS as _DEDICATED_HANDLERS, + resolve_command as _resolve_cmd_inner, + ) _evt_cmd = event.get_command() _cmd_def_inner = _resolve_cmd_inner(_evt_cmd) if _evt_cmd else None @@ -2621,16 +3207,12 @@ class GatewayRunner: # _interrupt_requested. Force-clean _running_agents so the session # is unlocked and subsequent messages are processed normally. if _cmd_def_inner and _cmd_def_inner.name == "stop": - running_agent = self._running_agents.get(_quick_key) - if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: - running_agent.interrupt("Stop requested") - # Force-clean: remove the session lock regardless of agent state - adapter = self.adapters.get(source.platform) - if adapter and hasattr(adapter, 'get_pending_message'): - adapter.get_pending_message(_quick_key) # consume and discard - self._pending_messages.pop(_quick_key, None) - if _quick_key in self._running_agents: - del self._running_agents[_quick_key] + await self._interrupt_and_clear_session( + _quick_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command", + ) logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20]) return "⚡ Stopped. You can continue this session." @@ -2642,18 +3224,15 @@ class GatewayRunner: # doesn't get re-processed as a user message after the # interrupt completes. 
            if _cmd_def_inner and _cmd_def_inner.name == "new": - running_agent = self._running_agents.get(_quick_key) - if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: - running_agent.interrupt("Session reset requested") # Clear any pending messages so the old text doesn't replay - adapter = self.adapters.get(source.platform) - if adapter and hasattr(adapter, 'get_pending_message'): - adapter.get_pending_message(_quick_key) # consume and discard - self._pending_messages.pop(_quick_key, None) + await self._interrupt_and_clear_session( + _quick_key, + source, + interrupt_reason=_INTERRUPT_REASON_RESET, + invalidation_reason="new_command", + ) # Clean up the running agent entry so the reset handler # doesn't think an agent is still active. - if _quick_key in self._running_agents: - del self._running_agents[_quick_key] return await self._handle_reset_command(event) # /queue — queue without interrupting @@ -2669,10 +3248,59 @@ class GatewayRunner: message_type=_MT.TEXT, source=event.source, message_id=event.message_id, + channel_prompt=event.channel_prompt, ) adapter._pending_messages[_quick_key] = queued_event return "Queued for the next turn." + # /steer — inject mid-run after the next tool call. + # Unlike /queue (turn boundary), /steer lands BETWEEN tool-call + # iterations inside the same agent run, by appending to the + # last tool result's content. No interrupt, no new user turn, + # no role-alternation violation. + if _cmd_def_inner and _cmd_def_inner.name == "steer": + steer_text = event.get_command_args().strip() + if not steer_text: + return "Usage: /steer <text>" + running_agent = self._running_agents.get(_quick_key) + if running_agent is _AGENT_PENDING_SENTINEL: + # Agent hasn't started yet — queue as turn-boundary fallback. 
+ adapter = self.adapters.get(source.platform) + if adapter: + from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT + queued_event = _ME( + text=steer_text, + message_type=_MT.TEXT, + source=event.source, + message_id=event.message_id, + channel_prompt=event.channel_prompt, + ) + adapter._pending_messages[_quick_key] = queued_event + return "Agent still starting — /steer queued for the next turn." + if running_agent and hasattr(running_agent, "steer"): + try: + accepted = running_agent.steer(steer_text) + except Exception as exc: + logger.warning("Steer failed for session %s: %s", _quick_key[:20], exc) + return f"⚠️ Steer failed: {exc}" + if accepted: + preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "") + return f"⏩ Steer queued — arrives after the next tool call: '{preview}'" + return "Steer rejected (empty payload)." + # Running agent is missing or lacks steer() — fall back to queue. + adapter = self.adapters.get(source.platform) + if adapter: + from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT + queued_event = _ME( + text=steer_text, + message_type=_MT.TEXT, + source=event.source, + message_id=event.message_id, + channel_prompt=event.channel_prompt, + ) + adapter._pending_messages[_quick_key] = queued_event + return "No active agent — /steer queued for the next turn." + # /model must not be used while the agent is running. if _cmd_def_inner and _cmd_def_inner.name == "model": return "Agent is running — wait or /stop first, then switch models." @@ -2686,11 +3314,56 @@ class GatewayRunner: return await self._handle_approve_command(event) return await self._handle_deny_command(event) + # /agents (/tasks alias) should be query-only and never interrupt. + if _cmd_def_inner and _cmd_def_inner.name == "agents": + return await self._handle_agents_command(event) + # /background must bypass the running-agent guard — it starts a # parallel task and must never interrupt the active conversation. 
if _cmd_def_inner and _cmd_def_inner.name == "background": return await self._handle_background_command(event) + # Session-level toggles that are safe to run mid-agent — + # /yolo can unblock a pending approval prompt, /verbose cycles + # the tool-progress display mode for the ongoing stream. + # Both modify session state without needing agent interaction + # and must not be queued (the safety net would discard them). + # /fast and /reasoning are config-only and take effect next + # message, so they fall through to the catch-all busy response + # below — users should wait and set them between turns. + if _cmd_def_inner and _cmd_def_inner.name in ("yolo", "verbose"): + if _cmd_def_inner.name == "yolo": + return await self._handle_yolo_command(event) + if _cmd_def_inner.name == "verbose": + return await self._handle_verbose_command(event) + + # Gateway-handled info/control commands with dedicated + # running-agent handlers. + if _cmd_def_inner and _cmd_def_inner.name in _DEDICATED_HANDLERS: + if _cmd_def_inner.name == "help": + return await self._handle_help_command(event) + if _cmd_def_inner.name == "commands": + return await self._handle_commands_command(event) + if _cmd_def_inner.name == "profile": + return await self._handle_profile_command(event) + if _cmd_def_inner.name == "update": + return await self._handle_update_command(event) + + # Catch-all: any other recognized slash command reached the + # running-agent guard. Reject gracefully rather than falling + # through to interrupt + discard. Without this, commands + # like /model, /reasoning, /voice, /insights, /title, + # /resume, /retry, /undo, /compress, /usage, /provider, + # /reload-mcp, /sethome, /reset (all registered as Discord + # slash commands) would interrupt the agent AND get + # silently discarded by the slash-command safety net, + # producing a zero-char response. See #5057, #6252, #10370. + if _cmd_def_inner: + return ( + f"⏳ Agent is running — `/{_cmd_def_inner.name}` can't run " + f"mid-turn. 
Wait for the current response or `/stop` first." + ) + if event.message_type == MessageType.PHOTO: logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20]) adapter = self.adapters.get(source.platform) @@ -2698,20 +3371,50 @@ class GatewayRunner: merge_pending_message_event(adapter._pending_messages, _quick_key, event) return None + _telegram_followup_grace = float( + os.getenv("HERMES_TELEGRAM_FOLLOWUP_GRACE_SECONDS", "3.0") + ) + _started_at = self._running_agents_ts.get(_quick_key, 0) + if ( + source.platform == Platform.TELEGRAM + and event.message_type == MessageType.TEXT + and _telegram_followup_grace > 0 + and _started_at + and (time.time() - _started_at) <= _telegram_followup_grace + ): + logger.debug( + "Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt", + time.time() - _started_at, + _quick_key[:20], + ) + adapter = self.adapters.get(source.platform) + if adapter: + merge_pending_message_event( + adapter._pending_messages, + _quick_key, + event, + merge_text=True, + ) + return None + running_agent = self._running_agents.get(_quick_key) if running_agent is _AGENT_PENDING_SENTINEL: # Agent is being set up but not ready yet. if event.get_command() == "stop": # Force-clean the sentinel so the session is unlocked. - if _quick_key in self._running_agents: - del self._running_agents[_quick_key] + self._release_running_agent_state(_quick_key) logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20]) return "⚡ Force-stopped. The agent was still starting — session unlocked." # Queue the message so it will be picked up after the # agent starts. 
adapter = self.adapters.get(source.platform) if adapter: - adapter._pending_messages[_quick_key] = event + merge_pending_message_event( + adapter._pending_messages, + _quick_key, + event, + merge_text=True, + ) return None if self._draining: if self._queue_during_drain_enabled(): @@ -2763,6 +3466,9 @@ class GatewayRunner: if canonical == "status": return await self._handle_status_command(event) + if canonical == "agents": + return await self._handle_agents_command(event) + if canonical == "restart": return await self._handle_restart_command(event) @@ -2863,6 +3569,21 @@ class GatewayRunner: if canonical == "btw": return await self._handle_btw_command(event) + if canonical == "steer": + # No active agent — /steer has no tool call to inject into. + # Strip the prefix so downstream treats it as a normal user + # message. If the payload is empty, surface the usage hint. + steer_payload = event.get_command_args().strip() + if not steer_payload: + return "Usage: /steer (no agent is running; sending as a normal message)" + try: + event.text = steer_payload + except Exception: + pass + # Do NOT return — fall through to _handle_message_with_agent + # at the end of this function so the rewritten text is sent + # to the agent as a regular user turn. + if canonical == "voice": return await self._handle_voice_command(event) @@ -3006,17 +3727,23 @@ class GatewayRunner: # same session — corrupting the transcript. self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL self._running_agents_ts[_quick_key] = time.time() + _run_generation = self._begin_session_run_generation(_quick_key) try: - return await self._handle_message_with_agent(event, source, _quick_key) + return await self._handle_message_with_agent(event, source, _quick_key, _run_generation) finally: # If _run_agent replaced the sentinel with a real agent and # then cleaned it up, this is a no-op. If we exited early # (exception, command fallthrough, etc.) 
the sentinel must # not linger or the session would be permanently locked out. if self._running_agents.get(_quick_key) is _AGENT_PENDING_SENTINEL: - del self._running_agents[_quick_key] - self._running_agents_ts.pop(_quick_key, None) + self._release_running_agent_state(_quick_key) + else: + # Agent path already cleaned _running_agents; make sure + # the paired metadata dicts are gone too. + self._running_agents_ts.pop(_quick_key, None) + if hasattr(self, "_busy_ack_ts"): + self._busy_ack_ts.pop(_quick_key, None) async def _prepare_inbound_message_text( self, @@ -3148,7 +3875,7 @@ class GatewayRunner: from agent.context_references import preprocess_context_references_async from agent.model_metadata import get_model_context_length - _msg_cwd = os.environ.get("MESSAGING_CWD", os.path.expanduser("~")) + _msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~")) _msg_ctx_len = get_model_context_length( self._model, base_url=self._base_url or "", @@ -3174,7 +3901,7 @@ class GatewayRunner: return message_text - async def _handle_message_with_agent(self, event, source, _quick_key: str): + async def _handle_message_with_agent(self, event, source, _quick_key: str, run_generation: int): """Inner handler that runs under the _running_agents sentinel guard.""" _msg_start_time = time.time() _platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform) @@ -3516,54 +4243,58 @@ class GatewayRunner: model=_hyg_model, max_iterations=4, quiet_mode=True, + skip_memory=True, enabled_toolsets=["memory"], session_id=session_entry.session_id, ) - _hyg_agent._print_fn = lambda *a, **kw: None + try: + _hyg_agent._print_fn = lambda *a, **kw: None - loop = asyncio.get_event_loop() - _compressed, _ = await loop.run_in_executor( - None, - lambda: _hyg_agent._compress_context( - _hyg_msgs, "", - approx_tokens=_approx_tokens, - ), - ) - - # _compress_context ends the old session and creates - # a new session_id. 
Write compressed messages into - # the NEW session so the old transcript stays intact - # and searchable via session_search. - _hyg_new_sid = _hyg_agent.session_id - if _hyg_new_sid != session_entry.session_id: - session_entry.session_id = _hyg_new_sid - self.session_store._save() - - self.session_store.rewrite_transcript( - session_entry.session_id, _compressed - ) - # Reset stored token count — transcript was rewritten - session_entry.last_prompt_tokens = 0 - history = _compressed - _new_count = len(_compressed) - _new_tokens = estimate_messages_tokens_rough( - _compressed - ) - - logger.info( - "Session hygiene: compressed %s → %s msgs, " - "~%s → ~%s tokens", - _msg_count, _new_count, - f"{_approx_tokens:,}", f"{_new_tokens:,}", - ) - - if _new_tokens >= _warn_token_threshold: - logger.warning( - "Session hygiene: still ~%s tokens after " - "compression", - f"{_new_tokens:,}", + loop = asyncio.get_running_loop() + _compressed, _ = await loop.run_in_executor( + None, + lambda: _hyg_agent._compress_context( + _hyg_msgs, "", + approx_tokens=_approx_tokens, + ), ) + # _compress_context ends the old session and creates + # a new session_id. Write compressed messages into + # the NEW session so the old transcript stays intact + # and searchable via session_search. 
+ _hyg_new_sid = _hyg_agent.session_id + if _hyg_new_sid != session_entry.session_id: + session_entry.session_id = _hyg_new_sid + self.session_store._save() + + self.session_store.rewrite_transcript( + session_entry.session_id, _compressed + ) + # Reset stored token count — transcript was rewritten + session_entry.last_prompt_tokens = 0 + history = _compressed + _new_count = len(_compressed) + _new_tokens = estimate_messages_tokens_rough( + _compressed + ) + + logger.info( + "Session hygiene: compressed %s → %s msgs, " + "~%s → ~%s tokens", + _msg_count, _new_count, + f"{_approx_tokens:,}", f"{_new_tokens:,}", + ) + + if _new_tokens >= _warn_token_threshold: + logger.warning( + "Session hygiene: still ~%s tokens after " + "compression", + f"{_new_tokens:,}", + ) + finally: + self._cleanup_agent_resources(_hyg_agent) + except Exception as e: logger.warning( "Session hygiene auto-compress failed: %s", e @@ -3627,6 +4358,15 @@ class GatewayRunner: if message_text is None: return + # Bind this gateway run generation to the adapter's active-session + # event so deferred post-delivery callbacks can be released by the + # same run that registered them. 
+ self._bind_adapter_run_generation( + self.adapters.get(source.platform), + session_key, + run_generation, + ) + try: # Emit agent:start hook hook_ctx = { @@ -3645,7 +4385,9 @@ class GatewayRunner: source=source, session_id=session_entry.session_id, session_key=session_key, + run_generation=run_generation, event_message_id=event.message_id, + channel_prompt=event.channel_prompt, ) # Stop persistent typing indicator now that the agent is done @@ -3656,7 +4398,35 @@ class GatewayRunner: except Exception: pass + if not self._is_session_run_current(_quick_key, run_generation): + logger.info( + "Discarding stale agent result for %s — generation %d is no longer current", + _quick_key[:20] if _quick_key else "?", + run_generation, + ) + _stale_adapter = self.adapters.get(source.platform) + if getattr(type(_stale_adapter), "pop_post_delivery_callback", None) is not None: + _stale_adapter.pop_post_delivery_callback( + _quick_key, + generation=run_generation, + ) + elif _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"): + _stale_adapter._post_delivery_callbacks.pop(_quick_key, None) + return None + response = agent_result.get("final_response") or "" + + # Convert the agent's internal "(empty)" sentinel into a + # user-friendly message. "(empty)" means the model failed to + # produce visible content after exhausting all retries (nudge, + # prefill, empty-retry, fallback). Sending the raw sentinel + # looks like a bug; a short explanation is more helpful. + if response == "(empty)": + response = ( + "⚠️ The model returned no response after processing tool " + "results. This can happen with some models — try again or " + "rephrase your question." + ) agent_messages = agent_result.get("messages", []) _response_time = time.time() - _msg_start_time _api_calls = agent_result.get("api_calls", 0) @@ -3667,6 +4437,24 @@ class GatewayRunner: _response_time, _api_calls, _resp_len, ) + # Successful turn — clear any stuck-loop counter for this session. 
+ # This ensures the counter only accumulates across CONSECUTIVE + # restarts where the session was active (never completed). + # + # Also clear the resume_pending flag (set by drain-timeout + # shutdown) — the turn ran to completion, so recovery + # succeeded and subsequent messages should no longer receive + # the restart-interruption system note. + if session_key: + self._clear_restart_failure_count(session_key) + try: + self.session_store.clear_resume_pending(session_key) + except Exception as _e: + logger.debug( + "clear_resume_pending failed for %s: %s", + session_key[:20], _e, + ) + # Surface error details when the agent failed silently (final_response=None) if not response and agent_result.get("failed"): error_detail = agent_result.get("error", "unknown error") @@ -3755,7 +4543,7 @@ class GatewayRunner: synth_text = _format_gateway_process_notification(evt) if synth_text: try: - await self._inject_watch_notification(synth_text, event) + await self._inject_watch_notification(synth_text, evt) except Exception as e2: logger.error("Watch notification injection error: %s", e2) except Exception as e: @@ -3773,14 +4561,11 @@ class GatewayRunner: # intermediate reasoning) so sessions can be resumed with full context # and transcripts are useful for debugging and training data. # - # IMPORTANT: When the agent failed before producing any response - # (e.g. context-overflow 400), do NOT persist the user's message. + # IMPORTANT: When the agent failed (e.g. context-overflow 400, + # compression exhausted), do NOT persist the user's message. # Persisting it would make the session even larger, causing the - # same failure on the next attempt — an infinite loop. (#1630) - agent_failed_early = ( - agent_result.get("failed") - and not agent_result.get("final_response") - ) + # same failure on the next attempt — an infinite loop. 
(#1630, #9893) + agent_failed_early = bool(agent_result.get("failed")) if agent_failed_early: logger.info( "Skipping transcript persistence for failed request in " @@ -3788,6 +4573,24 @@ class GatewayRunner: session_entry.session_id, ) + # When compression is exhausted, the session is permanently too + # large to process. Auto-reset it so the next message starts + # fresh instead of replaying the same oversized context in an + # infinite fail loop. (#9893) + if agent_result.get("compression_exhausted") and session_entry and session_key: + logger.info( + "Auto-resetting session %s after compression exhaustion.", + session_entry.session_id, + ) + self.session_store.reset_session(session_key) + self._evict_cached_agent(session_key) + self._session_model_overrides.pop(session_key, None) + response = (response or "") + ( + "\n\n🔄 Session auto-reset — the conversation exceeded the " + "maximum context size and could not be compressed further. " + "Your next message will start a fresh session." + ) + ts = datetime.now().isoformat() # If this is a fresh session (no history), write the full tool @@ -3895,6 +4698,8 @@ class GatewayRunner: _hist_len = len(history) if 'history' in locals() else 0 if status_code == 401: status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials." + elif status_code == 402: + status_hint = " Your API balance or quota is exhausted. Check your provider dashboard." elif status_code == 429: # Check if this is a plan usage limit (resets on a schedule) vs a transient rate limit _err_body = getattr(e, "response", None) @@ -4023,6 +4828,7 @@ class GatewayRunner: # Get existing session key session_key = self._session_key_for_source(source) + self._invalidate_session_run_generation(session_key, reason="session_reset") # Flush memories in the background (fire-and-forget) so the user # gets the "Session reset!" response immediately. 
@@ -4045,16 +4851,7 @@ class GatewayRunner: _cached = self._agent_cache.get(session_key) _old_agent = _cached[0] if isinstance(_cached, tuple) else _cached if _cached else None if _old_agent is not None: - try: - if hasattr(_old_agent, "shutdown_memory_provider"): - _old_agent.shutdown_memory_provider() - except Exception: - pass - try: - if hasattr(_old_agent, "close"): - _old_agent.close() - except Exception: - pass + self._cleanup_agent_resources(_old_agent) self._evict_cached_agent(session_key) try: @@ -4134,31 +4931,16 @@ class GatewayRunner: async def _handle_profile_command(self, event: MessageEvent) -> str: """Handle /profile — show active profile name and home directory.""" - from hermes_constants import get_hermes_home, display_hermes_home - from pathlib import Path + from hermes_constants import display_hermes_home + from hermes_cli.profiles import get_active_profile_name - home = get_hermes_home() display = display_hermes_home() + profile_name = get_active_profile_name() - # Detect profile name from HERMES_HOME path - # Profile paths look like: ~/.hermes/profiles/ - profiles_parent = Path.home() / ".hermes" / "profiles" - try: - rel = home.relative_to(profiles_parent) - profile_name = str(rel).split("/")[0] - except ValueError: - profile_name = None - - if profile_name: - lines = [ - f"👤 **Profile:** `{profile_name}`", - f"📂 **Home:** `{display}`", - ] - else: - lines = [ - "👤 **Profile:** default", - f"📂 **Home:** `{display}`", - ] + lines = [ + f"👤 **Profile:** `{profile_name}`", + f"📂 **Home:** `{display}`", + ] return "\n".join(lines) @@ -4197,6 +4979,96 @@ class GatewayRunner: ]) return "\n".join(lines) + + async def _handle_agents_command(self, event: MessageEvent) -> str: + """Handle /agents command - list active agents and running tasks.""" + from tools.process_registry import format_uptime_short, process_registry + + now = time.time() + current_session_key = self._session_key_for_source(event.source) + + running_agents: dict = getattr(self, 
"_running_agents", {}) or {} + running_started: dict = getattr(self, "_running_agents_ts", {}) or {} + + agent_rows: list[dict] = [] + for session_key, agent in running_agents.items(): + started = float(running_started.get(session_key, now)) + elapsed = max(0, int(now - started)) + is_pending = agent is _AGENT_PENDING_SENTINEL + agent_rows.append( + { + "session_key": session_key, + "elapsed": elapsed, + "state": "starting" if is_pending else "running", + "session_id": "" if is_pending else str(getattr(agent, "session_id", "") or ""), + "model": "" if is_pending else str(getattr(agent, "model", "") or ""), + } + ) + + agent_rows.sort(key=lambda row: row["elapsed"], reverse=True) + + running_processes: list[dict] = [] + try: + running_processes = [ + p for p in process_registry.list_sessions() + if p.get("status") == "running" + ] + except Exception: + running_processes = [] + + background_tasks = [ + t for t in (getattr(self, "_background_tasks", set()) or set()) + if hasattr(t, "done") and not t.done() + ] + + lines = [ + "🤖 **Active Agents & Tasks**", + "", + f"**Active agents:** {len(agent_rows)}", + ] + + if agent_rows: + for idx, row in enumerate(agent_rows[:12], 1): + current = " · this chat" if row["session_key"] == current_session_key else "" + sid = f" · `{row['session_id']}`" if row["session_id"] else "" + model = f" · `{row['model']}`" if row["model"] else "" + lines.append( + f"{idx}. `{row['session_key']}` · {row['state']} · " + f"{format_uptime_short(row['elapsed'])}{sid}{model}{current}" + ) + if len(agent_rows) > 12: + lines.append(f"... and {len(agent_rows) - 12} more") + + lines.extend( + [ + "", + f"**Running background processes:** {len(running_processes)}", + ] + ) + if running_processes: + for proc in running_processes[:12]: + cmd = " ".join(str(proc.get("command", "")).split()) + if len(cmd) > 90: + cmd = cmd[:87] + "..." 
+ lines.append( + f"- `{proc.get('session_id', '?')}` · " + f"{format_uptime_short(int(proc.get('uptime_seconds', 0)))} · `{cmd}`" + ) + if len(running_processes) > 12: + lines.append(f"... and {len(running_processes) - 12} more") + + lines.extend( + [ + "", + f"**Gateway async jobs:** {len(background_tasks)}", + ] + ) + + if not agent_rows and not running_processes and not background_tasks: + lines.append("") + lines.append("No active agents or running tasks.") + + return "\n".join(lines) async def _handle_stop_command(self, event: MessageEvent) -> str: """Handle /stop command - interrupt a running agent. @@ -4216,22 +5088,49 @@ class GatewayRunner: agent = self._running_agents.get(session_key) if agent is _AGENT_PENDING_SENTINEL: # Force-clean the sentinel so the session is unlocked. - if session_key in self._running_agents: - del self._running_agents[session_key] + await self._interrupt_and_clear_session( + session_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command_pending", + ) logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20]) return "⚡ Stopped. The agent hadn't started yet — you can continue this session." if agent: - agent.interrupt("Stop requested") # Force-clean the session lock so a truly hung agent doesn't # keep it locked forever. - if session_key in self._running_agents: - del self._running_agents[session_key] + await self._interrupt_and_clear_session( + session_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command_handler", + ) return "⚡ Stopped. You can continue this session." else: return "No active task to stop." 
async def _handle_restart_command(self, event: MessageEvent) -> str: """Handle /restart command - drain active work, then restart the gateway.""" + # Defensive idempotency check: if the previous gateway process + # recorded this same /restart (same platform + update_id) and the new + # process is seeing it *again*, this is a re-delivery caused by PTB's + # graceful-shutdown `get_updates` ACK failing on the way out ("Error + # while calling `get_updates` one more time to mark all fetched + # updates. Suppressing error to ensure graceful shutdown. When + # polling for updates is restarted, updates may be received twice." + # in gateway.log). Ignoring the stale redelivery prevents a + # self-perpetuating restart loop where every fresh gateway + # re-processes the same /restart command and immediately restarts + # again. + if self._is_stale_restart_redelivery(event): + logger.info( + "Ignoring redelivered /restart (platform=%s, update_id=%s) — " + "already processed by a previous gateway instance.", + event.source.platform.value if event.source and event.source.platform else "?", + event.platform_update_id, + ) + return "" + if self._restart_requested or self._draining: count = self._running_agent_count() if count: @@ -4254,6 +5153,26 @@ class GatewayRunner: except Exception as e: logger.debug("Failed to write restart notify file: %s", e) + # Record the triggering platform + update_id in a dedicated dedup + # marker. Unlike .restart_notify.json (which gets unlinked once the + # new gateway sends the "gateway restarted" notification), this + # marker persists so the new gateway can still detect a delayed + # /restart redelivery from Telegram. Overwritten on every /restart. 
+ try: + import json as _json + import time as _time + dedup_data = { + "platform": event.source.platform.value if event.source.platform else None, + "requested_at": _time.time(), + } + if event.platform_update_id is not None: + dedup_data["update_id"] = event.platform_update_id + (_hermes_home / ".restart_last_processed.json").write_text( + _json.dumps(dedup_data) + ) + except Exception as e: + logger.debug("Failed to write restart dedup marker: %s", e) + active_agents = self._running_agent_count() # When running under a service manager (systemd/launchd), use the # service restart path: exit with code 75 so the service manager @@ -4269,6 +5188,58 @@ class GatewayRunner: return f"⏳ Draining {active_agents} active agent(s) before restart..." return "♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`." + def _is_stale_restart_redelivery(self, event: MessageEvent) -> bool: + """Return True if this /restart is a Telegram re-delivery we already handled. + + The previous gateway wrote ``.restart_last_processed.json`` with the + triggering platform + update_id when it processed the /restart. If + we now see a /restart on the same platform with an update_id <= that + recorded value AND the marker is recent (< 5 minutes), it's a + redelivery and should be ignored. + + Only applies to Telegram today (the only platform that exposes a + numeric cross-session update ordering); other platforms return False. + """ + if event is None or event.source is None: + return False + if event.platform_update_id is None: + return False + if event.source.platform is None: + return False + # Only Telegram populates platform_update_id currently; be explicit + # so future platforms aren't accidentally gated by this check. 
+ try: + platform_value = event.source.platform.value + except Exception: + return False + if platform_value != "telegram": + return False + + try: + import json as _json + import time as _time + marker_path = _hermes_home / ".restart_last_processed.json" + if not marker_path.exists(): + return False + data = _json.loads(marker_path.read_text()) + except Exception: + return False + + if data.get("platform") != platform_value: + return False + recorded_uid = data.get("update_id") + if not isinstance(recorded_uid, int): + return False + # Staleness guard: ignore markers older than 5 minutes. A legitimately + # old marker (e.g. crash recovery where notify never fired) should not + # swallow a fresh /restart from the user. + requested_at = data.get("requested_at") + if isinstance(requested_at, (int, float)): + if _time.time() - requested_at > 300: + return False + return event.platform_update_id <= recorded_uid + + async def _handle_help_command(self, event: MessageEvent) -> str: """Handle /help command - list available commands.""" from hermes_cli.commands import gateway_help_lines @@ -4731,6 +5702,7 @@ class GatewayRunner: async def _handle_personality_command(self, event: MessageEvent) -> str: """Handle /personality command - list or set a personality.""" import yaml + from hermes_constants import display_hermes_home args = event.get_command_args().strip().lower() config_path = _hermes_home / 'config.yaml' @@ -4748,7 +5720,7 @@ class GatewayRunner: personalities = {} if not personalities: - return "No personalities configured in `~/.hermes/config.yaml`" + return f"No personalities configured in `{display_hermes_home()}/config.yaml`" if not args: lines = ["🎭 **Available Personalities**\n"] @@ -4832,6 +5804,7 @@ class GatewayRunner: message_type=MessageType.TEXT, source=source, raw_message=event.raw_message, + channel_prompt=event.channel_prompt, ) # Let the normal message handler process it @@ -4909,11 +5882,13 @@ class GatewayRunner: """Handle /voice 
[on|off|tts|channel|leave|status] command.""" args = event.get_command_args().strip().lower() chat_id = event.source.chat_id + platform = event.source.platform + voice_key = self._voice_key(platform, chat_id) - adapter = self.adapters.get(event.source.platform) + adapter = self.adapters.get(platform) if args in ("on", "enable"): - self._voice_mode[chat_id] = "voice_only" + self._voice_mode[voice_key] = "voice_only" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) @@ -4923,13 +5898,13 @@ class GatewayRunner: "Use /voice tts to get voice replies for all messages." ) elif args in ("off", "disable"): - self._voice_mode[chat_id] = "off" + self._voice_mode[voice_key] = "off" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) return "Voice mode disabled. Text-only replies." elif args == "tts": - self._voice_mode[chat_id] = "all" + self._voice_mode[voice_key] = "all" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) @@ -4942,7 +5917,7 @@ class GatewayRunner: elif args == "leave": return await self._handle_voice_channel_leave(event) elif args == "status": - mode = self._voice_mode.get(chat_id, "off") + mode = self._voice_mode.get(voice_key, "off") labels = { "off": "Off (text only)", "voice_only": "On (voice reply to voice messages)", @@ -4966,15 +5941,15 @@ class GatewayRunner: return f"Voice mode: {labels.get(mode, mode)}" else: # Toggle: off → on, on/all → off - current = self._voice_mode.get(chat_id, "off") + current = self._voice_mode.get(voice_key, "off") if current == "off": - self._voice_mode[chat_id] = "voice_only" + self._voice_mode[voice_key] = "voice_only" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) return "Voice mode enabled." 
else: - self._voice_mode[chat_id] = "off" + self._voice_mode[voice_key] = "off" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) @@ -5012,8 +5987,7 @@ class GatewayRunner: if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower: return ( "Voice dependencies are missing (PyNaCl / davey). " - "Install or reinstall Hermes with the messaging extra, e.g. " - "`pip install hermes-agent[messaging]`." + f"Install with: `{sys.executable} -m pip install PyNaCl`" ) return f"Failed to join voice channel: {e}" @@ -5021,7 +5995,7 @@ class GatewayRunner: adapter._voice_text_channels[guild_id] = int(event.source.chat_id) if hasattr(adapter, "_voice_sources"): adapter._voice_sources[guild_id] = event.source.to_dict() - self._voice_mode[event.source.chat_id] = "all" + self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all" self._save_voice_modes() self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False) return ( @@ -5048,7 +6022,7 @@ class GatewayRunner: except Exception as e: logger.warning("Error leaving voice channel: %s", e) # Always clean up state even if leave raised an exception - self._voice_mode[event.source.chat_id] = "off" + self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "off" self._save_voice_modes() self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True) if hasattr(adapter, "_voice_input_callback"): @@ -5060,7 +6034,7 @@ class GatewayRunner: Cleans up runner-side voice_mode state that the adapter cannot reach. 
""" - self._voice_mode[chat_id] = "off" + self._voice_mode[self._voice_key(Platform.DISCORD, chat_id)] = "off" self._save_voice_modes() adapter = self.adapters.get(Platform.DISCORD) self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) @@ -5146,7 +6120,7 @@ class GatewayRunner: return False chat_id = event.source.chat_id - voice_mode = self._voice_mode.get(chat_id, "off") + voice_mode = self._voice_mode.get(self._voice_key(event.source.platform, chat_id), "off") is_voice_input = (event.message_type == MessageType.VOICE) should = ( @@ -5341,7 +6315,7 @@ class GatewayRunner: max_snapshots=cp_cfg.get("max_snapshots", 50), ) - cwd = os.getenv("MESSAGING_CWD", str(Path.home())) + cwd = os.getenv("TERMINAL_CWD", str(Path.home())) arg = event.get_command_args().strip() if not arg: @@ -5462,14 +6436,15 @@ class GatewayRunner: session_db=self._session_db, fallback_model=self._fallback_model, ) + try: + return agent.run_conversation( + user_message=prompt, + task_id=task_id, + ) + finally: + self._cleanup_agent_resources(agent) - return agent.run_conversation( - user_message=prompt, - task_id=task_id, - ) - - loop = asyncio.get_event_loop() - result = await loop.run_in_executor(None, run_sync) + result = await self._run_in_executor_with_context(run_sync) response = result.get("final_response", "") if result else "" if not response and result and result.get("error"): @@ -5508,7 +6483,7 @@ class GatewayRunner: pass # Send media files - for media_path in (media_files or []): + for media_path, _is_voice in (media_files or []): try: await adapter.send_document( chat_id=source.chat_id, @@ -5645,14 +6620,16 @@ class GatewayRunner: skip_context_files=True, persist_session=False, ) - return agent.run_conversation( - user_message=btw_prompt, - conversation_history=history_snapshot, - task_id=task_id, - ) + try: + return agent.run_conversation( + user_message=btw_prompt, + conversation_history=history_snapshot, + task_id=task_id, + ) + finally: + 
self._cleanup_agent_resources(agent) - loop = asyncio.get_event_loop() - result = await loop.run_in_executor(None, run_sync) + result = await self._run_in_executor_with_context(run_sync) response = (result.get("final_response") or "") if result else "" if not response and result and result.get("error"): @@ -5684,7 +6661,7 @@ class GatewayRunner: except Exception: pass - for media_path in (media_files or []): + for media_path, _is_voice in (media_files or []): try: await adapter.send_file(chat_id=source.chat_id, file_path=media_path) except Exception: @@ -5975,45 +6952,49 @@ class GatewayRunner: model=model, max_iterations=4, quiet_mode=True, + skip_memory=True, enabled_toolsets=["memory"], session_id=session_entry.session_id, ) - tmp_agent._print_fn = lambda *a, **kw: None + try: + tmp_agent._print_fn = lambda *a, **kw: None - compressor = tmp_agent.context_compressor - compress_start = compressor.protect_first_n - compress_start = compressor._align_boundary_forward(msgs, compress_start) - compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start) - if compress_start >= compress_end: - return "Nothing to compress yet (the transcript is still all protected context)." + compressor = tmp_agent.context_compressor + compress_start = compressor.protect_first_n + compress_start = compressor._align_boundary_forward(msgs, compress_start) + compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start) + if compress_start >= compress_end: + return "Nothing to compress yet (the transcript is still all protected context)." 
- loop = asyncio.get_event_loop() - compressed, _ = await loop.run_in_executor( - None, - lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic) - ) + loop = asyncio.get_running_loop() + compressed, _ = await loop.run_in_executor( + None, + lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic) + ) - # _compress_context already calls end_session() on the old session - # (preserving its full transcript in SQLite) and creates a new - # session_id for the continuation. Write the compressed messages - # into the NEW session so the original history stays searchable. - new_session_id = tmp_agent.session_id - if new_session_id != session_entry.session_id: - session_entry.session_id = new_session_id - self.session_store._save() + # _compress_context already calls end_session() on the old session + # (preserving its full transcript in SQLite) and creates a new + # session_id for the continuation. Write the compressed messages + # into the NEW session so the original history stays searchable. 
+ new_session_id = tmp_agent.session_id + if new_session_id != session_entry.session_id: + session_entry.session_id = new_session_id + self.session_store._save() - self.session_store.rewrite_transcript(new_session_id, compressed) - # Reset stored token count — transcript changed, old value is stale - self.session_store.update_session( - session_entry.session_key, last_prompt_tokens=0 - ) - new_tokens = estimate_messages_tokens_rough(compressed) - summary = summarize_manual_compression( - msgs, - compressed, - approx_tokens, - new_tokens, - ) + self.session_store.rewrite_transcript(new_session_id, compressed) + # Reset stored token count — transcript changed, old value is stale + self.session_store.update_session( + session_entry.session_key, last_prompt_tokens=0 + ) + new_tokens = estimate_messages_tokens_rough(compressed) + summary = summarize_manual_compression( + msgs, + compressed, + approx_tokens, + new_tokens, + ) + finally: + self._cleanup_agent_resources(tmp_agent) lines = [f"🗜️ {summary['headline']}"] if focus_topic: lines.append(f"Focus: \"{focus_topic}\"") @@ -6132,8 +7113,7 @@ class GatewayRunner: logger.debug("Memory flush on resume failed: %s", e) # Clear any running agent for this session key - if session_key in self._running_agents: - del self._running_agents[session_key] + self._release_running_agent_state(session_key) # Switch the session entry to point at the old session new_entry = self.session_store.switch_session(session_key, target_id) @@ -6340,6 +7320,11 @@ class GatewayRunner: import asyncio as _asyncio args = event.get_command_args().strip() + + # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash) + import re as _re + args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args) + days = 30 source = None @@ -6367,7 +7352,7 @@ class GatewayRunner: from hermes_state import SessionDB from agent.insights import InsightsEngine - loop = _asyncio.get_event_loop() + loop = _asyncio.get_running_loop() def 
_run_insights(): db = SessionDB() @@ -6384,7 +7369,7 @@ class GatewayRunner: async def _handle_reload_mcp_command(self, event: MessageEvent) -> str: """Handle /reload-mcp command -- disconnect and reconnect all MCP servers.""" - loop = asyncio.get_event_loop() + loop = asyncio.get_running_loop() try: from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _servers, _lock @@ -6563,11 +7548,17 @@ class GatewayRunner: }) async def _handle_debug_command(self, event: MessageEvent) -> str: - """Handle /debug — upload debug report + logs and return paste URLs.""" + """Handle /debug — upload debug report (summary only) and return paste URLs. + + Gateway uploads ONLY the summary report (system info + log tails), + NOT full log files, to protect conversation privacy. Users who need + full log uploads should use ``hermes debug share`` from the CLI. + """ import asyncio from hermes_cli.debug import ( - _capture_dump, collect_debug_report, _read_full_log, - upload_to_pastebin, + _capture_dump, collect_debug_report, + upload_to_pastebin, _schedule_auto_delete, + _GATEWAY_PRIVACY_NOTICE, ) loop = asyncio.get_running_loop() @@ -6576,43 +7567,25 @@ class GatewayRunner: def _collect_and_upload(): dump_text = _capture_dump() report = collect_debug_report(log_lines=200, dump_text=dump_text) - agent_log = _read_full_log("agent") - gateway_log = _read_full_log("gateway") - - if agent_log: - agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log - if gateway_log: - gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log urls = {} - failures = [] - try: urls["Report"] = upload_to_pastebin(report) except Exception as exc: return f"✗ Failed to upload debug report: {exc}" - if agent_log: - try: - urls["agent.log"] = upload_to_pastebin(agent_log) - except Exception: - failures.append("agent.log") + # Schedule auto-deletion after 6 hours + _schedule_auto_delete(list(urls.values())) - if gateway_log: - try: - urls["gateway.log"] = 
upload_to_pastebin(gateway_log) - except Exception: - failures.append("gateway.log") - - lines = ["**Debug report uploaded:**", ""] + lines = [_GATEWAY_PRIVACY_NOTICE, "", "**Debug report uploaded:**", ""] label_width = max(len(k) for k in urls) for label, url in urls.items(): lines.append(f"`{label:<{label_width}}` {url}") - if failures: - lines.append(f"\n_(failed to upload: {', '.join(failures)})_") - - lines.append("\nShare these links with the Hermes team for support.") + lines.append("") + lines.append("⏱ Pastes will auto-delete in 6 hours.") + lines.append("For full log uploads, use `hermes debug share` from the CLI.") + lines.append("Share these links with the Hermes team for support.") return "\n".join(lines) return await loop.run_in_executor(None, _collect_and_upload) @@ -7077,7 +8050,13 @@ class GatewayRunner: """Restore session context variables to their pre-handler values.""" from gateway.session_context import clear_session_vars clear_session_vars(tokens) - + + async def _run_in_executor_with_context(self, func, *args): + """Run blocking work in the thread pool while preserving session contextvars.""" + loop = asyncio.get_running_loop() + ctx = copy_context() + return await loop.run_in_executor(None, ctx.run, func, *args) + async def _enrich_message_with_vision( self, user_text: str, @@ -7232,14 +8211,75 @@ class GatewayRunner: return prefix return user_text - async def _inject_watch_notification(self, synth_text: str, original_event) -> None: + def _build_process_event_source(self, evt: dict): + """Resolve the canonical source for a synthetic background-process event. + + Prefer the persisted session-store origin for the event's session key. + Falling back to the currently active foreground event is what causes + cross-topic bleed, so don't do that. 
+ """ + from gateway.session import SessionSource + + session_key = str(evt.get("session_key") or "").strip() + derived_platform = "" + derived_chat_type = "" + derived_chat_id = "" + + if session_key: + try: + self.session_store._ensure_loaded() + entry = self.session_store._entries.get(session_key) + if entry and getattr(entry, "origin", None): + return entry.origin + except Exception as exc: + logger.debug( + "Synthetic process-event session-store lookup failed for %s: %s", + session_key, + exc, + ) + + _parsed = _parse_session_key(session_key) + if _parsed: + derived_platform = _parsed["platform"] + derived_chat_type = _parsed["chat_type"] + derived_chat_id = _parsed["chat_id"] + + platform_name = str(evt.get("platform") or derived_platform or "").strip().lower() + chat_type = str(evt.get("chat_type") or derived_chat_type or "").strip().lower() + chat_id = str(evt.get("chat_id") or derived_chat_id or "").strip() + if not platform_name or not chat_type or not chat_id: + return None + + try: + platform = Platform(platform_name) + except Exception: + logger.warning( + "Synthetic process event has invalid platform metadata: %r", + platform_name, + ) + return None + + return SessionSource( + platform=platform, + chat_id=chat_id, + chat_type=chat_type, + thread_id=str(evt.get("thread_id") or "").strip() or None, + user_id=str(evt.get("user_id") or "").strip() or None, + user_name=str(evt.get("user_name") or "").strip() or None, + ) + + async def _inject_watch_notification(self, synth_text: str, evt: dict) -> None: """Inject a watch-pattern notification as a synthetic message event. - Uses the source from the original user event to route the notification - back to the correct chat/adapter. + Routing must come from the queued watch event itself, not from whatever + foreground message happened to be active when the queue was drained. 
""" - source = getattr(original_event, "source", None) + source = self._build_process_event_source(evt) if not source: + logger.warning( + "Dropping watch notification with no routing metadata for process %s", + evt.get("session_id", "unknown"), + ) return platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform) adapter = None @@ -7257,7 +8297,12 @@ class GatewayRunner: source=source, internal=True, ) - logger.info("Watch pattern notification — injecting for %s", platform_name) + logger.info( + "Watch pattern notification — injecting for %s chat=%s thread=%s", + platform_name, + source.chat_id, + source.thread_id, + ) await adapter.handle_message(synth_event) except Exception as e: logger.error("Watch notification injection error: %s", e) @@ -7327,33 +8372,42 @@ class GatewayRunner: f"Command: {session.command}\n" f"Output:\n{_out}]" ) + source = self._build_process_event_source({ + "session_id": session_id, + "session_key": session_key, + "platform": platform_name, + "chat_id": chat_id, + "thread_id": thread_id, + "user_id": user_id, + "user_name": user_name, + }) + if not source: + logger.warning( + "Dropping completion notification with no routing metadata for process %s", + session_id, + ) + break + adapter = None for p, a in self.adapters.items(): - if p.value == platform_name: + if p == source.platform: adapter = a break - if adapter and chat_id: + if adapter and source.chat_id: try: from gateway.platforms.base import MessageEvent, MessageType - from gateway.session import SessionSource - from gateway.config import Platform - _platform_enum = Platform(platform_name) - _source = SessionSource( - platform=_platform_enum, - chat_id=chat_id, - thread_id=thread_id or None, - user_id=user_id or None, - user_name=user_name or None, - ) synth_event = MessageEvent( text=synth_text, message_type=MessageType.TEXT, - source=_source, + source=source, internal=True, ) logger.info( - "Process %s finished — injecting agent 
notification for session %s", - session_id, session_key, + "Process %s finished — injecting agent notification for session %s chat=%s thread=%s", + session_id, + session_key, + source.chat_id, + source.thread_id, ) await adapter.handle_message(synth_event) except Exception as e: @@ -7475,6 +8529,108 @@ class GatewayRunner: override = self._session_model_overrides.get(session_key) return override is not None and override.get("model") == agent_model + def _release_running_agent_state(self, session_key: str) -> None: + """Pop ALL per-running-agent state entries for ``session_key``. + + Replaces ad-hoc ``del self._running_agents[key]`` calls scattered + across the gateway. Those sites had drifted: some popped only + ``_running_agents``; some also ``_running_agents_ts``; only one + path also cleared ``_busy_ack_ts``. Each missed entry was a + small, persistent leak — a (str_key → float) tuple per session + per gateway lifetime. + + Use this at every site that ends a running turn, regardless of + cause (normal completion, /stop, /reset, /resume, sentinel + cleanup, stale-eviction). Per-session state that PERSISTS + across turns (``_session_model_overrides``, ``_voice_mode``, + ``_pending_approvals``, ``_update_prompt_pending``) is NOT + touched here — those have their own lifecycles. + """ + if not session_key: + return + self._running_agents.pop(session_key, None) + self._running_agents_ts.pop(session_key, None) + if hasattr(self, "_busy_ack_ts"): + self._busy_ack_ts.pop(session_key, None) + + def _begin_session_run_generation(self, session_key: str) -> int: + """Claim a fresh run generation token for ``session_key``. + + Every top-level gateway turn gets a monotonically increasing token. + If a later command like /stop or /new invalidates that token while the + old worker is still unwinding, the late result can be recognized and + dropped instead of bleeding into the fresh session. 
+ """ + if not session_key: + return 0 + generations = self.__dict__.get("_session_run_generation") + if generations is None: + generations = {} + self._session_run_generation = generations + next_generation = int(generations.get(session_key, 0)) + 1 + generations[session_key] = next_generation + return next_generation + + def _invalidate_session_run_generation(self, session_key: str, *, reason: str = "") -> int: + """Invalidate any in-flight run token for ``session_key``.""" + generation = self._begin_session_run_generation(session_key) + if reason: + logger.info( + "Invalidated run generation for %s → %d (%s)", + session_key[:20], + generation, + reason, + ) + return generation + + def _is_session_run_current(self, session_key: str, generation: int) -> bool: + """Return True when ``generation`` is still current for ``session_key``.""" + if not session_key: + return True + generations = self.__dict__.get("_session_run_generation") or {} + return int(generations.get(session_key, 0)) == int(generation) + + def _bind_adapter_run_generation( + self, + adapter: Any, + session_key: str, + generation: int | None, + ) -> None: + """Bind a gateway run generation to the adapter's active-session event.""" + if not adapter or not session_key or generation is None: + return + try: + interrupt_event = getattr(adapter, "_active_sessions", {}).get(session_key) + if interrupt_event is not None: + setattr(interrupt_event, "_hermes_run_generation", int(generation)) + except Exception: + pass + + async def _interrupt_and_clear_session( + self, + session_key: str, + source: SessionSource, + *, + interrupt_reason: str, + invalidation_reason: str, + release_running_state: bool = True, + ) -> None: + """Interrupt the current run and clear queued session state consistently.""" + if not session_key: + return + running_agent = self._running_agents.get(session_key) + if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + running_agent.interrupt(interrupt_reason) + 
self._invalidate_session_run_generation(session_key, reason=invalidation_reason) + adapter = self.adapters.get(source.platform) + if adapter and hasattr(adapter, "interrupt_session_activity"): + await adapter.interrupt_session_activity(session_key, source.chat_id) + if adapter and hasattr(adapter, "get_pending_message"): + adapter.get_pending_message(session_key) # consume and discard + self._pending_messages.pop(session_key, None) + if release_running_state: + self._release_running_agent_state(session_key) + def _evict_cached_agent(self, session_key: str) -> None: """Remove a cached agent for a session (called on /new, /model, etc).""" _lock = getattr(self, "_agent_cache_lock", None) @@ -7482,6 +8638,153 @@ class GatewayRunner: with _lock: self._agent_cache.pop(session_key, None) + def _release_evicted_agent_soft(self, agent: Any) -> None: + """Soft cleanup for cache-evicted agents — preserves session tool state. + + Called from _enforce_agent_cache_cap and _sweep_idle_cached_agents. + Distinct from _cleanup_agent_resources (full teardown) because a + cache-evicted session may resume at any time — its terminal + sandbox, browser daemon, and tracked bg processes must outlive + the Python AIAgent instance so the next agent built for the + same task_id inherits them. + """ + if agent is None: + return + try: + if hasattr(agent, "release_clients"): + agent.release_clients() + else: + # Older agent instance (shouldn't happen in practice) — + # fall back to the legacy full-close path. + self._cleanup_agent_resources(agent) + except Exception: + pass + + def _enforce_agent_cache_cap(self) -> None: + """Evict oldest cached agents when cache exceeds _AGENT_CACHE_MAX_SIZE. + + Must be called with _agent_cache_lock held. Resource cleanup + (memory provider shutdown, tool resource close) is scheduled + on a daemon thread so the caller doesn't block on slow teardown + while holding the cache lock. 
+ + Agents currently in _running_agents are SKIPPED — their clients, + terminal sandboxes, background processes, and child subagents + are all in active use by the running turn. Evicting them would + tear down those resources mid-turn and crash the request. If + every candidate in the LRU order is active, we simply leave the + cache over the cap; it will be re-checked on the next insert. + """ + _cache = getattr(self, "_agent_cache", None) + if _cache is None: + return + # OrderedDict.popitem(last=False) pops oldest; plain dict lacks the + # arg so skip enforcement if a test fixture swapped the cache type. + if not hasattr(_cache, "move_to_end"): + return + + # Snapshot of agent instances that are actively mid-turn. Use id() + # so the lookup is O(1) and doesn't depend on AIAgent.__eq__ (which + # MagicMock overrides in tests). + running_ids = { + id(a) + for a in getattr(self, "_running_agents", {}).values() + if a is not None and a is not _AGENT_PENDING_SENTINEL + } + + # Walk LRU → MRU and evict excess-LRU entries that aren't mid-turn. + # We only consider entries in the first (size - cap) LRU positions + # as eviction candidates. If one of those slots is held by an + # active agent, we SKIP it without compensating by evicting a + # newer entry — that would penalise a freshly-inserted session + # (which has no cache history to retain) while protecting an + # already-cached long-running one. The cache may therefore stay + # temporarily over cap; it will re-check on the next insert, + # after active turns have finished. + excess = max(0, len(_cache) - _AGENT_CACHE_MAX_SIZE) + evict_plan: List[tuple] = [] # [(key, agent), ...] 
+ if excess > 0: + ordered_keys = list(_cache.keys()) + for key in ordered_keys[:excess]: + entry = _cache.get(key) + agent = entry[0] if isinstance(entry, tuple) and entry else None + if agent is not None and id(agent) in running_ids: + continue # active mid-turn; don't evict, don't substitute + evict_plan.append((key, agent)) + + for key, _ in evict_plan: + _cache.pop(key, None) + + remaining_over_cap = len(_cache) - _AGENT_CACHE_MAX_SIZE + if remaining_over_cap > 0: + logger.warning( + "Agent cache over cap (%d > %d); %d excess slot(s) held by " + "mid-turn agents — will re-check on next insert.", + len(_cache), _AGENT_CACHE_MAX_SIZE, remaining_over_cap, + ) + + for key, agent in evict_plan: + logger.info( + "Agent cache at cap; evicting LRU session=%s (cache_size=%d)", + key, len(_cache), + ) + if agent is not None: + threading.Thread( + target=self._release_evicted_agent_soft, + args=(agent,), + daemon=True, + name=f"agent-cache-evict-{key[:24]}", + ).start() + + def _sweep_idle_cached_agents(self) -> int: + """Evict cached agents whose AIAgent has been idle > _AGENT_CACHE_IDLE_TTL_SECS. + + Safe to call from the session expiry watcher without holding the + cache lock — acquires it internally. Returns the number of entries + evicted. Resource cleanup is scheduled on daemon threads. + + Agents currently in _running_agents are SKIPPED for the same reason + as _enforce_agent_cache_cap: tearing down an active turn's clients + mid-flight would crash the request. 
+ """ + _cache = getattr(self, "_agent_cache", None) + _lock = getattr(self, "_agent_cache_lock", None) + if _cache is None or _lock is None: + return 0 + now = time.time() + to_evict: List[tuple] = [] + running_ids = { + id(a) + for a in getattr(self, "_running_agents", {}).values() + if a is not None and a is not _AGENT_PENDING_SENTINEL + } + with _lock: + for key, entry in list(_cache.items()): + agent = entry[0] if isinstance(entry, tuple) and entry else None + if agent is None: + continue + if id(agent) in running_ids: + continue # mid-turn — don't tear it down + last_activity = getattr(agent, "_last_activity_ts", None) + if last_activity is None: + continue + if (now - last_activity) > _AGENT_CACHE_IDLE_TTL_SECS: + to_evict.append((key, agent)) + for key, _ in to_evict: + _cache.pop(key, None) + for key, agent in to_evict: + logger.info( + "Agent cache idle-TTL evict: session=%s (idle=%.0fs)", + key, now - getattr(agent, "_last_activity_ts", now), + ) + threading.Thread( + target=self._release_evicted_agent_soft, + args=(agent,), + daemon=True, + name=f"agent-cache-idle-{key[:24]}", + ).start() + return len(to_evict) + # ------------------------------------------------------------------ # Proxy mode: forward messages to a remote Hermes API server # ------------------------------------------------------------------ @@ -7509,6 +8812,7 @@ class GatewayRunner: source: "SessionSource", session_id: str, session_key: str = None, + run_generation: Optional[int] = None, event_message_id: Optional[str] = None, ) -> Dict[str, Any]: """Forward the message to a remote Hermes API server instead of @@ -7544,6 +8848,11 @@ class GatewayRunner: proxy_key = os.getenv("GATEWAY_PROXY_KEY", "").strip() + def _run_still_current() -> bool: + if run_generation is None or not session_key: + return True + return self._is_session_run_current(session_key, run_generation) + # Build messages in OpenAI chat format -------------------------- # # The remote api_server can maintain session 
continuity via @@ -7613,12 +8922,15 @@ class GatewayRunner: if _adapter: _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True) _effective_cursor = _scfg.cursor if _adapter_supports_edit else "" + _buffer_only = False if source.platform == Platform.MATRIX: _effective_cursor = "" + _buffer_only = True _consumer_cfg = StreamConsumerConfig( edit_interval=_scfg.edit_interval, buffer_threshold=_scfg.buffer_threshold, cursor=_effective_cursor, + buffer_only=_buffer_only, ) _stream_consumer = GatewayStreamConsumer( adapter=_adapter, @@ -7670,6 +8982,21 @@ class GatewayRunner: # Parse SSE stream buffer = "" async for chunk in resp.content.iter_any(): + if not _run_still_current(): + logger.info( + "Discarding stale proxy stream for %s — generation %d is no longer current", + session_key[:20] if session_key else "?", + run_generation or 0, + ) + return { + "final_response": "", + "messages": [], + "api_calls": 0, + "tools": [], + "history_offset": len(history), + "session_id": session_id, + "response_previewed": False, + } text = chunk.decode("utf-8", errors="replace") buffer += text @@ -7719,6 +9046,21 @@ class GatewayRunner: stream_task.cancel() _elapsed = time.time() - _start + if not _run_still_current(): + logger.info( + "Discarding stale proxy result for %s — generation %d is no longer current", + session_key[:20] if session_key else "?", + run_generation or 0, + ) + return { + "final_response": "", + "messages": [], + "api_calls": 0, + "tools": [], + "history_offset": len(history), + "session_id": session_id, + "response_previewed": False, + } logger.info( "proxy response: url=%s session=%s time=%.1fs response=%d chars", proxy_url, (session_id or "")[:20], _elapsed, len(full_response), @@ -7747,8 +9089,10 @@ class GatewayRunner: source: SessionSource, session_id: str, session_key: str = None, + run_generation: Optional[int] = None, _interrupt_depth: int = 0, event_message_id: Optional[str] = None, + channel_prompt: Optional[str] = None, ) -> 
Dict[str, Any]: """ Run the agent with the given message and context. @@ -7771,11 +9115,17 @@ class GatewayRunner: source=source, session_id=session_id, session_key=session_key, + run_generation=run_generation, event_message_id=event_message_id, ) from run_agent import AIAgent import queue + + def _run_still_current() -> bool: + if run_generation is None or not session_key: + return True + return self._is_session_run_current(session_key, run_generation) user_config = _load_gateway_config() platform_key = _platform_config_key(source.platform) @@ -7830,7 +9180,7 @@ class GatewayRunner: def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs): """Callback invoked by agent on tool lifecycle events.""" - if not progress_queue: + if not progress_queue or not _run_still_current(): return # Only act on tool.started events (ignore tool.completed, reasoning.available, etc.) @@ -7935,6 +9285,14 @@ class GatewayRunner: while True: try: + if not _run_still_current(): + while not progress_queue.empty(): + try: + progress_queue.get_nowait() + except Exception: + break + return + raw = progress_queue.get_nowait() # Handle dedup messages: update last line with repeat counter @@ -7960,6 +9318,9 @@ class GatewayRunner: await asyncio.sleep(_remaining) continue + if not _run_still_current(): + return + if can_edit and progress_msg_id is not None: # Try to edit the existing progress message full_text = "\n".join(progress_lines) @@ -7995,7 +9356,8 @@ class GatewayRunner: # Restore typing indicator await asyncio.sleep(0.3) - await adapter.send_typing(source.chat_id, metadata=_progress_metadata) + if _run_still_current(): + await adapter.send_typing(source.chat_id, metadata=_progress_metadata) except queue.Empty: await asyncio.sleep(0.3) @@ -8035,10 +9397,12 @@ class GatewayRunner: stream_consumer_holder = [None] # Mutable container for stream consumer # Bridge sync step_callback → async hooks.emit for agent:step events - 
_loop_for_step = asyncio.get_event_loop() + _loop_for_step = asyncio.get_running_loop() _hooks_ref = self.hooks def _step_callback_sync(iteration: int, prev_tools: list) -> None: + if not _run_still_current(): + return try: # prev_tools may be list[str] or list[dict] with "name"/"result" # keys. Normalise to keep "tool_names" backward-compatible for @@ -8069,7 +9433,7 @@ class GatewayRunner: _status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None def _status_callback_sync(event_type: str, message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return try: asyncio.run_coroutine_threadsafe( @@ -8103,8 +9467,12 @@ class GatewayRunner: # Platform.LOCAL ("local") maps to "cli"; others pass through as-is. platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value - # Combine platform context with user-configured ephemeral system prompt + # Combine platform context, per-channel context, and the user-configured + # ephemeral system prompt. combined_ephemeral = context_prompt or "" + event_channel_prompt = (channel_prompt or "").strip() + if event_channel_prompt: + combined_ephemeral = (combined_ephemeral + "\n\n" + event_channel_prompt).strip() if self._ephemeral_system_prompt: combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip() @@ -8179,12 +9547,15 @@ class GatewayRunner: # Some Matrix clients render the streaming cursor # as a visible tofu/white-box artifact. Keep # streaming text on Matrix, but suppress the cursor. 
+ _buffer_only = False if source.platform == Platform.MATRIX: _effective_cursor = "" + _buffer_only = True _consumer_cfg = StreamConsumerConfig( edit_interval=_scfg.edit_interval, buffer_threshold=_scfg.buffer_threshold, cursor=_effective_cursor, + buffer_only=_buffer_only, ) _stream_consumer = GatewayStreamConsumer( adapter=_adapter, @@ -8193,12 +9564,16 @@ class GatewayRunner: metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None, ) if _want_stream_deltas: - _stream_delta_cb = _stream_consumer.on_delta + def _stream_delta_cb(text: str) -> None: + if _run_still_current(): + _stream_consumer.on_delta(text) stream_consumer_holder[0] = _stream_consumer except Exception as _sc_err: logger.debug("Could not set up stream consumer: %s", _sc_err) def _interim_assistant_cb(text: str, *, already_streamed: bool = False) -> None: + if not _run_still_current(): + return if _stream_consumer is not None: if already_streamed: _stream_consumer.on_segment_break() @@ -8238,6 +9613,19 @@ class GatewayRunner: cached = _cache.get(session_key) if cached and cached[1] == _sig: agent = cached[0] + # Refresh LRU order so the cap enforcement evicts + # truly-oldest entries, not the one we just used. + if hasattr(_cache, "move_to_end"): + try: + _cache.move_to_end(session_key) + except KeyError: + pass + # Reset activity timestamp so the inactivity timeout + # handler doesn't see stale idle time from the previous + # turn and immediately kill this agent. 
(#9051) + agent._last_activity_ts = time.time() + agent._last_activity_desc = "starting new turn (cached)" + agent._api_call_count = 0 logger.debug("Reusing cached agent for session %s", session_key) if agent is None: @@ -8263,12 +9651,14 @@ class GatewayRunner: session_id=session_id, platform=platform_key, user_id=source.user_id, + gateway_session_key=session_key, session_db=self._session_db, fallback_model=self._fallback_model, ) if _cache_lock and _cache is not None: with _cache_lock: _cache[session_key] = (agent, _sig) + self._enforce_agent_cache_cap() logger.debug("Created new agent for session %s (sig=%s)", session_key, _sig) # Per-message state — callbacks and reasoning config change every @@ -8282,9 +9672,12 @@ class GatewayRunner: agent.service_tier = self._service_tier agent.request_overrides = turn_route.get("request_overrides") - # Background review delivery — send "💾 Memory updated" etc. to user - def _bg_review_send(message: str) -> None: - if not _status_adapter: + _bg_review_release = threading.Event() + _bg_review_pending: list[str] = [] + _bg_review_pending_lock = threading.Lock() + + def _deliver_bg_review_message(message: str) -> None: + if not _status_adapter or not _run_still_current(): return try: asyncio.run_coroutine_threadsafe( @@ -8298,7 +9691,39 @@ class GatewayRunner: except Exception as _e: logger.debug("background_review_callback error: %s", _e) + def _release_bg_review_messages() -> None: + _bg_review_release.set() + with _bg_review_pending_lock: + pending = list(_bg_review_pending) + _bg_review_pending.clear() + for queued in pending: + _deliver_bg_review_message(queued) + + # Background review delivery — send "💾 Memory updated" etc. 
to user + def _bg_review_send(message: str) -> None: + if not _status_adapter or not _run_still_current(): + return + if not _bg_review_release.is_set(): + with _bg_review_pending_lock: + if not _bg_review_release.is_set(): + _bg_review_pending.append(message) + return + _deliver_bg_review_message(message) + agent.background_review_callback = _bg_review_send + # Register the release hook on the adapter so base.py's finally + # block can fire it after delivering the main response. + if _status_adapter and session_key: + if getattr(type(_status_adapter), "register_post_delivery_callback", None) is not None: + _status_adapter.register_post_delivery_callback( + session_key, + _release_bg_review_messages, + generation=run_generation, + ) + else: + _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None) + if _pdc is not None: + _pdc[session_key] = _release_bg_review_messages # Store agent reference for interrupt support agent_holder[0] = agent @@ -8407,7 +9832,7 @@ class GatewayRunner: # false positives from MagicMock auto-attribute creation in tests. if getattr(type(_status_adapter), "send_exec_approval", None) is not None: try: - asyncio.run_coroutine_threadsafe( + _approval_result = asyncio.run_coroutine_threadsafe( _status_adapter.send_exec_approval( chat_id=_status_chat_id, command=cmd, @@ -8417,7 +9842,12 @@ class GatewayRunner: ), _loop_for_step, ).result(timeout=15) - return + if _approval_result.success: + return + logger.warning( + "Button-based approval failed (send returned error), falling back to text: %s", + _approval_result.error, + ) except Exception as _e: logger.warning( "Button-based approval failed, falling back to text: %s", _e @@ -8450,6 +9880,54 @@ class GatewayRunner: if _msn: message = _msn + "\n\n" + message + # Auto-continue: if the loaded history ends with a tool result, + # the previous agent turn was interrupted mid-work (gateway + # restart, crash, SIGTERM). 
Prepend a system note so the model + # finishes processing the pending tool results before addressing + # the user's new message. (#4493) + # + # Session-level resume_pending (set on drain-timeout shutdown) + # escalates the wording — the transcript's last role may be + # anything (tool, assistant with unfinished work, etc.), so we + # give a stronger, reason-aware instruction that subsumes the + # tool-tail case. + _resume_entry = None + if session_key: + try: + _resume_entry = self.session_store._entries.get(session_key) + except Exception: + _resume_entry = None + _is_resume_pending = bool( + _resume_entry is not None and getattr(_resume_entry, "resume_pending", False) + ) + + if _is_resume_pending: + _reason = getattr(_resume_entry, "resume_reason", None) or "restart_timeout" + _reason_phrase = ( + "a gateway restart" + if _reason == "restart_timeout" + else "a gateway shutdown" + if _reason == "shutdown_timeout" + else "a gateway interruption" + ) + message = ( + f"[System note: Your previous turn in this session was interrupted " + f"by {_reason_phrase}. The conversation history below is intact. " + f"If it contains unfinished tool result(s), process them first and " + f"summarize what was accomplished, then address the user's new " + f"message below.]\n\n" + + message + ) + elif agent_history and agent_history[-1].get("role") == "tool": + message = ( + "[System note: Your previous turn was interrupted before you could " + "process the last tool result(s). The conversation history contains " + "tool outputs you haven't responded to yet. 
Please finish processing " + "those results and summarize what was accomplished, then address the " + "user's new message below.]\n\n" + + message + ) + _approval_session_key = session_key or "" _approval_session_token = set_current_session_key(_approval_session_key) register_gateway_notify(_approval_session_key, _approval_notify_sync) @@ -8479,11 +9957,13 @@ class GatewayRunner: _resolved_model = getattr(_agent, "model", None) if _agent else None if not final_response: - error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)" + error_msg = f"⚠️ {result['error']}" if result.get("error") else "" return { "final_response": error_msg, "messages": result.get("messages", []), "api_calls": result.get("api_calls", 0), + "failed": result.get("failed", False), + "compression_exhausted": result.get("compression_exhausted", False), "tools": tools_holder[0] or [], "history_offset": len(agent_history), "last_prompt_tokens": _last_prompt_toks, @@ -8721,9 +10201,8 @@ class GatewayRunner: _agent_warning_raw = float(os.getenv("HERMES_AGENT_TIMEOUT_WARNING", 900)) _agent_warning = _agent_warning_raw if _agent_warning_raw > 0 else None _warning_fired = False - loop = asyncio.get_event_loop() _executor_task = asyncio.ensure_future( - loop.run_in_executor(None, run_sync) + self._run_in_executor_with_context(run_sync) ) _inactivity_timeout = False @@ -8846,7 +10325,7 @@ class GatewayRunner: # Interrupt the agent if it's still running so the thread # pool worker is freed. 
if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"): - _timed_out_agent.interrupt("Execution timed out (inactivity)") + _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT) _timeout_mins = int(_agent_timeout // 60) or 1 @@ -8911,11 +10390,29 @@ class GatewayRunner: if result and adapter and session_key: pending_event = _dequeue_pending_event(adapter, session_key) if result.get("interrupted") and not pending_event and result.get("interrupt_message"): - pending = result.get("interrupt_message") + interrupt_message = result.get("interrupt_message") + if _is_control_interrupt_message(interrupt_message): + logger.info( + "Ignoring control interrupt message for session %s: %s", + session_key[:20] if session_key else "?", + interrupt_message, + ) + else: + pending = interrupt_message elif pending_event: pending = pending_event.text or _build_media_placeholder(pending_event) logger.debug("Processing queued message after agent completion: '%s...'", pending[:40]) + # Leftover /steer: if a steer arrived after the last tool batch + # (e.g. during the final API call), the agent couldn't inject it + # and returned it in result["pending_steer"]. Deliver it as the + # next user turn so it isn't silently dropped. + if result and not pending and not pending_event: + _leftover_steer = result.get("pending_steer") + if _leftover_steer: + pending = _leftover_steer + logger.debug("Delivering leftover /steer as next turn: '%s...'", pending[:40]) + # Safety net: if the pending text is a slash command (e.g. "/stop", # "/new"), discard it — commands should never be passed to the agent # as user input. 
The primary fix is in base.py (commands bypass the @@ -8988,20 +10485,18 @@ class GatewayRunner: pass except Exception as e: logger.debug("Stream consumer wait before queued message failed: %s", e) - _response_previewed = bool(result.get("response_previewed")) + _previewed = bool(result.get("response_previewed")) _already_streamed = bool( - _sc - and ( - getattr(_sc, "final_response_sent", False) - or ( - _response_previewed - and getattr(_sc, "already_sent", False) - ) - ) + (_sc and getattr(_sc, "final_response_sent", False)) + or _previewed ) first_response = result.get("final_response", "") if first_response and not _already_streamed: try: + logger.info( + "Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.", + session_key[:20] if session_key else "?", + ) await adapter.send( source.chat_id, first_response, @@ -9009,6 +10504,32 @@ class GatewayRunner: ) except Exception as e: logger.warning("Failed to send first response before queued message: %s", e) + elif first_response: + logger.info( + "Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.", + session_key[:20] if session_key else "?", + ) + # Release deferred bg-review notifications now that the + # first response has been delivered. Pop from the + # adapter's callback dict (prevents double-fire in + # base.py's finally block) and call it. + if getattr(type(adapter), "pop_post_delivery_callback", None) is not None: + _bg_cb = adapter.pop_post_delivery_callback( + session_key, + generation=run_generation, + ) + if callable(_bg_cb): + try: + _bg_cb() + except Exception: + pass + elif adapter and hasattr(adapter, "_post_delivery_callbacks"): + _bg_cb = adapter._post_delivery_callbacks.pop(session_key, None) + if callable(_bg_cb): + try: + _bg_cb() + except Exception: + pass # else: interrupted — discard the interrupted response ("Operation # interrupted." 
is just noise; the user already knows they sent a # new message). @@ -9017,6 +10538,7 @@ class GatewayRunner: next_source = source next_message = pending next_message_id = None + next_channel_prompt = None if pending_event is not None: next_source = getattr(pending_event, "source", None) or source next_message = await self._prepare_inbound_message_text( @@ -9027,6 +10549,20 @@ class GatewayRunner: if next_message is None: return result next_message_id = getattr(pending_event, "message_id", None) + next_channel_prompt = getattr(pending_event, "channel_prompt", None) + + # Restart typing indicator so the user sees activity while + # the follow-up turn runs. The outer _process_message_background + # typing task is still alive but may be stale. + _followup_adapter = self.adapters.get(source.platform) + if _followup_adapter: + try: + await _followup_adapter.send_typing( + source.chat_id, + metadata=_status_thread_metadata, + ) + except Exception: + pass return await self._run_agent( message=next_message, @@ -9035,8 +10571,10 @@ class GatewayRunner: source=next_source, session_id=session_id, session_key=session_key, + run_generation=run_generation, _interrupt_depth=_interrupt_depth + 1, event_message_id=next_message_id, + channel_prompt=next_channel_prompt, ) finally: # Stop progress sender, interrupt monitor, and notification task @@ -9058,10 +10596,8 @@ class GatewayRunner: # Clean up tracking tracking_task.cancel() - if session_key and session_key in self._running_agents: - del self._running_agents[session_key] if session_key: - self._running_agents_ts.pop(session_key, None) + self._release_running_agent_state(session_key) if self._draining: self._update_runtime_status("draining") @@ -9078,16 +10614,31 @@ class GatewayRunner: # BUT: never suppress delivery when the agent failed — the error # message is new content the user hasn't seen, and it must reach # them even if streaming had sent earlier partial output. 
+ # + # Also never suppress when the final response is "(empty)" — this + # means the model failed to produce content after tool calls (common + # with mimo-v2-pro, GLM-5, etc.). The stream consumer may have + # sent intermediate text ("Let me search for that…") alongside the + # tool call, setting already_sent=True, but that text is NOT the + # final answer. Suppressing delivery here leaves the user staring + # at silence. (#10xxx — "agent stops after web search") _sc = stream_consumer_holder[0] - if _sc and isinstance(response, dict) and not response.get("failed"): - _response_previewed = bool(response.get("response_previewed")) - if ( - getattr(_sc, "final_response_sent", False) - or ( - _response_previewed - and getattr(_sc, "already_sent", False) + if isinstance(response, dict) and not response.get("failed"): + _final = response.get("final_response") or "" + _is_empty_sentinel = not _final or _final == "(empty)" + _streamed = bool( + _sc and getattr(_sc, "final_response_sent", False) + ) + # response_previewed means the interim_assistant_callback already + # sent the final text via the adapter (non-streaming path). + _previewed = bool(response.get("response_previewed")) + if not _is_empty_sentinel and (_streamed or _previewed): + logger.info( + "Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s).", + session_key[:20] if session_key else "?", + _streamed, + _previewed, ) - ): response["already_sent"] = True return response @@ -9175,6 +10726,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = "Replacing existing gateway instance (PID %d) with --replace.", existing_pid, ) + # Record a takeover marker so the target's shutdown handler + # recognises its SIGTERM as a planned takeover and exits 0 + # (rather than exit 1, which would trigger systemd's + # Restart=on-failure and start a flap loop against us). + # Best-effort — proceed even if the write fails. 
+ try: + from gateway.status import write_takeover_marker + write_takeover_marker(existing_pid) + except Exception as e: + logger.debug("Could not write takeover marker: %s", e) try: terminate_pid(existing_pid, force=False) except ProcessLookupError: @@ -9184,6 +10745,13 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = "Permission denied killing PID %d. Cannot replace.", existing_pid, ) + # Marker is scoped to a specific target; clean it up on + # give-up so it doesn't grief an unrelated future shutdown. + try: + from gateway.status import clear_takeover_marker + clear_takeover_marker() + except Exception: + pass return False # Wait up to 10 seconds for the old process to exit for _ in range(20): @@ -9204,6 +10772,13 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = except (ProcessLookupError, PermissionError, OSError): pass remove_pid_file() + # Clean up any takeover marker the old process didn't consume + # (e.g. SIGKILL'd before its shutdown handler could read it). + try: + from gateway.status import clear_takeover_marker + clear_takeover_marker() + except Exception: + pass # Also release all scoped locks left by the old process. # Stopped (Ctrl+Z) processes don't release locks on exit, # leaving stale lock files that block the new gateway from starting. @@ -9271,14 +10846,56 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = # Set up signal handlers def shutdown_signal_handler(): nonlocal _signal_initiated_shutdown - _signal_initiated_shutdown = True - logger.info("Received SIGTERM/SIGINT — initiating shutdown") + # Planned --replace takeover check: when a sibling gateway is + # taking over via --replace, it wrote a marker naming this PID + # before sending SIGTERM. If present, treat the signal as a + # planned shutdown and exit 0 so systemd's Restart=on-failure + # doesn't revive us (which would flap-fight the replacer when + # both services are enabled, e.g. 
hermes.service + hermes- + # gateway.service from pre-rename installs). + planned_takeover = False + try: + from gateway.status import consume_takeover_marker_for_self + planned_takeover = consume_takeover_marker_for_self() + except Exception as e: + logger.debug("Takeover marker check failed: %s", e) + + if planned_takeover: + logger.info( + "Received SIGTERM as a planned --replace takeover — exiting cleanly" + ) + else: + _signal_initiated_shutdown = True + logger.info("Received SIGTERM/SIGINT — initiating shutdown") + # Diagnostic: log all hermes-related processes so we can identify + # what triggered the signal (hermes update, hermes gateway restart, + # a stale detached subprocess, etc.). + try: + import subprocess as _sp + _ps = _sp.run( + ["ps", "aux"], + capture_output=True, text=True, timeout=3, + ) + _hermes_procs = [ + line for line in _ps.stdout.splitlines() + if ("hermes" in line.lower() or "gateway" in line.lower()) + and str(os.getpid()) not in line.split()[1:2] # exclude self + ] + if _hermes_procs: + logger.warning( + "Shutdown diagnostic — other hermes processes running:\n %s", + "\n ".join(_hermes_procs), + ) + else: + logger.info("Shutdown diagnostic — no other hermes processes found") + except Exception: + pass asyncio.create_task(runner.stop()) def restart_signal_handler(): runner.request_restart(detached=False, via_service=True) - loop = asyncio.get_event_loop() + loop = asyncio.get_running_loop() if threading.current_thread() is threading.main_thread(): for sig in (signal.SIGINT, signal.SIGTERM): try: @@ -9372,9 +10989,9 @@ def main(): config = None if args.config: - import json + import yaml with open(args.config, encoding="utf-8") as f: - data = json.load(f) + data = yaml.safe_load(f) config = GatewayConfig.from_dict(data) # Run the gateway - exit with code 1 if no platforms connected, diff --git a/gateway/session.py b/gateway/session.py index 33165dcd9d..81278e8521 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -82,6 +82,7 @@ 
class SessionSource: chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack) user_id_alt: Optional[str] = None # Signal UUID (alternative to phone number) chat_id_alt: Optional[str] = None # Signal group internal ID + is_bot: bool = False # True when the message author is a bot/webhook (Discord) @property def description(self) -> str: @@ -301,6 +302,8 @@ def build_session_context_prompt( lines.append("") lines.append("**Delivery options for scheduled tasks:**") + from hermes_constants import display_hermes_home + # Origin delivery if context.source.platform == Platform.LOCAL: lines.append("- `\"origin\"` → Local output (saved to files)") @@ -309,9 +312,11 @@ def build_session_context_prompt( _hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id ) lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})") - + # Local always available - lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)") + lines.append( + f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)" + ) # Platform home channels for platform, home in context.home_channels.items(): @@ -372,7 +377,19 @@ class SessionEntry: # this session (create a new session_id) so the user starts fresh. # Set by /stop to break stuck-resume loops (#7536). suspended: bool = False - + + # When True the session was interrupted by a gateway restart/shutdown + # drain timeout, but recovery is still expected. Unlike ``suspended``, + # ``resume_pending`` preserves the existing session_id on next access — + # the user stays on the same transcript and the agent auto-continues + # from where it left off. Cleared after the next successful turn. + # Escalation to ``suspended`` is handled by the existing + # ``.restart_failure_counts`` stuck-loop counter (#7536), not by a + # parallel counter on this entry. + resume_pending: bool = False + resume_reason: Optional[str] = None # e.g. 
"restart_timeout" + last_resume_marked_at: Optional[datetime] = None + def to_dict(self) -> Dict[str, Any]: result = { "session_key": self.session_key, @@ -392,6 +409,13 @@ class SessionEntry: "cost_status": self.cost_status, "memory_flushed": self.memory_flushed, "suspended": self.suspended, + "resume_pending": self.resume_pending, + "resume_reason": self.resume_reason, + "last_resume_marked_at": ( + self.last_resume_marked_at.isoformat() + if self.last_resume_marked_at + else None + ), } if self.origin: result["origin"] = self.origin.to_dict() @@ -409,7 +433,15 @@ class SessionEntry: platform = Platform(data["platform"]) except ValueError as e: logger.debug("Unknown platform value %r: %s", data["platform"], e) - + + last_resume_marked_at = None + _lrma = data.get("last_resume_marked_at") + if _lrma: + try: + last_resume_marked_at = datetime.fromisoformat(_lrma) + except (TypeError, ValueError): + last_resume_marked_at = None + return cls( session_key=data["session_key"], session_id=data["session_id"], @@ -429,6 +461,9 @@ class SessionEntry: cost_status=data.get("cost_status", "unknown"), memory_flushed=data.get("memory_flushed", False), suspended=data.get("suspended", False), + resume_pending=data.get("resume_pending", False), + resume_reason=data.get("resume_reason"), + last_resume_marked_at=last_resume_marked_at, ) @@ -705,9 +740,23 @@ class SessionStore: entry = self._entries[session_key] # Auto-reset sessions marked as suspended (e.g. after /stop - # broke a stuck loop — #7536). + # broke a stuck loop — #7536). ``suspended`` is the hard + # forced-wipe signal and always wins over ``resume_pending``, + # so repeated interrupted restarts that escalate via the + # existing ``.restart_failure_counts`` stuck-loop counter + # still converge to a clean slate. if entry.suspended: reset_reason = "suspended" + elif entry.resume_pending: + # Restart-interrupted session: preserve the session_id + # and return the existing entry so the transcript + # reloads intact. 
``resume_pending`` is cleared after + # the NEXT successful turn completes (not here), which + # means a re-interrupted retry keeps trying — the + # stuck-loop counter handles terminal escalation. + entry.updated_at = now + self._save() + return entry else: reset_reason = self._should_reset(entry, source) if not reset_reason: @@ -797,6 +846,112 @@ class SessionStore: return True return False + def mark_resume_pending( + self, + session_key: str, + reason: str = "restart_timeout", + ) -> bool: + """Mark a session as resumable after a restart interruption. + + Unlike ``suspend_session()``, this preserves the existing + ``session_id`` and the transcript. The next call to + ``get_or_create_session()`` for this key returns the same entry + so the user auto-resumes on the same conversation lane. + + Returns True if the session existed and was marked. + """ + with self._lock: + self._ensure_loaded_locked() + if session_key in self._entries: + entry = self._entries[session_key] + # Never override an explicit ``suspended`` — that is a hard + # forced-wipe signal (from /stop or stuck-loop escalation). + if entry.suspended: + return False + entry.resume_pending = True + entry.resume_reason = reason + entry.last_resume_marked_at = _now() + self._save() + return True + return False + + def clear_resume_pending(self, session_key: str) -> bool: + """Clear the resume-pending flag after a successful resumed turn. + + Called from the gateway after ``run_conversation()`` returns a + final response for a session that had ``resume_pending=True``, + signalling that recovery succeeded. + + Returns True if a flag was cleared. 
+ """ + with self._lock: + self._ensure_loaded_locked() + entry = self._entries.get(session_key) + if entry is None or not entry.resume_pending: + return False + entry.resume_pending = False + entry.resume_reason = None + entry.last_resume_marked_at = None + self._save() + return True + + def prune_old_entries(self, max_age_days: int) -> int: + """Drop SessionEntry records older than max_age_days. + + Pruning is based on ``updated_at`` (last activity), not ``created_at``. + A session that's been active within the window is kept regardless of + how old it is. Entries marked ``suspended`` are kept — the user + explicitly paused them for later resume. Entries held by an active + process (via has_active_processes_fn) are also kept so long-running + background work isn't orphaned. + + Pruning is functionally identical to a natural reset-policy expiry: + the transcript in SQLite stays, but the session_key → session_id + mapping is dropped and the user starts a fresh session on return. + + ``max_age_days <= 0`` disables pruning; returns 0 immediately. + Returns the number of entries removed. + """ + if max_age_days is None or max_age_days <= 0: + return 0 + from datetime import timedelta + + cutoff = _now() - timedelta(days=max_age_days) + removed_keys: list[str] = [] + + with self._lock: + self._ensure_loaded_locked() + for key, entry in list(self._entries.items()): + if entry.suspended: + continue + # Never prune sessions with an active background process + # attached — the user may still be waiting on output. + # The callback is keyed by session_key (see process_registry. + # has_active_for_session); passing session_id here used to + # never match, so active sessions got pruned anyway. 
+ if self._has_active_processes_fn is not None: + try: + if self._has_active_processes_fn(entry.session_key): + continue + except Exception as exc: + logger.debug( + "has_active_processes_fn raised during prune for %s: %s", + entry.session_key, exc, + ) + if entry.updated_at < cutoff: + removed_keys.append(key) + for key in removed_keys: + self._entries.pop(key, None) + if removed_keys: + self._save() + + if removed_keys: + logger.info( + "SessionStore pruned %d entries older than %d days", + len(removed_keys), max_age_days, + ) + return len(removed_keys) + def suspend_recently_active(self, max_age_seconds: int = 120) -> int: """Mark recently-active sessions as suspended. @@ -805,6 +960,12 @@ class SessionStore: (#7536). Only suspends sessions updated within *max_age_seconds* to avoid resetting long-idle sessions that are harmless to resume. Returns the number of sessions that were suspended. + + Entries flagged ``resume_pending=True`` are skipped — those were + marked intentionally by the drain-timeout path as recoverable. + Terminal escalation for genuinely stuck ``resume_pending`` sessions + is handled by the existing ``.restart_failure_counts`` stuck-loop + counter, which runs after this method on startup. """ from datetime import timedelta @@ -813,6 +974,8 @@ class SessionStore: with self._lock: self._ensure_loaded_locked() for entry in self._entries.values(): + if entry.resume_pending: + continue if not entry.suspended and entry.updated_at >= cutoff: entry.suspended = True count += 1 diff --git a/gateway/session_context.py b/gateway/session_context.py index b9fdcdfaf7..7f8aca3eb9 100644 --- a/gateway/session_context.py +++ b/gateway/session_context.py @@ -37,18 +37,24 @@ needs to replace the import + call site: """ from contextvars import ContextVar +from typing import Any + +# Sentinel to distinguish "never set in this context" from "explicitly set to empty". +# When a contextvar holds _UNSET, we fall back to os.environ (CLI/cron compat). 
+# When it holds "" (after clear_session_vars resets it), we return "" — no fallback. +_UNSET: Any = object() # --------------------------------------------------------------------------- # Per-task session variables # --------------------------------------------------------------------------- -_SESSION_PLATFORM: ContextVar[str] = ContextVar("HERMES_SESSION_PLATFORM", default="") -_SESSION_CHAT_ID: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_ID", default="") -_SESSION_CHAT_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_NAME", default="") -_SESSION_THREAD_ID: ContextVar[str] = ContextVar("HERMES_SESSION_THREAD_ID", default="") -_SESSION_USER_ID: ContextVar[str] = ContextVar("HERMES_SESSION_USER_ID", default="") -_SESSION_USER_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_USER_NAME", default="") -_SESSION_KEY: ContextVar[str] = ContextVar("HERMES_SESSION_KEY", default="") +_SESSION_PLATFORM: ContextVar = ContextVar("HERMES_SESSION_PLATFORM", default=_UNSET) +_SESSION_CHAT_ID: ContextVar = ContextVar("HERMES_SESSION_CHAT_ID", default=_UNSET) +_SESSION_CHAT_NAME: ContextVar = ContextVar("HERMES_SESSION_CHAT_NAME", default=_UNSET) +_SESSION_THREAD_ID: ContextVar = ContextVar("HERMES_SESSION_THREAD_ID", default=_UNSET) +_SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNSET) +_SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET) +_SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET) _VAR_MAP = { "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM, @@ -91,10 +97,17 @@ def set_session_vars( def clear_session_vars(tokens: list) -> None: - """Restore session context variables to their pre-handler values.""" - if not tokens: - return - vars_in_order = [ + """Mark session context variables as explicitly cleared. + + Sets all variables to ``""`` so that ``get_session_env`` returns an empty + string instead of falling back to (potentially stale) ``os.environ`` + values. 
The *tokens* argument is accepted for API compatibility with + callers that saved the return value of ``set_session_vars``, but the + actual clearing uses ``var.set("")`` rather than ``var.reset(token)`` + to ensure the "explicitly cleared" state is distinguishable from + "never set" (which holds the ``_UNSET`` sentinel). + """ + for var in ( _SESSION_PLATFORM, _SESSION_CHAT_ID, _SESSION_CHAT_NAME, @@ -102,9 +115,8 @@ def clear_session_vars(tokens: list) -> None: _SESSION_USER_ID, _SESSION_USER_NAME, _SESSION_KEY, - ] - for var, token in zip(vars_in_order, tokens): - var.reset(token) + ): + var.set("") def get_session_env(name: str, default: str = "") -> str: @@ -113,8 +125,13 @@ def get_session_env(name: str, default: str = "") -> str: Drop-in replacement for ``os.getenv("HERMES_SESSION_*", default)``. Resolution order: - 1. Context variable (set by the gateway for concurrency-safe access) - 2. ``os.environ`` (used by CLI, cron scheduler, and tests) + 1. Context variable (set by the gateway for concurrency-safe access). + If the variable was explicitly set (even to ``""``) via + ``set_session_vars`` or ``clear_session_vars``, that value is + returned — **no fallback to os.environ**. + 2. ``os.environ`` (only when the context variable was never set in + this context — i.e. CLI, cron scheduler, and test processes that + don't use ``set_session_vars`` at all). 3. 
*default* """ import os @@ -122,7 +139,7 @@ def get_session_env(name: str, default: str = "") -> str: var = _VAR_MAP.get(name) if var is not None: value = var.get() - if value: + if value is not _UNSET: return value # Fall back to os.environ for CLI, cron, and test compatibility return os.getenv(name, default) diff --git a/gateway/status.py b/gateway/status.py index becf9e8cb6..e1598e1797 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -188,8 +188,8 @@ def _write_json_file(path: Path, payload: dict[str, Any]) -> None: path.write_text(json.dumps(payload)) -def _read_pid_record() -> Optional[dict]: - pid_path = _get_pid_path() +def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]: + pid_path = pid_path or _get_pid_path() if not pid_path.exists(): return None @@ -212,6 +212,18 @@ def _read_pid_record() -> Optional[dict]: return None +def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None: + if not cleanup_stale: + return + try: + if pid_path == _get_pid_path(): + remove_pid_file() + else: + pid_path.unlink(missing_ok=True) + except Exception: + pass + + def write_pid_file() -> None: """Write the current process PID and metadata to the gateway PID file.""" _write_json_file(_get_pid_path(), _build_pid_record()) @@ -413,43 +425,179 @@ def release_all_scoped_locks() -> int: return removed -def get_running_pid() -> Optional[int]: +# ── --replace takeover marker ───────────────────────────────────────── +# +# When a new gateway starts with ``--replace``, it SIGTERMs the existing +# gateway so it can take over the bot token. PR #5646 made SIGTERM exit +# the gateway with code 1 so ``Restart=on-failure`` can revive it after +# unexpected kills — but that also means a --replace takeover target +# exits 1, which tricks systemd into reviving it 30 seconds later, +# starting a flap loop against the replacer when both services are +# enabled in the user's systemd (e.g. ``hermes.service`` + ``hermes- +# gateway.service``). 
+# +# The takeover marker breaks the loop: the replacer writes a short-lived +# file naming the target PID + start_time BEFORE sending SIGTERM. +# The target's shutdown handler reads the marker and, if it names +# this process, treats the SIGTERM as a planned takeover and exits 0. +# The marker is unlinked after the target has consumed it, so a stale +# marker left by a crashed replacer can grief at most one future +# shutdown on the same PID — and only within _TAKEOVER_MARKER_TTL_S. + +_TAKEOVER_MARKER_FILENAME = ".gateway-takeover.json" +_TAKEOVER_MARKER_TTL_S = 60 # Marker older than this is treated as stale + + +def _get_takeover_marker_path() -> Path: + """Return the path to the --replace takeover marker file.""" + home = get_hermes_home() + return home / _TAKEOVER_MARKER_FILENAME + + +def write_takeover_marker(target_pid: int) -> bool: + """Record that ``target_pid`` is being replaced by the current process. + + Captures the target's ``start_time`` so that PID reuse after the + target exits cannot later match the marker. Also records the + replacer's PID and a UTC timestamp for TTL-based staleness checks. + + Returns True on successful write, False on any failure. The caller + should proceed with the SIGTERM even if the write fails (the marker + is a best-effort signal, not a correctness requirement). + """ + try: + target_start_time = _get_process_start_time(target_pid) + record = { + "target_pid": target_pid, + "target_start_time": target_start_time, + "replacer_pid": os.getpid(), + "written_at": _utc_now_iso(), + } + _write_json_file(_get_takeover_marker_path(), record) + return True + except (OSError, PermissionError): + return False + + +def consume_takeover_marker_for_self() -> bool: + """Check & unlink the takeover marker if it names the current process. + + Returns True only when a valid (non-stale) marker names this PID + + start_time. 
A returning True indicates the current SIGTERM is a + planned --replace takeover; the caller should exit 0 instead of + signalling ``_signal_initiated_shutdown``. + + Always unlinks the marker on match (and on detected staleness) so + subsequent unrelated signals don't re-trigger. + """ + path = _get_takeover_marker_path() + record = _read_json_file(path) + if not record: + return False + + # Any malformed or stale marker → drop it and return False + try: + target_pid = int(record["target_pid"]) + target_start_time = record.get("target_start_time") + written_at = record.get("written_at") or "" + except (KeyError, TypeError, ValueError): + try: + path.unlink(missing_ok=True) + except OSError: + pass + return False + + # TTL guard: a stale marker older than _TAKEOVER_MARKER_TTL_S is ignored. + stale = False + try: + written_dt = datetime.fromisoformat(written_at) + age = (datetime.now(timezone.utc) - written_dt).total_seconds() + if age > _TAKEOVER_MARKER_TTL_S: + stale = True + except (TypeError, ValueError): + stale = True # Unparseable timestamp — treat as stale + + if stale: + try: + path.unlink(missing_ok=True) + except OSError: + pass + return False + + # Does the marker name THIS process? + our_pid = os.getpid() + our_start_time = _get_process_start_time(our_pid) + matches = ( + target_pid == our_pid + and target_start_time is not None + and our_start_time is not None + and target_start_time == our_start_time + ) + + # Consume the marker whether it matched or not — a marker that doesn't + # match our identity is stale-for-us anyway. + try: + path.unlink(missing_ok=True) + except OSError: + pass + + return matches + + +def clear_takeover_marker() -> None: + """Remove the takeover marker unconditionally. 
Safe to call repeatedly.""" + try: + _get_takeover_marker_path().unlink(missing_ok=True) + except OSError: + pass + + +def get_running_pid( + pid_path: Optional[Path] = None, + *, + cleanup_stale: bool = True, +) -> Optional[int]: """Return the PID of a running gateway instance, or ``None``. Checks the PID file and verifies the process is actually alive. Cleans up stale PID files automatically. """ - record = _read_pid_record() + resolved_pid_path = pid_path or _get_pid_path() + record = _read_pid_record(resolved_pid_path) if not record: - remove_pid_file() + _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) return None try: pid = int(record["pid"]) except (KeyError, TypeError, ValueError): - remove_pid_file() + _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) return None try: os.kill(pid, 0) # signal 0 = existence check, no actual signal sent except (ProcessLookupError, PermissionError): - remove_pid_file() + _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) return None recorded_start = record.get("start_time") current_start = _get_process_start_time(pid) if recorded_start is not None and current_start is not None and current_start != recorded_start: - remove_pid_file() + _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) return None if not _looks_like_gateway_process(pid): if not _record_looks_like_gateway(record): - remove_pid_file() + _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale) return None return pid -def is_gateway_running() -> bool: +def is_gateway_running( + pid_path: Optional[Path] = None, + *, + cleanup_stale: bool = True, +) -> bool: """Check if the gateway daemon is currently running.""" - return get_running_pid() is not None + return get_running_pid(pid_path, cleanup_stale=cleanup_stale) is not None diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index e6d96c802d..78e365712d 100644 --- 
a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -43,6 +43,7 @@ class StreamConsumerConfig: edit_interval: float = 1.0 buffer_threshold: int = 40 cursor: str = " ▉" + buffer_only: bool = False class GatewayStreamConsumer: @@ -99,6 +100,14 @@ class GatewayStreamConsumer: self._flood_strikes = 0 # Consecutive flood-control edit failures self._current_edit_interval = self.cfg.edit_interval # Adaptive backoff self._final_response_sent = False + # Cache adapter lifecycle capability: only platforms that need an + # explicit finalize call (e.g. DingTalk AI Cards) force us to make + # a redundant final edit. Everyone else keeps the fast path. + # Use ``is True`` (not ``bool(...)``) so MagicMock attribute access + # in tests doesn't incorrectly enable this path. + self._adapter_requires_finalize: bool = ( + getattr(adapter, "REQUIRES_EDIT_FINALIZE", False) is True + ) # Think-block filter state (mirrors CLI's _stream_delta tag suppression) self._in_think_block = False @@ -295,10 +304,13 @@ class GatewayStreamConsumer: got_done or got_segment_break or commentary_text is not None - or (elapsed >= self._current_edit_interval - and self._accumulated) - or len(self._accumulated) >= self.cfg.buffer_threshold ) + if not self.cfg.buffer_only: + should_edit = should_edit or ( + (elapsed >= self._current_edit_interval + and self._accumulated) + or len(self._accumulated) >= self.cfg.buffer_threshold + ) current_update_visible = False if should_edit and self._accumulated: @@ -357,7 +369,16 @@ class GatewayStreamConsumer: if not got_done and not got_segment_break and commentary_text is None: display_text += self.cfg.cursor - current_update_visible = await self._send_or_edit(display_text) + # Segment break: finalize the current message so platforms + # that need explicit closure (e.g. DingTalk AI Cards) don't + # leave the previous segment stuck in a loading state when + # the next segment (tool progress, next chunk) creates a + # new message below it. 
got_done has its own finalize + # path below so we don't finalize here for it. + current_update_visible = await self._send_or_edit( + display_text, + finalize=got_segment_break, + ) self._last_edit_time = time.monotonic() if got_done: @@ -368,10 +389,22 @@ class GatewayStreamConsumer: if self._accumulated: if self._fallback_final_send: await self._send_fallback_final(self._accumulated) - elif current_update_visible: + elif ( + current_update_visible + and not self._adapter_requires_finalize + ): + # Mid-stream edit above already delivered the + # final accumulated content. Skip the redundant + # final edit — but only for adapters that don't + # need an explicit finalize signal. self._final_response_sent = True elif self._message_id: - self._final_response_sent = await self._send_or_edit(self._accumulated) + # Either the mid-stream edit didn't run (no + # visible update this tick) OR the adapter needs + # explicit finalize=True to close the stream. + self._final_response_sent = await self._send_or_edit( + self._accumulated, finalize=True, + ) elif not self._already_sent: self._final_response_sent = await self._send_or_edit(self._accumulated) return @@ -397,24 +430,41 @@ class GatewayStreamConsumer: # a real string like "msg_1", not "__no_edit__", so that case # still resets and creates a fresh segment as intended.) if got_segment_break: + # If the segment-break edit failed to deliver the + # accumulated content (flood control that has not yet + # promoted to fallback mode, or fallback mode itself), + # _accumulated still holds pre-boundary text the user + # never saw. Flush that tail as a continuation message + # before the reset below wipes _accumulated — otherwise + # text generated before the tool boundary is silently + # dropped (issue #8124). 
+ if ( + self._accumulated + and not current_update_visible + and self._message_id + and self._message_id != "__no_edit__" + ): + await self._flush_segment_tail_on_edit_failure() self._reset_segment_state(preserve_no_edit=True) await asyncio.sleep(0.05) # Small yield to not busy-loop except asyncio.CancelledError: # Best-effort final edit on cancellation + _best_effort_ok = False if self._accumulated and self._message_id: try: - await self._send_or_edit(self._accumulated) + _best_effort_ok = bool(await self._send_or_edit(self._accumulated)) except Exception: pass - # If we delivered any content before being cancelled, mark the - # final response as sent so the gateway's already_sent check - # doesn't trigger a duplicate message. The 5-second - # stream_task timeout (gateway/run.py) can cancel us while - # waiting on a slow Telegram API call — without this flag the - # gateway falls through to the normal send path. - if self._already_sent: + # Only confirm final delivery if the best-effort send above + # actually succeeded OR if the final response was already + # confirmed before we were cancelled. Previously this + # promoted any partial send (already_sent=True) to + # final_response_sent — which suppressed the gateway's + # fallback send even when only intermediate text (e.g. + # "Let me search…") had been delivered, not the real answer. + if _best_effort_ok and not self._final_response_sent: self._final_response_sent = True except Exception as e: logger.error("Stream consumer error: %s", e) @@ -513,9 +563,41 @@ class GatewayStreamConsumer: self._fallback_final_send = False if not continuation.strip(): # Nothing new to send — the visible partial already matches final text. - self._already_sent = True - self._final_response_sent = True - return + # BUT: if final_text itself has meaningful content (e.g. 
a timeout + # message after a long tool call), the prefix-based continuation + # calculation may wrongly conclude "already shown" because the + # streamed prefix was from a *previous* segment (before the tool + # boundary). In that case, send the full final_text as-is (#10807). + if final_text.strip() and final_text != self._visible_prefix(): + continuation = final_text + else: + # Defence-in-depth for #7183: the last edit may still show the + # cursor character because fallback mode was entered after an + # edit failure left it stuck. Try one final edit to strip it + # so the message doesn't freeze with a visible ▉. Best-effort + # — if this edit also fails (flood control still active), + # _try_strip_cursor has already been called on fallback entry + # and the adaptive-backoff retries will have had their shot. + if ( + self._message_id + and self._last_sent_text + and self.cfg.cursor + and self._last_sent_text.endswith(self.cfg.cursor) + ): + clean_text = self._last_sent_text[:-len(self.cfg.cursor)] + try: + result = await self.adapter.edit_message( + chat_id=self.chat_id, + message_id=self._message_id, + content=clean_text, + ) + if result.success: + self._last_sent_text = clean_text + except Exception: + pass + self._already_sent = True + self._final_response_sent = True + return raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096) safe_limit = max(500, raw_limit - 100) @@ -577,6 +659,39 @@ class GatewayStreamConsumer: err_lower = err.lower() return "flood" in err_lower or "retry after" in err_lower or "rate" in err_lower + async def _flush_segment_tail_on_edit_failure(self) -> None: + """Deliver un-sent tail content before a segment-break reset. + + When an edit fails (flood control, transport error) and a tool + boundary arrives before the next retry, ``_accumulated`` holds text + that was generated but never shown to the user. Without this flush, + the segment reset would discard that tail and leave a frozen cursor + in the partial message. 
+ + Sends the tail that sits after the last successfully-delivered + prefix as a new message, and best-effort strips the stuck cursor + from the previous partial message. + """ + if not self._fallback_final_send: + await self._try_strip_cursor() + visible = self._fallback_prefix or self._visible_prefix() + tail = self._accumulated + if visible and tail.startswith(visible): + tail = tail[len(visible):].lstrip() + tail = self._clean_for_display(tail) + if not tail.strip(): + return + try: + result = await self.adapter.send( + chat_id=self.chat_id, + content=tail, + metadata=self.metadata, + ) + if result.success: + self._already_sent = True + except Exception as e: + logger.error("Segment-break tail flush error: %s", e) + async def _try_strip_cursor(self) -> None: """Best-effort edit to remove the cursor from the last visible message. @@ -609,19 +724,25 @@ class GatewayStreamConsumer: content=text, metadata=self.metadata, ) - if result.success: - self._already_sent = True - return True + # Note: do NOT set _already_sent = True here. + # Commentary messages are interim status updates (e.g. "Using browser + # tool..."), not the final response. Setting already_sent would cause + # the final response to be incorrectly suppressed when there are + # multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454 + return result.success except Exception as e: logger.error("Commentary send error: %s", e) - return False + return False - async def _send_or_edit(self, text: str) -> bool: + async def _send_or_edit(self, text: str, *, finalize: bool = False) -> bool: """Send or edit the streaming message. Returns True if the text was successfully delivered (sent or edited), False otherwise. Callers like the overflow split loop use this to decide whether to advance past the delivered chunk. + + ``finalize`` is True when this is the last edit in a streaming + sequence. """ # Strip MEDIA: directives so they don't appear as visible text. 
# Media files are delivered as native attachments after the stream @@ -655,14 +776,22 @@ class GatewayStreamConsumer: try: if self._message_id is not None: if self._edit_supported: - # Skip if text is identical to what we last sent - if text == self._last_sent_text: + # Skip if text is identical to what we last sent. + # Exception: adapters that require an explicit finalize + # call (REQUIRES_EDIT_FINALIZE) must still receive the + # finalize=True edit even when content is unchanged, so + # their streaming UI can transition out of the in- + # progress state. Everyone else short-circuits. + if text == self._last_sent_text and not ( + finalize and self._adapter_requires_finalize + ): return True # Edit existing message result = await self.adapter.edit_message( chat_id=self.chat_id, message_id=self._message_id, content=text, + finalize=finalize, ) if result.success: self._already_sent = True diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py index 632aa5bae0..b9879e3b55 100644 --- a/hermes_cli/__init__.py +++ b/hermes_cli/__init__.py @@ -11,5 +11,5 @@ Provides subcommands for: - hermes cron - Manage cron jobs """ -__version__ = "0.9.0" -__release_date__ = "2026.4.13" +__version__ = "0.10.0" +__release_date__ = "2026.4.16" diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index e63a1ebb6b..c82bad3f02 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -20,6 +20,7 @@ import logging import os import shutil import shlex +import ssl import stat import base64 import hashlib @@ -70,6 +71,7 @@ DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1" DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com" DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot" +DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1" CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 @@ -77,6 +79,10 @@ 
QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56" QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token" QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 +# Google Gemini OAuth (google-gemini-cli provider, Cloud Code Assist backend) +DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google" +GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60 # refresh 60s before expiry + # ============================================================================= # Provider Registry @@ -121,6 +127,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { auth_type="oauth_external", inference_base_url=DEFAULT_QWEN_BASE_URL, ), + "google-gemini-cli": ProviderConfig( + id="google-gemini-cli", + name="Google Gemini (OAuth)", + auth_type="oauth_external", + inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL, + ), "copilot": ProviderConfig( id="copilot", name="GitHub Copilot", @@ -140,7 +152,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { id="gemini", name="Google AI Studio", auth_type="api_key", - inference_base_url="https://generativelanguage.googleapis.com/v1beta/openai", + inference_base_url="https://generativelanguage.googleapis.com/v1beta", api_key_env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"), base_url_env_var="GEMINI_BASE_URL", ), @@ -222,6 +234,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("XAI_API_KEY",), base_url_env_var="XAI_BASE_URL", ), + "nvidia": ProviderConfig( + id="nvidia", + name="NVIDIA NIM", + auth_type="api_key", + inference_base_url="https://integrate.api.nvidia.com/v1", + api_key_env_vars=("NVIDIA_API_KEY",), + base_url_env_var="NVIDIA_BASE_URL", + ), "ai-gateway": ProviderConfig( id="ai-gateway", name="Vercel AI Gateway", @@ -274,6 +294,22 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("XIAOMI_API_KEY",), base_url_env_var="XIAOMI_BASE_URL", ), + "ollama-cloud": ProviderConfig( + id="ollama-cloud", + name="Ollama Cloud", + auth_type="api_key", + 
inference_base_url=DEFAULT_OLLAMA_CLOUD_BASE_URL, + api_key_env_vars=("OLLAMA_API_KEY",), + base_url_env_var="OLLAMA_BASE_URL", + ), + "bedrock": ProviderConfig( + id="bedrock", + name="AWS Bedrock", + auth_type="aws_sdk", + inference_base_url="https://bedrock-runtime.us-east-1.amazonaws.com", + api_key_env_vars=(), + base_url_env_var="BEDROCK_BASE_URL", + ), } @@ -318,6 +354,9 @@ def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> """ if env_override: return env_override + # No key → nothing to infer from. Return default without inspecting. + if not api_key: + return default_url if api_key.startswith("sk-kimi-"): return KIMI_CODE_BASE_URL return default_url @@ -383,13 +422,16 @@ def _resolve_api_key_provider_secret( # Z.AI has separate billing for general vs coding plans, and global vs China # endpoints. A key that works on one may return "Insufficient balance" on # another. We probe at setup time and store the working endpoint. +# Each entry lists candidate models to try in order — newer coding plan accounts +# may only have access to recent models (glm-5.1, glm-5v-turbo) while older +# ones still use glm-4.7. 
ZAI_ENDPOINTS = [ - # (id, base_url, default_model, label) - ("global", "https://api.z.ai/api/paas/v4", "glm-5", "Global"), - ("cn", "https://open.bigmodel.cn/api/paas/v4", "glm-5", "China"), - ("coding-global", "https://api.z.ai/api/coding/paas/v4", "glm-4.7", "Global (Coding Plan)"), - ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", "glm-4.7", "China (Coding Plan)"), + # (id, base_url, probe_models, label) + ("global", "https://api.z.ai/api/paas/v4", ["glm-5"], "Global"), + ("cn", "https://open.bigmodel.cn/api/paas/v4", ["glm-5"], "China"), + ("coding-global", "https://api.z.ai/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"), + ("coding-cn", "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"), ] @@ -397,35 +439,37 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str """Probe z.ai endpoints to find one that accepts this API key. Returns {"id": ..., "base_url": ..., "model": ..., "label": ...} for the - first working endpoint, or None if all fail. + first working endpoint, or None if all fail. For endpoints with multiple + candidate models, tries each in order and returns the first that succeeds. 
""" - for ep_id, base_url, model, label in ZAI_ENDPOINTS: - try: - resp = httpx.post( - f"{base_url}/chat/completions", - headers={ - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json", - }, - json={ - "model": model, - "stream": False, - "max_tokens": 1, - "messages": [{"role": "user", "content": "ping"}], - }, - timeout=timeout, - ) - if resp.status_code == 200: - logger.debug("Z.AI endpoint probe: %s (%s) OK", ep_id, base_url) - return { - "id": ep_id, - "base_url": base_url, - "model": model, - "label": label, - } - logger.debug("Z.AI endpoint probe: %s returned %s", ep_id, resp.status_code) - except Exception as exc: - logger.debug("Z.AI endpoint probe: %s failed: %s", ep_id, exc) + for ep_id, base_url, probe_models, label in ZAI_ENDPOINTS: + for model in probe_models: + try: + resp = httpx.post( + f"{base_url}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={ + "model": model, + "stream": False, + "max_tokens": 1, + "messages": [{"role": "user", "content": "ping"}], + }, + timeout=timeout, + ) + if resp.status_code == 200: + logger.debug("Z.AI endpoint probe: %s (%s) model=%s OK", ep_id, base_url, model) + return { + "id": ep_id, + "base_url": base_url, + "model": model, + "label": label, + } + logger.debug("Z.AI endpoint probe: %s model=%s returned %s", ep_id, model, resp.status_code) + except Exception as exc: + logger.debug("Z.AI endpoint probe: %s model=%s failed: %s", ep_id, model, exc) return None @@ -440,6 +484,14 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> if env_override: return env_override + # No API key set → don't probe (would fire N×M HTTPS requests with an + # empty Bearer token, all returning 401). 
This path is hit during + # auxiliary-client auto-detection when the user has no Z.AI credentials + # at all — the caller discards the result immediately, so the probe is + # pure latency for every AIAgent construction. + if not api_key: + return default_url + # Check provider-state cache for a previously-detected endpoint. auth_store = _load_auth_store() state = _load_provider_state(auth_store, "zai") or {} @@ -741,6 +793,28 @@ def is_source_suppressed(provider_id: str, source: str) -> bool: return False +def unsuppress_credential_source(provider_id: str, source: str) -> bool: + """Clear a suppression marker so the source will be re-seeded on the next load. + + Returns True if a marker was cleared, False if no marker existed. + """ + with _auth_store_lock(): + auth_store = _load_auth_store() + suppressed = auth_store.get("suppressed_sources") + if not isinstance(suppressed, dict): + return False + provider_list = suppressed.get(provider_id) + if not isinstance(provider_list, list) or source not in provider_list: + return False + provider_list.remove(source) + if not provider_list: + suppressed.pop(provider_id, None) + if not suppressed: + auth_store.pop("suppressed_sources", None) + _save_auth_store(auth_store) + return True + + def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]: """Return persisted auth state for a provider, or None.""" auth_store = _load_auth_store() @@ -906,6 +980,7 @@ def resolve_provider( _PROVIDER_ALIASES = { "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai", "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini", + "x-ai": "xai", "x.ai": "xai", "grok": "xai", "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", "arcee-ai": "arcee", "arceeai": "arcee", @@ -916,14 +991,16 @@ def resolve_provider( "github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp", "aigateway": "ai-gateway", 
"vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway", "opencode": "opencode-zen", "zen": "opencode-zen", - "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", + "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli", "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", "mimo": "xiaomi", "xiaomi-mimo": "xiaomi", + "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock", "go": "opencode-go", "opencode-go-sub": "opencode-go", "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode", # Local server aliases — route through the generic custom provider "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom", - "ollama": "custom", "vllm": "custom", "llamacpp": "custom", + "ollama": "custom", "ollama_cloud": "ollama-cloud", + "vllm": "custom", "llamacpp": "custom", "llama.cpp": "custom", "llama-cpp": "custom", } normalized = _PROVIDER_ALIASES.get(normalized, normalized) @@ -975,6 +1052,15 @@ def resolve_provider( if has_usable_secret(os.getenv(env_var, "")): return pid + # AWS Bedrock — detect via boto3 credential chain (IAM roles, SSO, env vars). + # This runs after API-key providers so explicit keys always win. + try: + from agent.bedrock_adapter import has_aws_credentials + if has_aws_credentials(): + return "bedrock" + except ImportError: + pass # boto3 not installed — skip Bedrock auto-detection + raise AuthError( "No inference provider configured. Run 'hermes model' to choose a " "provider and model, or set an API key (OPENROUTER_API_KEY, " @@ -1217,6 +1303,83 @@ def get_qwen_auth_status() -> Dict[str, Any]: } +# ============================================================================= +# Google Gemini OAuth (google-gemini-cli) — PKCE flow + Cloud Code Assist. 
+# +# Tokens live in ~/.hermes/auth/google_oauth.json (managed by agent.google_oauth). +# The `base_url` here is the marker "cloudcode-pa://google" that run_agent.py +# uses to construct a GeminiCloudCodeClient instead of the default OpenAI SDK. +# Actual HTTP traffic goes to https://cloudcode-pa.googleapis.com/v1internal:*. +# ============================================================================= + +def resolve_gemini_oauth_runtime_credentials( + *, + force_refresh: bool = False, +) -> Dict[str, Any]: + """Resolve runtime OAuth creds for google-gemini-cli.""" + try: + from agent.google_oauth import ( + GoogleOAuthError, + _credentials_path, + get_valid_access_token, + load_credentials, + ) + except ImportError as exc: + raise AuthError( + f"agent.google_oauth is not importable: {exc}", + provider="google-gemini-cli", + code="google_oauth_module_missing", + ) from exc + + try: + access_token = get_valid_access_token(force_refresh=force_refresh) + except GoogleOAuthError as exc: + raise AuthError( + str(exc), + provider="google-gemini-cli", + code=exc.code, + ) from exc + + creds = load_credentials() + base_url = DEFAULT_GEMINI_CLOUDCODE_BASE_URL + return { + "provider": "google-gemini-cli", + "base_url": base_url, + "api_key": access_token, + "source": "google-oauth", + "expires_at_ms": (creds.expires_ms if creds else None), + "auth_file": str(_credentials_path()), + "email": (creds.email if creds else "") or "", + "project_id": (creds.project_id if creds else "") or "", + } + + +def get_gemini_oauth_auth_status() -> Dict[str, Any]: + """Return a status dict for `hermes auth list` / `hermes status`.""" + try: + from agent.google_oauth import _credentials_path, load_credentials + except ImportError: + return {"logged_in": False, "error": "agent.google_oauth unavailable"} + auth_path = _credentials_path() + creds = load_credentials() + if creds is None or not creds.access_token: + return { + "logged_in": False, + "auth_file": str(auth_path), + "error": "not 
logged in", + } + return { + "logged_in": True, + "auth_file": str(auth_path), + "source": "google-oauth", + "api_key": creds.access_token, + "expires_at_ms": creds.expires_ms, + "email": creds.email, + "project_id": creds.project_id, + } + + + # ============================================================================= # SSH / remote session detection # ============================================================================= @@ -1283,49 +1446,6 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]: } -def _write_codex_cli_tokens( - access_token: str, - refresh_token: str, - *, - last_refresh: Optional[str] = None, -) -> None: - """Write refreshed tokens back to ~/.codex/auth.json. - - OpenAI OAuth refresh tokens are single-use and rotate on every refresh. - When Hermes refreshes a token it consumes the old refresh_token; if we - don't write the new pair back, the Codex CLI (or VS Code extension) will - fail with ``refresh_token_reused`` on its next refresh attempt. - - This mirrors the Anthropic write-back to ~/.claude/.credentials.json - via ``_write_claude_code_credentials()``. 
- """ - codex_home = os.getenv("CODEX_HOME", "").strip() - if not codex_home: - codex_home = str(Path.home() / ".codex") - auth_path = Path(codex_home).expanduser() / "auth.json" - try: - existing: Dict[str, Any] = {} - if auth_path.is_file(): - existing = json.loads(auth_path.read_text(encoding="utf-8")) - if not isinstance(existing, dict): - existing = {} - - tokens_dict = existing.get("tokens") - if not isinstance(tokens_dict, dict): - tokens_dict = {} - tokens_dict["access_token"] = access_token - tokens_dict["refresh_token"] = refresh_token - existing["tokens"] = tokens_dict - if last_refresh is not None: - existing["last_refresh"] = last_refresh - - auth_path.parent.mkdir(parents=True, exist_ok=True) - auth_path.write_text(json.dumps(existing, indent=2), encoding="utf-8") - auth_path.chmod(0o600) - except (OSError, IOError) as exc: - logger.debug("Failed to write refreshed tokens to %s: %s", auth_path, exc) - - def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None: """Save Codex OAuth tokens to Hermes auth store (~/.hermes/auth.json).""" if last_refresh is None: @@ -1393,6 +1513,11 @@ def refresh_codex_oauth_pure( "then run `hermes auth` to re-authenticate." ) relogin_required = True + # A 401/403 from the token endpoint always means the refresh token + # is invalid/expired — force relogin even if the body error code + # wasn't one of the known strings above. + if response.status_code in (401, 403) and not relogin_required: + relogin_required = True raise AuthError( message, provider="openai-codex", @@ -1448,12 +1573,6 @@ def _refresh_codex_auth_tokens( updated_tokens["refresh_token"] = refreshed["refresh_token"] _save_codex_tokens(updated_tokens) - # Write back to ~/.codex/auth.json so Codex CLI / VS Code stay in sync. 
- _write_codex_cli_tokens( - refreshed["access_token"], - refreshed["refresh_token"], - last_refresh=refreshed.get("last_refresh"), - ) return updated_tokens @@ -1498,25 +1617,7 @@ def resolve_codex_runtime_credentials( refresh_skew_seconds: int = CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, ) -> Dict[str, Any]: """Resolve runtime credentials from Hermes's own Codex token store.""" - try: - data = _read_codex_tokens() - except AuthError as orig_err: - # Only attempt migration when there are NO tokens stored at all - # (code == "codex_auth_missing"), not when tokens exist but are invalid. - if orig_err.code != "codex_auth_missing": - raise - - # Migration: user had Codex as active provider with old storage (~/.codex/). - cli_tokens = _import_codex_cli_tokens() - if cli_tokens: - logger.info("Migrating Codex credentials from ~/.codex/ to Hermes auth store") - print("⚠️ Migrating Codex credentials to Hermes's own auth store.") - print(" This avoids conflicts with Codex CLI and VS Code.") - print(" Run `hermes auth` to create a fully independent session.\n") - _save_codex_tokens(cli_tokens) - data = _read_codex_tokens() - else: - raise + data = _read_codex_tokens() tokens = dict(data["tokens"]) access_token = str(tokens.get("access_token", "") or "").strip() refresh_timeout_seconds = float(os.getenv("HERMES_CODEX_REFRESH_TIMEOUT_SECONDS", "20")) @@ -1563,7 +1664,7 @@ def _resolve_verify( insecure: Optional[bool] = None, ca_bundle: Optional[str] = None, auth_state: Optional[Dict[str, Any]] = None, -) -> bool | str: +) -> bool | ssl.SSLContext: tls_state = auth_state.get("tls") if isinstance(auth_state, dict) else {} tls_state = tls_state if isinstance(tls_state, dict) else {} @@ -1583,13 +1684,12 @@ def _resolve_verify( if effective_ca: ca_path = str(effective_ca) if not os.path.isfile(ca_path): - import logging - logging.getLogger("hermes.auth").warning( + logger.warning( "CA bundle path does not exist: %s — falling back to default certificates", ca_path, ) return True - 
return ca_path + return ssl.create_default_context(cafile=ca_path) return True @@ -2008,6 +2108,62 @@ def refresh_nous_oauth_from_state( ) +NOUS_DEVICE_CODE_SOURCE = "device_code" + + +def persist_nous_credentials( + creds: Dict[str, Any], + *, + label: Optional[str] = None, +): + """Persist minted Nous OAuth credentials as the singleton provider state + and ensure the credential pool is in sync. + + Nous credentials are read at runtime from two independent locations: + + - ``providers.nous``: singleton state read by + ``resolve_nous_runtime_credentials()`` during 401 recovery and by + ``_seed_from_singletons()`` during pool load. + - ``credential_pool.nous``: used by the runtime ``pool.select()`` path. + + Historically ``hermes auth add nous`` wrote a ``manual:device_code`` pool + entry only, skipping ``providers.nous``. When the 24h agent_key TTL + expired, the recovery path read the empty singleton state and raised + ``AuthError`` silently (``logger.debug`` at INFO level). + + This helper writes ``providers.nous`` then calls ``load_pool("nous")`` so + ``_seed_from_singletons`` materialises the canonical ``device_code`` pool + entry from the singleton. Re-running login upserts the same entry in + place; the pool never accumulates duplicate device_code rows. + + ``label`` is an optional user-chosen display name (from + ``hermes auth add nous --label ``). It gets embedded in the + singleton state so that ``_seed_from_singletons`` uses it as the pool + entry's label on every subsequent ``load_pool("nous")`` instead of the + auto-derived token fingerprint. When ``None``, the auto-derived label + via ``label_from_token`` is used (unchanged default behaviour). + + Returns the upserted :class:`PooledCredential` entry (or ``None`` if + seeding somehow produced no match — shouldn't happen). 
+ """ + from agent.credential_pool import load_pool + + state = dict(creds) + if label and str(label).strip(): + state["label"] = str(label).strip() + + with _auth_store_lock(): + auth_store = _load_auth_store() + _save_provider_state(auth_store, "nous", state) + _save_auth_store(auth_store) + + pool = load_pool("nous") + return next( + (e for e in pool.entries() if e.source == NOUS_DEVICE_CODE_SOURCE), + None, + ) + + def resolve_nous_runtime_credentials( *, min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, @@ -2379,7 +2535,7 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]: if pconfig.base_url_env_var: env_url = os.getenv(pconfig.base_url_env_var, "").strip() - if provider_id == "kimi-coding": + if provider_id in ("kimi-coding", "kimi-coding-cn"): base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url) elif env_url: base_url = env_url @@ -2435,12 +2591,21 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: return get_codex_auth_status() if target == "qwen-oauth": return get_qwen_auth_status() + if target == "google-gemini-cli": + return get_gemini_oauth_auth_status() if target == "copilot-acp": return get_external_process_provider_status(target) # API-key providers pconfig = PROVIDER_REGISTRY.get(target) if pconfig and pconfig.auth_type == "api_key": return get_api_key_provider_status(target) + # AWS SDK providers (Bedrock) — check via boto3 credential chain + if pconfig and pconfig.auth_type == "aws_sdk": + try: + from agent.bedrock_adapter import has_aws_credentials + return {"logged_in": has_aws_credentials(), "provider": target} + except ImportError: + return {"logged_in": False, "provider": target, "error": "boto3 not installed"} return {"logged_in": False} @@ -2465,7 +2630,7 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]: if pconfig.base_url_env_var: env_url = os.getenv(pconfig.base_url_env_var, "").strip() - if provider_id == "kimi-coding": + 
if provider_id in ("kimi-coding", "kimi-coding-cn"): base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url) elif provider_id == "zai": base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url) @@ -2567,6 +2732,17 @@ def _update_config_for_provider( # Clear stale base_url to prevent contamination when switching providers model_cfg.pop("base_url", None) + # Clear stale api_key/api_mode left over from a previous custom provider. + # When the user switches from e.g. a MiniMax custom endpoint + # (api_mode=anthropic_messages, api_key=mxp-...) to a built-in provider + # (e.g. OpenRouter), the stale api_key/api_mode would override the new + # provider's credentials and transport choice. Built-in providers that + # need a specific api_mode (copilot, xai) set it at request-resolution + # time via `_copilot_runtime_api_mode` / `_detect_api_mode_for_url`, so + # removing the persisted value here is safe. + model_cfg.pop("api_key", None) + model_cfg.pop("api_mode", None) + # When switching to a non-OpenRouter provider, ensure model.default is # valid for the new provider. An OpenRouter-formatted name like # "anthropic/claude-opus-4.6" will fail on direct-API providers. @@ -3167,6 +3343,14 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: inference_base_url = auth_state["inference_base_url"] + # Snapshot the prior active_provider BEFORE _save_provider_state + # overwrites it to "nous". If the user picks "Skip (keep current)" + # during model selection below, we restore this so the user's previous + # provider (e.g. openrouter) is preserved. + with _auth_store_lock(): + _prior_store = _load_auth_store() + prior_active_provider = _prior_store.get("active_provider") + with _auth_store_lock(): auth_store = _load_auth_store() _save_provider_state(auth_store, "nous", auth_state) @@ -3226,6 +3410,27 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: print(f"Login succeeded, but could not fetch available models. 
Reason: {message}") # Write provider + model atomically so config is never mismatched. + # If no model was selected (user picked "Skip (keep current)", + # model list fetch failed, or no curated models were available), + # preserve the user's previous provider — don't silently switch + # them to Nous with a mismatched model. The Nous OAuth tokens + # stay saved for future use. + if not selected_model: + # Restore the prior active_provider that _save_provider_state + # overwrote to "nous". config.yaml model.provider is left + # untouched, so the user's previous provider is fully preserved. + with _auth_store_lock(): + auth_store = _load_auth_store() + if prior_active_provider: + auth_store["active_provider"] = prior_active_provider + else: + auth_store.pop("active_provider", None) + _save_auth_store(auth_store) + print() + print("No provider change. Nous credentials saved for future use.") + print(" Run `hermes model` again to switch to Nous Portal.") + return + config_path = _update_config_for_provider( "nous", inference_base_url, default_model=selected_model, ) diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index c1cf0ff618..30e5182949 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -4,6 +4,7 @@ from __future__ import annotations from getpass import getpass import math +import sys import time from types import SimpleNamespace import uuid @@ -32,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL # Providers that support OAuth login in addition to API keys. 
-_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"} +_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} def _get_custom_provider_names() -> list: @@ -147,7 +148,7 @@ def auth_add_command(args) -> None: if provider.startswith(CUSTOM_POOL_PREFIX): requested_type = AUTH_TYPE_API_KEY else: - requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth"} else AUTH_TYPE_API_KEY + requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY pool = load_pool(provider) @@ -160,7 +161,10 @@ def auth_add_command(args) -> None: default_label = _api_key_default_label(len(pool.entries()) + 1) label = (getattr(args, "label", None) or "").strip() if not label: - label = input(f"Label (optional, default: {default_label}): ").strip() or default_label + if sys.stdin.isatty(): + label = input(f"Label (optional, default: {default_label}): ").strip() or default_label + else: + label = default_label entry = PooledCredential( provider=provider, id=uuid.uuid4().hex[:6], @@ -213,22 +217,21 @@ def auth_add_command(args) -> None: ca_bundle=getattr(args, "ca_bundle", None), min_key_ttl_seconds=max(60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))), ) - label = (getattr(args, "label", None) or "").strip() or label_from_token( - creds.get("access_token", ""), - _oauth_default_label(provider, len(pool.entries()) + 1), + # Honor `--label ` so nous matches other providers' UX. The + # helper embeds this into providers.nous so that label_from_token + # doesn't overwrite it on every subsequent load_pool("nous"). 
+ custom_label = (getattr(args, "label", None) or "").strip() or None + entry = auth_mod.persist_nous_credentials(creds, label=custom_label) + shown_label = entry.label if entry is not None else label_from_token( + creds.get("access_token", ""), _oauth_default_label(provider, 1), ) - entry = PooledCredential.from_dict(provider, { - **creds, - "label": label, - "auth_type": AUTH_TYPE_OAUTH, - "source": f"{SOURCE_MANUAL}:device_code", - "base_url": creds.get("inference_base_url"), - }) - pool.add_entry(entry) - print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + print(f'Saved {provider} OAuth device-code credentials: "{shown_label}"') return if provider == "openai-codex": + # Clear any existing suppression marker so a re-link after `hermes auth + # remove openai-codex` works without the new tokens being skipped. + auth_mod.unsuppress_credential_source(provider, "device_code") creds = auth_mod._codex_device_code_login() label = (getattr(args, "label", None) or "").strip() or label_from_token( creds["tokens"]["access_token"], @@ -250,6 +253,27 @@ def auth_add_command(args) -> None: print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') return + if provider == "google-gemini-cli": + from agent.google_oauth import run_gemini_oauth_login_pure + + creds = run_gemini_oauth_login_pure() + label = (getattr(args, "label", None) or "").strip() or ( + creds.get("email") or _oauth_default_label(provider, len(pool.entries()) + 1) + ) + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source=f"{SOURCE_MANUAL}:google_pkce", + access_token=creds["access_token"], + refresh_token=creds.get("refresh_token"), + ) + pool.add_entry(entry) + print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + return + if provider == "qwen-oauth": creds = auth_mod.resolve_qwen_runtime_credentials(refresh_if_expiring=False) label 
= (getattr(args, "label", None) or "").strip() or label_from_token( @@ -327,7 +351,34 @@ def auth_remove_command(args) -> None: # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce), # clear the underlying auth store / credential file so it doesn't get # re-seeded on the next load_pool() call. - elif removed.source == "device_code" and provider in ("openai-codex", "nous"): + elif provider == "openai-codex" and ( + removed.source == "device_code" or removed.source.endswith(":device_code") + ): + # Codex tokens live in TWO places: the Hermes auth store and + # ~/.codex/auth.json (the Codex CLI shared file). On every refresh, + # refresh_codex_oauth_pure() writes to both. So clearing only the + # Hermes auth store is not enough — _seed_from_singletons() will + # auto-import from ~/.codex/auth.json on the next load_pool() and + # the removal is instantly undone. Mark the source as suppressed + # so auto-import is skipped; leave ~/.codex/auth.json untouched so + # the Codex CLI itself keeps working. 
+ from hermes_cli.auth import ( + _load_auth_store, _save_auth_store, _auth_store_lock, + suppress_credential_source, + ) + with _auth_store_lock(): + auth_store = _load_auth_store() + providers_dict = auth_store.get("providers") + if isinstance(providers_dict, dict) and provider in providers_dict: + del providers_dict[provider] + _save_auth_store(auth_store) + print(f"Cleared {provider} OAuth tokens from auth store") + suppress_credential_source(provider, "device_code") + print("Suppressed openai-codex device_code source — it will not be re-seeded.") + print("Note: Codex CLI credentials still live in ~/.codex/auth.json") + print("Run `hermes auth add openai-codex` to re-enable if needed.") + + elif removed.source == "device_code" and provider == "nous": from hermes_cli.auth import ( _load_auth_store, _save_auth_store, _auth_store_lock, ) @@ -368,6 +419,27 @@ def _interactive_auth() -> None: print("=" * 50) auth_list_command(SimpleNamespace(provider=None)) + + # Show AWS Bedrock credential status (not in the pool — uses boto3 chain) + try: + from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region + if has_aws_credentials(): + auth_source = resolve_aws_auth_env_var() or "unknown" + region = resolve_bedrock_region() + print(f"bedrock (AWS SDK credential chain):") + print(f" Auth: {auth_source}") + print(f" Region: {region}") + try: + import boto3 + sts = boto3.client("sts", region_name=region) + identity = sts.get_caller_identity() + arn = identity.get("Arn", "unknown") + print(f" Identity: {arn}") + except Exception: + print(f" Identity: (could not resolve — boto3 STS call failed)") + print() + except ImportError: + pass # boto3 or bedrock_adapter not available print() # Main menu diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py index 667b8915af..8b5b90ef1f 100644 --- a/hermes_cli/backup.py +++ b/hermes_cli/backup.py @@ -201,7 +201,7 @@ def run_backup(args) -> None: else: zf.write(abs_path, 
arcname=str(rel_path)) total_bytes += abs_path.stat().st_size - except (PermissionError, OSError) as exc: + except (PermissionError, OSError, ValueError) as exc: errors.append(f" {rel_path}: {exc}") continue diff --git a/hermes_cli/callbacks.py b/hermes_cli/callbacks.py index 724e6e4c86..fa40eced5e 100644 --- a/hermes_cli/callbacks.py +++ b/hermes_cli/callbacks.py @@ -75,12 +75,12 @@ def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict: if not hasattr(cli, "_secret_deadline"): cli._secret_deadline = 0 try: - value = getpass.getpass(f"{prompt} (hidden, Enter to skip): ") + value = getpass.getpass(f"{prompt} (hidden, ESC or empty Enter to skip): ") except (EOFError, KeyboardInterrupt): value = "" if not value: - cprint(f"\n{_DIM} ⏭ Secret entry cancelled{_RST}") + cprint(f"\n{_DIM} ⏭ Secret entry skipped{_RST}") return { "success": True, "reason": "cancelled", @@ -133,7 +133,7 @@ def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict: cli._app.invalidate() if not value: - cprint(f"\n{_DIM} ⏭ Secret entry cancelled{_RST}") + cprint(f"\n{_DIM} ⏭ Secret entry skipped{_RST}") return { "success": True, "reason": "cancelled", diff --git a/hermes_cli/clipboard.py b/hermes_cli/clipboard.py index fd81ed4c8b..facc8f3c50 100644 --- a/hermes_cli/clipboard.py +++ b/hermes_cli/clipboard.py @@ -7,8 +7,8 @@ CLI tools that ship with the platform (or are commonly installed). 
Platform support: macOS — osascript (always available), pngpaste (if installed) - Windows — PowerShell via .NET System.Windows.Forms.Clipboard - WSL2 — powershell.exe via .NET System.Windows.Forms.Clipboard + Windows — PowerShell via WinForms, Get-Clipboard, file-drop fallback + WSL2 — powershell.exe via WinForms, Get-Clipboard, file-drop fallback Linux — wl-paste (Wayland), xclip (X11) """ @@ -46,10 +46,11 @@ def has_clipboard_image() -> bool: return _macos_has_image() if sys.platform == "win32": return _windows_has_image() - if _is_wsl(): - return _wsl_has_image() - if os.environ.get("WAYLAND_DISPLAY"): - return _wayland_has_image() + # Match _linux_save fallthrough order: WSL → Wayland → X11 + if _is_wsl() and _wsl_has_image(): + return True + if os.environ.get("WAYLAND_DISPLAY") and _wayland_has_image(): + return True return _xclip_has_image() @@ -135,6 +136,114 @@ _PS_EXTRACT_IMAGE = ( "[System.Convert]::ToBase64String($ms.ToArray())" ) +_PS_CHECK_IMAGE_GET_CLIPBOARD = ( + "try { " + "$img = Get-Clipboard -Format Image -ErrorAction Stop;" + "if ($null -ne $img) { 'True' } else { 'False' }" + "} catch { 'False' }" +) + +_PS_EXTRACT_IMAGE_GET_CLIPBOARD = ( + "try { " + "Add-Type -AssemblyName System.Drawing;" + "Add-Type -AssemblyName PresentationCore;" + "Add-Type -AssemblyName WindowsBase;" + "$img = Get-Clipboard -Format Image -ErrorAction Stop;" + "if ($null -eq $img) { exit 1 }" + "$ms = New-Object System.IO.MemoryStream;" + "if ($img -is [System.Drawing.Image]) {" + "$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)" + "} elseif ($img -is [System.Windows.Media.Imaging.BitmapSource]) {" + "$enc = New-Object System.Windows.Media.Imaging.PngBitmapEncoder;" + "$enc.Frames.Add([System.Windows.Media.Imaging.BitmapFrame]::Create($img));" + "$enc.Save($ms)" + "} else { exit 2 }" + "[System.Convert]::ToBase64String($ms.ToArray())" + "} catch { exit 1 }" +) + +_FILEDROP_IMAGE_EXTS = "'.png','.jpg','.jpeg','.gif','.webp','.bmp','.tiff','.tif'" + 
+_PS_CHECK_FILEDROP_IMAGE = ( + "try { " + "$files = Get-Clipboard -Format FileDropList -ErrorAction Stop;" + f"$exts = @({_FILEDROP_IMAGE_EXTS});" + "$hit = $files | Where-Object { $exts -contains ([System.IO.Path]::GetExtension($_).ToLowerInvariant()) } | Select-Object -First 1;" + "if ($null -ne $hit) { 'True' } else { 'False' }" + "} catch { 'False' }" +) + +_PS_EXTRACT_FILEDROP_IMAGE = ( + "try { " + "$files = Get-Clipboard -Format FileDropList -ErrorAction Stop;" + f"$exts = @({_FILEDROP_IMAGE_EXTS});" + "$hit = $files | Where-Object { $exts -contains ([System.IO.Path]::GetExtension($_).ToLowerInvariant()) } | Select-Object -First 1;" + "if ($null -eq $hit) { exit 1 }" + "[System.Convert]::ToBase64String([System.IO.File]::ReadAllBytes($hit))" + "} catch { exit 1 }" +) + +_POWERSHELL_HAS_IMAGE_SCRIPTS = ( + _PS_CHECK_IMAGE, + _PS_CHECK_IMAGE_GET_CLIPBOARD, + _PS_CHECK_FILEDROP_IMAGE, +) + +_POWERSHELL_EXTRACT_IMAGE_SCRIPTS = ( + _PS_EXTRACT_IMAGE, + _PS_EXTRACT_IMAGE_GET_CLIPBOARD, + _PS_EXTRACT_FILEDROP_IMAGE, +) + + +def _run_powershell(exe: str, script: str, timeout: int) -> subprocess.CompletedProcess: + return subprocess.run( + [exe, "-NoProfile", "-NonInteractive", "-Command", script], + capture_output=True, text=True, timeout=timeout, + ) + + +def _write_base64_image(dest: Path, b64_data: str) -> bool: + image_bytes = base64.b64decode(b64_data, validate=True) + dest.write_bytes(image_bytes) + return dest.exists() and dest.stat().st_size > 0 + + +def _powershell_has_image(exe: str, *, timeout: int, label: str) -> bool: + for script in _POWERSHELL_HAS_IMAGE_SCRIPTS: + try: + r = _run_powershell(exe, script, timeout=timeout) + if r.returncode == 0 and "True" in r.stdout: + return True + except FileNotFoundError: + logger.debug("%s not found — clipboard unavailable", exe) + return False + except Exception as e: + logger.debug("%s clipboard image check failed: %s", label, e) + return False + + +def _powershell_save_image(exe: str, dest: Path, *, timeout: 
int, label: str) -> bool: + for script in _POWERSHELL_EXTRACT_IMAGE_SCRIPTS: + try: + r = _run_powershell(exe, script, timeout=timeout) + if r.returncode != 0: + continue + + b64_data = r.stdout.strip() + if not b64_data: + continue + + if _write_base64_image(dest, b64_data): + return True + except FileNotFoundError: + logger.debug("%s not found — clipboard unavailable", exe) + return False + except Exception as e: + logger.debug("%s clipboard image extraction failed: %s", label, e) + dest.unlink(missing_ok=True) + return False + # ── Native Windows ──────────────────────────────────────────────────────── @@ -175,15 +284,7 @@ def _windows_has_image() -> bool: ps = _get_ps_exe() if ps is None: return False - try: - r = subprocess.run( - [ps, "-NoProfile", "-NonInteractive", "-Command", _PS_CHECK_IMAGE], - capture_output=True, text=True, timeout=5, - ) - return r.returncode == 0 and "True" in r.stdout - except Exception as e: - logger.debug("Windows clipboard image check failed: %s", e) - return False + return _powershell_has_image(ps, timeout=5, label="Windows") def _windows_save(dest: Path) -> bool: @@ -192,26 +293,7 @@ def _windows_save(dest: Path) -> bool: if ps is None: logger.debug("No PowerShell found — Windows clipboard image paste unavailable") return False - try: - r = subprocess.run( - [ps, "-NoProfile", "-NonInteractive", "-Command", _PS_EXTRACT_IMAGE], - capture_output=True, text=True, timeout=15, - ) - if r.returncode != 0: - return False - - b64_data = r.stdout.strip() - if not b64_data: - return False - - png_bytes = base64.b64decode(b64_data) - dest.write_bytes(png_bytes) - return dest.exists() and dest.stat().st_size > 0 - - except Exception as e: - logger.debug("Windows clipboard image extraction failed: %s", e) - dest.unlink(missing_ok=True) - return False + return _powershell_save_image(ps, dest, timeout=15, label="Windows") # ── Linux ──────────────────────────────────────────────────────────────── @@ -235,45 +317,12 @@ def _linux_save(dest: 
Path) -> bool: def _wsl_has_image() -> bool: """Check if Windows clipboard has an image (via powershell.exe).""" - try: - r = subprocess.run( - ["powershell.exe", "-NoProfile", "-NonInteractive", "-Command", - _PS_CHECK_IMAGE], - capture_output=True, text=True, timeout=8, - ) - return r.returncode == 0 and "True" in r.stdout - except FileNotFoundError: - logger.debug("powershell.exe not found — WSL clipboard unavailable") - except Exception as e: - logger.debug("WSL clipboard check failed: %s", e) - return False + return _powershell_has_image("powershell.exe", timeout=8, label="WSL") def _wsl_save(dest: Path) -> bool: """Extract clipboard image via powershell.exe → base64 → decode to PNG.""" - try: - r = subprocess.run( - ["powershell.exe", "-NoProfile", "-NonInteractive", "-Command", - _PS_EXTRACT_IMAGE], - capture_output=True, text=True, timeout=15, - ) - if r.returncode != 0: - return False - - b64_data = r.stdout.strip() - if not b64_data: - return False - - png_bytes = base64.b64decode(b64_data) - dest.write_bytes(png_bytes) - return dest.exists() and dest.stat().st_size > 0 - - except FileNotFoundError: - logger.debug("powershell.exe not found — WSL clipboard unavailable") - except Exception as e: - logger.debug("WSL clipboard extraction failed: %s", e) - dest.unlink(missing_ok=True) - return False + return _powershell_save_image("powershell.exe", dest, timeout=15, label="WSL") # ── Wayland (wl-paste) ────────────────────────────────────────────────── diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py index f5616b68d6..9e2181b501 100644 --- a/hermes_cli/codex_models.py +++ b/hermes_cli/codex_models.py @@ -24,7 +24,6 @@ _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [ ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.3-codex", ("gpt-5.2-codex",)), - ("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")), ] diff --git a/hermes_cli/commands.py 
b/hermes_cli/commands.py index e62c7e610c..797acab5e9 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -87,8 +87,12 @@ COMMAND_REGISTRY: list[CommandDef] = [ aliases=("bg",), args_hint=""), CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session", args_hint=""), + CommandDef("agents", "Show active agents and running tasks", "Session", + aliases=("tasks",)), CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session", aliases=("q",), args_hint=""), + CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session", + args_hint=""), CommandDef("status", "Show session info", "Session"), CommandDef("profile", "Show active profile name and home directory", "Info"), CommandDef("sethome", "Set this chat as the home channel", "Session", @@ -99,9 +103,10 @@ COMMAND_REGISTRY: list[CommandDef] = [ # Configuration CommandDef("config", "Show current configuration", "Configuration", cli_only=True), - CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--global]"), + CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"), CommandDef("provider", "Show available providers and current provider", "Configuration"), + CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"), CommandDef("personality", "Set a predefined personality", "Configuration", args_hint="[name]"), @@ -119,7 +124,7 @@ COMMAND_REGISTRY: list[CommandDef] = [ args_hint="[normal|fast|status]", subcommands=("normal", "fast", "status", "on", "off")), CommandDef("skin", "Show or change the display skin/theme", "Configuration", - cli_only=True, args_hint="[name]"), + args_hint="[name]"), CommandDef("voice", "Toggle voice mode", "Configuration", args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")), @@ -154,7 +159,9 @@ COMMAND_REGISTRY: list[CommandDef] = [ 
args_hint="[days]"), CommandDef("platforms", "Show gateway/messaging platform status", "Info", cli_only=True, aliases=("gateway",)), - CommandDef("paste", "Check clipboard for an image and attach it", "Info", + CommandDef("copy", "Copy the last assistant response to clipboard", "Info", + cli_only=True, args_hint="[number]"), + CommandDef("paste", "Attach clipboard image from your clipboard", "Info", cli_only=True), CommandDef("image", "Attach a local image file for your next prompt", "Info", cli_only=True, args_hint=""), @@ -164,7 +171,7 @@ COMMAND_REGISTRY: list[CommandDef] = [ # Exit CommandDef("quit", "Exit the CLI", "Exit", - cli_only=True, aliases=("exit", "q")), + cli_only=True, aliases=("exit",)), ] @@ -253,6 +260,53 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset( ) +# Commands with explicit Level-2 running-agent handlers in gateway/run.py. +# Listed here for introspection / tests; semantically a subset of +# "all resolvable commands" — which is the real bypass set (see +# should_bypass_active_session below). +ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset( + { + "agents", + "approve", + "background", + "commands", + "deny", + "help", + "new", + "profile", + "queue", + "restart", + "status", + "steer", + "stop", + "update", + } +) + + +def should_bypass_active_session(command_name: str | None) -> bool: + """Return True for any resolvable slash command. + + Rationale: every gateway-registered slash command either has a + specific Level-2 handler in gateway/run.py (/stop, /new, /model, + /approve, etc.) or reaches the running-agent catch-all that returns + a "busy — wait or /stop first" response. In both paths the command + is dispatched, not queued. 
+ + Queueing is always wrong for a recognized slash command because the + safety net in gateway.run discards any command text that reaches + the pending queue — which meant a mid-run /model (or /reasoning, + /voice, /insights, /title, /resume, /retry, /undo, /compress, + /usage, /provider, /reload-mcp, /sethome, /reset) would silently + interrupt the agent AND get discarded, producing a zero-char + response. See issue #5057 / PRs #6252, #10370, #4665. + + ACTIVE_SESSION_BYPASS_COMMANDS remains the subset of commands with + explicit Level-2 handlers; the rest fall through to the catch-all. + """ + return resolve_command(command_name) is not None if command_name else False + + def _resolve_config_gates() -> set[str]: """Return canonical names of commands whose ``gateway_config_gate`` is truthy. @@ -443,14 +497,13 @@ def _collect_gateway_skill_entries( # --- Tier 1: Plugin slash commands (never trimmed) --------------------- plugin_pairs: list[tuple[str, str]] = [] try: - from hermes_cli.plugins import get_plugin_manager - pm = get_plugin_manager() - plugin_cmds = getattr(pm, "_plugin_commands", {}) + from hermes_cli.plugins import get_plugin_commands + plugin_cmds = get_plugin_commands() for cmd_name in sorted(plugin_cmds): name = sanitize_name(cmd_name) if sanitize_name else cmd_name if not name: continue - desc = "Plugin command" + desc = plugin_cmds[cmd_name].get("description", "Plugin command") if len(desc) > desc_limit: desc = desc[:desc_limit - 3] + "..." plugin_pairs.append((name, desc)) @@ -582,6 +635,116 @@ def discord_skill_commands( ) +def discord_skill_commands_by_category( + reserved_names: set[str], +) -> tuple[dict[str, list[tuple[str, str, str]]], list[tuple[str, str, str]], int]: + """Return skill entries organized by category for Discord ``/skill`` subcommand groups. + + Skills whose directory is nested at least 2 levels under ``SKILLS_DIR`` + (e.g. ``creative/ascii-art/SKILL.md``) are grouped by their top-level + category. Root-level skills (e.g. 
``dogfood/SKILL.md``) are returned as + *uncategorized* — the caller should register them as direct subcommands + of the ``/skill`` group. + + The same filtering as :func:`discord_skill_commands` is applied: hub + skills excluded, per-platform disabled excluded, names clamped. + + Returns: + ``(categories, uncategorized, hidden_count)`` + + - *categories*: ``{category_name: [(name, description, cmd_key), ...]}`` + - *uncategorized*: ``[(name, description, cmd_key), ...]`` + - *hidden_count*: skills dropped due to Discord group limits + (25 subcommand groups, 25 subcommands per group) + """ + from pathlib import Path as _P + + _platform_disabled: set[str] = set() + try: + from agent.skill_utils import get_disabled_skill_names + _platform_disabled = get_disabled_skill_names(platform="discord") + except Exception: + pass + + # Collect raw skill data -------------------------------------------------- + categories: dict[str, list[tuple[str, str, str]]] = {} + uncategorized: list[tuple[str, str, str]] = [] + _names_used: set[str] = set(reserved_names) + hidden = 0 + + try: + from agent.skill_commands import get_skill_commands + from tools.skills_tool import SKILLS_DIR + _skills_dir = SKILLS_DIR.resolve() + _hub_dir = (SKILLS_DIR / ".hub").resolve() + skill_cmds = get_skill_commands() + + for cmd_key in sorted(skill_cmds): + info = skill_cmds[cmd_key] + skill_path = info.get("skill_md_path", "") + if not skill_path: + continue + sp = _P(skill_path).resolve() + # Skip skills outside SKILLS_DIR or from the hub + if not str(sp).startswith(str(_skills_dir)): + continue + if str(sp).startswith(str(_hub_dir)): + continue + + skill_name = info.get("name", "") + if skill_name in _platform_disabled: + continue + + raw_name = cmd_key.lstrip("/") + # Clamp to 32 chars (Discord limit) + discord_name = raw_name[:32] + if discord_name in _names_used: + continue + _names_used.add(discord_name) + + desc = info.get("description", "") + if len(desc) > 100: + desc = desc[:97] + "..." 
+ + # Determine category from the relative path within SKILLS_DIR. + # e.g. creative/ascii-art/SKILL.md → parts = ("creative", "ascii-art") + try: + rel = sp.parent.relative_to(_skills_dir) + except ValueError: + continue + parts = rel.parts + if len(parts) >= 2: + cat = parts[0] + categories.setdefault(cat, []).append((discord_name, desc, cmd_key)) + else: + uncategorized.append((discord_name, desc, cmd_key)) + except Exception: + pass + + # Enforce Discord limits: 25 subcommand groups, 25 subcommands each ------ + _MAX_GROUPS = 25 + _MAX_PER_GROUP = 25 + + trimmed_categories: dict[str, list[tuple[str, str, str]]] = {} + group_count = 0 + for cat in sorted(categories): + if group_count >= _MAX_GROUPS: + hidden += len(categories[cat]) + continue + entries = categories[cat][:_MAX_PER_GROUP] + hidden += max(0, len(categories[cat]) - _MAX_PER_GROUP) + trimmed_categories[cat] = entries + group_count += 1 + + # Uncategorized skills also count against the 25 top-level limit + remaining_slots = _MAX_GROUPS - group_count + if len(uncategorized) > remaining_slots: + hidden += len(uncategorized) - remaining_slots + uncategorized = uncategorized[:remaining_slots] + + return trimmed_categories, uncategorized, hidden + + def slack_subcommand_map() -> dict[str, str]: """Return subcommand -> /command mapping for Slack /hermes handler. @@ -734,8 +897,7 @@ class SlashCommandCompleter(Completer): return None return word - @staticmethod - def _context_completions(word: str, limit: int = 30): + def _context_completions(self, word: str, limit: int = 30): """Yield Claude Code-style @ context completions. 
Bare ``@`` or ``@partial`` shows static references and matching @@ -934,6 +1096,51 @@ class SlashCommandCompleter(Completer): display_meta=f"{fp} {meta}" if meta else fp, ) + @staticmethod + def _skin_completions(sub_text: str, sub_lower: str): + """Yield completions for /skin from available skins.""" + try: + from hermes_cli.skin_engine import list_skins + for s in list_skins(): + name = s["name"] + if name.startswith(sub_lower) and name != sub_lower: + yield Completion( + name, + start_position=-len(sub_text), + display=name, + display_meta=s.get("description", "") or s.get("source", ""), + ) + except Exception: + pass + + @staticmethod + def _personality_completions(sub_text: str, sub_lower: str): + """Yield completions for /personality from configured personalities.""" + try: + from hermes_cli.config import load_config + personalities = load_config().get("agent", {}).get("personalities", {}) + if "none".startswith(sub_lower) and "none" != sub_lower: + yield Completion( + "none", + start_position=-len(sub_text), + display="none", + display_meta="clear personality overlay", + ) + for name, prompt in personalities.items(): + if name.startswith(sub_lower) and name != sub_lower: + if isinstance(prompt, dict): + meta = prompt.get("description") or prompt.get("system_prompt", "")[:50] + else: + meta = str(prompt)[:50] + yield Completion( + name, + start_position=-len(sub_text), + display=name, + display_meta=meta, + ) + except Exception: + pass + def _model_completions(self, sub_text: str, sub_lower: str): """Yield completions for /model from config aliases + built-in aliases.""" seen = set() @@ -988,10 +1195,17 @@ class SlashCommandCompleter(Completer): sub_text = parts[1] if len(parts) > 1 else "" sub_lower = sub_text.lower() - # Dynamic model alias completions for /model - if " " not in sub_text and base_cmd == "/model": - yield from self._model_completions(sub_text, sub_lower) - return + # Dynamic completions for commands with runtime lists + if " " not in 
sub_text: + if base_cmd == "/model": + yield from self._model_completions(sub_text, sub_lower) + return + if base_cmd == "/skin": + yield from self._skin_completions(sub_text, sub_lower) + return + if base_cmd == "/personality": + yield from self._personality_completions(sub_text, sub_lower) + return # Static subcommand completions if " " not in sub_text and base_cmd in SUBCOMMANDS and self._command_allowed(base_cmd): @@ -1030,6 +1244,22 @@ class SlashCommandCompleter(Completer): display_meta=f"⚡ {short_desc}", ) + # Plugin-registered slash commands + try: + from hermes_cli.plugins import get_plugin_commands + for cmd_name, cmd_info in get_plugin_commands().items(): + if cmd_name.startswith(word): + desc = str(cmd_info.get("description", "Plugin command")) + short_desc = desc[:50] + ("..." if len(desc) > 50 else "") + yield Completion( + self._completion_text(cmd_name, word), + start_position=-len(word), + display=f"/{cmd_name}", + display_meta=f"🔌 {short_desc}", + ) + except Exception: + pass + # --------------------------------------------------------------------------- # Inline auto-suggest (ghost text) for slash commands diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d121bc517f..ef5e3d2fcd 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -12,6 +12,8 @@ This module provides: - hermes config wizard - Re-run setup wizard """ +import copy +import logging import os import platform import re @@ -23,10 +25,11 @@ from dataclasses import dataclass from pathlib import Path from typing import Dict, Any, Optional, List, Tuple -from tools.tool_backend_helpers import managed_nous_tools_enabled as _managed_nous_tools_enabled +logger = logging.getLogger(__name__) _IS_WINDOWS = platform.system() == "Windows" _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") +_LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {} # Env var names written to .env that aren't in OPTIONAL_ENV_VARS # (managed by setup/provider flows directly). 
_EXTRA_ENV_KEYS = frozenset({ @@ -45,7 +48,8 @@ _EXTRA_ENV_KEYS = frozenset({ "WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY", "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS", "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD", - "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME", + "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME", + "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME", # legacy aliases (pre-rename, still read for back-compat) "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT", "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL", "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT", @@ -241,13 +245,41 @@ def _secure_dir(path): pass +def _is_container() -> bool: + """Detect if we're running inside a Docker/Podman/LXC container. + + When Hermes runs in a container with volume-mounted config files, forcing + 0o600 permissions breaks multi-process setups where the gateway and + dashboard run as different UIDs or the volume mount requires broader + permissions. + """ + # Explicit opt-out + if os.environ.get("HERMES_CONTAINER") or os.environ.get("HERMES_SKIP_CHMOD"): + return True + # Docker / Podman marker file + if os.path.exists("/.dockerenv"): + return True + # LXC / cgroup-based detection + try: + with open("/proc/1/cgroup", "r") as f: + cgroup_content = f.read() + if "docker" in cgroup_content or "lxc" in cgroup_content or "kubepods" in cgroup_content: + return True + except (OSError, IOError): + pass + return False + + def _secure_file(path): """Set file to owner-only read/write (0600). No-op on Windows. Skipped in managed mode — the NixOS activation script sets group-readable permissions (0640) on config files. + + Skipped in containers — Docker/Podman volume mounts often need broader + permissions. Set HERMES_SKIP_CHMOD=1 to force-skip on other systems. 
""" - if is_managed(): + if is_managed() or _is_container(): return try: if os.path.exists(str(path)): @@ -373,7 +405,11 @@ DEFAULT_CONFIG = { "container_persistent": True, # Persist filesystem across sessions # Docker volume mounts — share host directories with the container. # Each entry is "host_path:container_path" (standard Docker -v syntax). - # Example: ["/home/user/projects:/workspace/projects", "/data:/data"] + # Example: + # ["/home/user/projects:/workspace/projects", + # "/home/user/.hermes/cache/documents:/output"] + # For gateway MEDIA delivery, write inside Docker to /output/... and emit + # the host-visible path in MEDIA:, not the container path. "docker_volumes": [], # Explicit opt-in: mount the host cwd into /workspace for Docker sessions. # Default off because passing host directories into a sandbox weakens isolation. @@ -390,10 +426,10 @@ DEFAULT_CONFIG = { "command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.) "record_sessions": False, # Auto-record browser sessions as WebM videos "allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.) + "cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome "camofox": { # When true, Hermes sends a stable profile-scoped userId to Camofox - # so the server can map it to a persistent browser profile directory. - # Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR. + # so the server maps it to a persistent Firefox profile automatically. # When false (default), each session gets a random userId (ephemeral). "managed_persistence": False, }, @@ -419,13 +455,27 @@ DEFAULT_CONFIG = { "protect_last_n": 20, # minimum recent messages to keep uncompressed }, - "smart_model_routing": { - "enabled": False, - "max_simple_chars": 160, - "max_simple_words": 28, - "cheap_model": {}, + + # AWS Bedrock provider configuration. + # Only used when model.provider is "bedrock". 
+ "bedrock": { + "region": "", # AWS region for Bedrock API calls (empty = AWS_REGION env var → us-east-1) + "discovery": { + "enabled": True, # Auto-discover models via ListFoundationModels + "provider_filter": [], # Only show models from these providers (e.g. ["anthropic", "amazon"]) + "refresh_interval": 3600, # Cache discovery results for this many seconds + }, + "guardrail": { + # Amazon Bedrock Guardrails — content filtering and safety policies. + # Create a guardrail in the Bedrock console, then set the ID and version here. + # See: https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html + "guardrail_identifier": "", # e.g. "abc123def456" + "guardrail_version": "", # e.g. "1" or "DRAFT" + "stream_processing_mode": "async", # "sync" or "async" + "trace": "disabled", # "enabled", "disabled", or "enabled_full" + }, }, - + # Auxiliary model config — provider:model for each side task. # Format: provider is the provider name, model is the model slug. # "auto" for provider = auto-detect best available provider. 
@@ -439,6 +489,7 @@ DEFAULT_CONFIG = { "base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider) "api_key": "", # API key for base_url (falls back to OPENAI_API_KEY) "timeout": 120, # seconds — LLM API call timeout; vision payloads need generous timeout + "extra_body": {}, # OpenAI-compatible provider-specific request fields "download_timeout": 30, # seconds — image HTTP download timeout; increase for slow connections }, "web_extract": { @@ -447,6 +498,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 360, # seconds (6min) — per-attempt LLM summarization timeout; increase for slow local models + "extra_body": {}, }, "compression": { "provider": "auto", @@ -454,6 +506,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 120, # seconds — compression summarises large contexts; increase for local models + "extra_body": {}, }, "session_search": { "provider": "auto", @@ -461,6 +514,8 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, + "max_concurrency": 3, # Clamp parallel summaries to avoid request-burst 429s on small providers }, "skills_hub": { "provider": "auto", @@ -468,6 +523,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "approval": { "provider": "auto", @@ -475,6 +531,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "mcp": { "provider": "auto", @@ -482,6 +539,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "flush_memories": { "provider": "auto", @@ -489,6 +547,15 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, + }, + "title_generation": { + "provider": "auto", + "model": "", + "base_url": "", + "api_key": "", + "timeout": 30, + "extra_body": {}, }, }, @@ -500,9 +567,14 @@ DEFAULT_CONFIG = { "bell_on_complete": False, "show_reasoning": False, "streaming": False, + "final_response_markdown": "strip", 
# render | strip | raw "inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage) "show_cost": False, # Show $ cost in the status bar (off by default) "skin": "default", + "user_message_preview": { # CLI: how many submitted user-message lines to echo back in scrollback + "first_lines": 2, + "last_lines": 2, + }, "interim_assistant_messages": True, # Gateway: show natural mid-turn assistant status messages "tool_progress_command": False, # Enable /verbose command in messaging gateway "tool_progress_overrides": {}, # DEPRECATED — use display.platforms instead @@ -510,6 +582,11 @@ DEFAULT_CONFIG = { "platforms": {}, # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}} }, + # Web dashboard settings + "dashboard": { + "theme": "default", # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose" + }, + # Privacy settings "privacy": { "redact_pii": False, # When True, hash user IDs and strip phone numbers from LLM context @@ -517,7 +594,7 @@ DEFAULT_CONFIG = { # Text-to-speech configuration "tts": { - "provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local) + "provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local) "edge": { "voice": "en-US-AriaNeural", # Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural @@ -531,6 +608,12 @@ DEFAULT_CONFIG = { "voice": "alloy", # Voices: alloy, echo, fable, onyx, nova, shimmer }, + "xai": { + "voice_id": "eve", + "language": "en", + "sample_rate": 24000, + "bit_rate": 128000, + }, "mistral": { "model": "voxtral-mini-tts-2603", "voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral @@ -638,6 +721,15 @@ DEFAULT_CONFIG = { "allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist) "auto_thread": True, # Auto-create threads on @mention 
in channels (like Slack) "reactions": True, # Add 👀/✅/❌ reactions to messages during processing + "channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads) + # discord_server tool: restrict which actions the agent may call. + # Default (empty) = all actions allowed (subject to bot privileged intents). + # Accepts comma-separated string ("list_guilds,list_channels,fetch_messages") + # or YAML list. Unknown names are dropped with a warning at load time. + # Actions: list_guilds, server_info, list_channels, channel_info, + # list_roles, member_info, search_members, fetch_messages, list_pins, + # pin_message, unpin_message, create_thread, add_role, remove_role. + "server_actions": "", }, # WhatsApp platform settings (gateway mode) @@ -648,13 +740,33 @@ DEFAULT_CONFIG = { # Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n" }, + # Telegram platform settings (gateway mode) + "telegram": { + "channel_prompts": {}, # Per-chat/topic ephemeral system prompts (topics inherit from parent group) + }, + + # Slack platform settings (gateway mode) + "slack": { + "channel_prompts": {}, # Per-channel ephemeral system prompts + }, + + # Mattermost platform settings (gateway mode) + "mattermost": { + "channel_prompts": {}, # Per-channel ephemeral system prompts + }, + # Approval mode for dangerous commands: # manual — always prompt the user (default) # smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk # off — skip all approval prompts (equivalent to --yolo) + # + # cron_mode — what to do when a cron job hits a dangerous command: + # deny — block the command and let the agent find another way (default, safe) + # approve — auto-approve all dangerous commands in cron jobs "approvals": { "mode": "manual", "timeout": 60, + "cron_mode": "deny", }, # Permanently allowed dangerous command patterns (added via "always" approval) @@ -686,6 +798,20 @@ DEFAULT_CONFIG = { "wrap_response": True, }, + # execute_code 
settings — controls the tool used for programmatic tool calls. + "code_execution": { + # Execution mode: + # project (default) — scripts run in the session's working directory + # with the active virtualenv/conda env's python, so project deps + # (pandas, torch, project packages) and relative paths resolve. + # strict — scripts run in an isolated temp directory with + # hermes-agent's own python (sys.executable). Maximum isolation + # and reproducibility; project deps and relative paths won't work. + # Env scrubbing (strips *_API_KEY, *_TOKEN, *_SECRET, ...) and the + # tool whitelist apply identically in both modes. + "mode": "project", + }, + # Logging — controls file logging to ~/.hermes/logs/. # agent.log captures INFO+ (all agent activity); errors.log captures WARNING+. "logging": { @@ -703,7 +829,7 @@ DEFAULT_CONFIG = { }, # Config schema version - bump this when adding new required fields - "_config_version": 17, + "_config_version": 21, } # ============================================================================= @@ -771,6 +897,38 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "XAI_API_KEY": { + "description": "xAI API key", + "prompt": "xAI API key", + "url": "https://console.x.ai/", + "password": True, + "category": "provider", + "advanced": True, + }, + "XAI_BASE_URL": { + "description": "xAI base URL override", + "prompt": "xAI base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, + "NVIDIA_API_KEY": { + "description": "NVIDIA NIM API key (build.nvidia.com or local NIM endpoint)", + "prompt": "NVIDIA NIM API key", + "url": "https://build.nvidia.com/", + "password": True, + "category": "provider", + "advanced": True, + }, + "NVIDIA_BASE_URL": { + "description": "NVIDIA NIM base URL override (e.g. 
http://localhost:8000/v1 for local NIM)", + "prompt": "NVIDIA NIM base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "GLM_API_KEY": { "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)", "prompt": "Z.AI / GLM API key", @@ -912,6 +1070,30 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "HERMES_GEMINI_CLIENT_ID": { + "description": "Google OAuth client ID for google-gemini-cli (optional; defaults to Google's public gemini-cli client)", + "prompt": "Google OAuth client ID (optional — leave empty to use the public default)", + "url": "https://console.cloud.google.com/apis/credentials", + "password": False, + "category": "provider", + "advanced": True, + }, + "HERMES_GEMINI_CLIENT_SECRET": { + "description": "Google OAuth client secret for google-gemini-cli (optional)", + "prompt": "Google OAuth client secret (optional)", + "url": "https://console.cloud.google.com/apis/credentials", + "password": True, + "category": "provider", + "advanced": True, + }, + "HERMES_GEMINI_PROJECT_ID": { + "description": "GCP project ID for paid Gemini tiers (free tier auto-provisions)", + "prompt": "GCP project ID for Gemini OAuth (leave empty for free tier)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "OPENCODE_ZEN_API_KEY": { "description": "OpenCode Zen API key (pay-as-you-go access to curated models)", "prompt": "OpenCode Zen API key", @@ -959,6 +1141,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "OLLAMA_API_KEY": { + "description": "Ollama Cloud API key (ollama.com — cloud-hosted open models)", + "prompt": "Ollama Cloud API key", + "url": "https://ollama.com/settings", + "password": True, + "category": "provider", + "advanced": True, + }, + "OLLAMA_BASE_URL": { + "description": "Ollama Cloud base URL override (default: https://ollama.com/v1)", + "prompt": "Ollama base URL (leave empty 
for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "XIAOMI_API_KEY": { "description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)", "prompt": "Xiaomi MiMo API Key", @@ -974,6 +1172,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "AWS_REGION": { + "description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)", + "prompt": "AWS Region", + "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-regions.html", + "password": False, + "category": "provider", + "advanced": True, + }, + "AWS_PROFILE": { + "description": "AWS named profile for Bedrock authentication (from ~/.aws/credentials)", + "prompt": "AWS Profile", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, # ── Tool API keys ── "EXA_API_KEY": { @@ -1171,6 +1385,12 @@ OPTIONAL_ENV_VARS = { "password": False, "category": "messaging", }, + "TELEGRAM_PROXY": { + "description": "Proxy URL for Telegram connections (overrides HTTPS_PROXY). 
Supports http://, https://, socks5://", + "prompt": "Telegram proxy URL (optional)", + "password": False, + "category": "messaging", + }, "DISCORD_BOT_TOKEN": { "description": "Discord bot token from Developer Portal", "prompt": "Discord bot token", @@ -1366,12 +1586,12 @@ OPTIONAL_ENV_VARS = { "prompt": "Allow All QQ Users", "category": "messaging", }, - "QQ_HOME_CHANNEL": { + "QQBOT_HOME_CHANNEL": { "description": "Default QQ channel/group for cron delivery and notifications", "prompt": "QQ Home Channel", "category": "messaging", }, - "QQ_HOME_CHANNEL_NAME": { + "QQBOT_HOME_CHANNEL_NAME": { "description": "Display name for the QQ home channel", "prompt": "QQ Home Channel Name", "category": "messaging", @@ -1468,13 +1688,8 @@ OPTIONAL_ENV_VARS = { }, # ── Agent settings ── - "MESSAGING_CWD": { - "description": "Working directory for terminal commands via messaging", - "prompt": "Messaging working directory (default: home)", - "url": None, - "password": False, - "category": "setting", - }, + # NOTE: MESSAGING_CWD was removed here — use terminal.cwd in config.yaml + # instead. The gateway reads TERMINAL_CWD (bridged from terminal.cwd). "SUDO_PASSWORD": { "description": "Sudo password for terminal commands requiring root access; set to an explicit empty string to try empty without prompting", "prompt": "Sudo password", @@ -1522,14 +1737,8 @@ OPTIONAL_ENV_VARS = { }, } -if not _managed_nous_tools_enabled(): - for _hidden_var in ( - "FIRECRAWL_GATEWAY_URL", - "TOOL_GATEWAY_DOMAIN", - "TOOL_GATEWAY_SCHEME", - "TOOL_GATEWAY_USER_TOKEN", - ): - OPTIONAL_ENV_VARS.pop(_hidden_var, None) +# Tool Gateway env vars are always visible — they're useful for +# self-hosted / custom gateway setups regardless of subscription state. def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]: @@ -1643,12 +1852,53 @@ def _normalize_custom_provider_entry( if not isinstance(entry, dict): return None + # Accept camelCase aliases commonly used in hand-written configs. 
+ _CAMEL_ALIASES: Dict[str, str] = { + "apiKey": "api_key", + "baseUrl": "base_url", + "apiMode": "api_mode", + "keyEnv": "key_env", + "defaultModel": "default_model", + "contextLength": "context_length", + "rateLimitDelay": "rate_limit_delay", + } + _KNOWN_KEYS = { + "name", "api", "url", "base_url", "api_key", "key_env", + "api_mode", "transport", "model", "default_model", "models", + "context_length", "rate_limit_delay", + } + for camel, snake in _CAMEL_ALIASES.items(): + if camel in entry and snake not in entry: + logger.warning( + "providers.%s: camelCase key '%s' auto-mapped to '%s' " + "(use snake_case to avoid this warning)", + provider_key or "?", camel, snake, + ) + entry[snake] = entry[camel] + unknown = set(entry.keys()) - _KNOWN_KEYS - set(_CAMEL_ALIASES.keys()) + if unknown: + logger.warning( + "providers.%s: unknown config keys ignored: %s", + provider_key or "?", ", ".join(sorted(unknown)), + ) + + from urllib.parse import urlparse + base_url = "" - for url_key in ("api", "url", "base_url"): + for url_key in ("base_url", "url", "api"): raw_url = entry.get(url_key) if isinstance(raw_url, str) and raw_url.strip(): - base_url = raw_url.strip() - break + candidate = raw_url.strip() + parsed = urlparse(candidate) + if parsed.scheme and parsed.netloc: + base_url = candidate + break + else: + logger.warning( + "providers.%s: '%s' value '%s' is not a valid URL " + "(no scheme or host) — skipped", + provider_key or "?", url_key, candidate, + ) if not base_url: return None @@ -1953,6 +2203,52 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None: sys.stderr.write("\n".join(lines) + "\n\n") +def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> None: + """Warn if MESSAGING_CWD or TERMINAL_CWD is set in .env instead of config.yaml. + + These env vars are deprecated — the canonical setting is terminal.cwd + in config.yaml. Prints a migration hint to stderr. 
+ """ + import os, sys + messaging_cwd = os.environ.get("MESSAGING_CWD") + terminal_cwd_env = os.environ.get("TERMINAL_CWD") + + if config is None: + try: + config = load_config() + except Exception: + return + + terminal_cfg = config.get("terminal", {}) + config_cwd = terminal_cfg.get("cwd", ".") if isinstance(terminal_cfg, dict) else "." + # Only warn if config.yaml doesn't have an explicit path + config_has_explicit_cwd = config_cwd not in (".", "auto", "cwd", "") + + lines: list[str] = [] + if messaging_cwd: + lines.append( + f" \033[33m⚠\033[0m MESSAGING_CWD={messaging_cwd} found in .env — " + f"this is deprecated." + ) + if terminal_cwd_env and not config_has_explicit_cwd: + # TERMINAL_CWD in env but not from config bridge — likely from .env + lines.append( + f" \033[33m⚠\033[0m TERMINAL_CWD={terminal_cwd_env} found in .env — " + f"this is deprecated." + ) + if lines: + hint_path = os.environ.get("HERMES_HOME", "~/.hermes") + lines.insert(0, "\033[33m⚠ Deprecated .env settings detected:\033[0m") + lines.append( + f" \033[2mMove to config.yaml instead: " + f"terminal:\\n cwd: /your/project/path\033[0m" + ) + lines.append( + f" \033[2mThen remove the old entries from {hint_path}/.env\033[0m" + ) + sys.stderr.write("\n".join(lines) + "\n\n") + + def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, Any]: """ Migrate config to latest version, prompting for new required fields. @@ -2231,6 +2527,72 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A else: print(" ✓ Removed unused compression.summary_* keys") + # ── Version 20 → 21: plugins are now opt-in; grandfather existing user plugins ── + # The loader now requires plugins to appear in ``plugins.enabled`` before + # loading. Existing installs had all discovered plugins loading by default + # (minus anything in ``plugins.disabled``). 
To avoid silently breaking + # those setups on upgrade, populate ``plugins.enabled`` with the set of + # currently-installed user plugins that aren't already disabled. + # + # Bundled plugins (shipped in the repo itself) are NOT grandfathered — + # they ship off for everyone, including existing users, so any user who + # wants one has to opt in explicitly. + if current_ver < 21: + config = read_raw_config() + plugins_cfg = config.get("plugins") + if not isinstance(plugins_cfg, dict): + plugins_cfg = {} + # Only migrate if the enabled allow-list hasn't been set yet. + if "enabled" not in plugins_cfg: + disabled = plugins_cfg.get("disabled", []) or [] + if not isinstance(disabled, list): + disabled = [] + disabled_set = set(disabled) + + # Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins. + grandfathered: List[str] = [] + try: + from hermes_constants import get_hermes_home as _ghome + user_plugins_dir = _ghome() / "plugins" + if user_plugins_dir.is_dir(): + for child in sorted(user_plugins_dir.iterdir()): + if not child.is_dir(): + continue + manifest_file = child / "plugin.yaml" + if not manifest_file.exists(): + manifest_file = child / "plugin.yml" + if not manifest_file.exists(): + continue + try: + with open(manifest_file) as _mf: + manifest = yaml.safe_load(_mf) or {} + except Exception: + manifest = {} + name = manifest.get("name") or child.name + if name in disabled_set: + continue + grandfathered.append(name) + except Exception: + grandfathered = [] + + plugins_cfg["enabled"] = grandfathered + config["plugins"] = plugins_cfg + save_config(config) + results["config_added"].append( + f"plugins.enabled (opt-in allow-list, {len(grandfathered)} grandfathered)" + ) + if not quiet: + if grandfathered: + print( + f" ✓ Plugins now opt-in: grandfathered " + f"{len(grandfathered)} existing plugin(s) into plugins.enabled" + ) + else: + print( + " ✓ Plugins now opt-in: no existing plugins to grandfather. " + "Use `hermes plugins enable ` to activate." 
+ ) + if current_ver < latest_ver and not quiet: print(f"Config version: {current_ver} → {latest_ver}") @@ -2423,6 +2785,85 @@ def _expand_env_vars(obj): return obj +def _items_by_unique_name(items): + """Return a name-indexed dict only when all items have unique string names.""" + if not isinstance(items, list): + return None + indexed = {} + for item in items: + if not isinstance(item, dict) or not isinstance(item.get("name"), str): + return None + name = item["name"] + if name in indexed: + return None + indexed[name] = item + return indexed + + +def _preserve_env_ref_templates(current, raw, loaded_expanded=None): + """Restore raw ``${VAR}`` templates when a value is otherwise unchanged. + + ``load_config()`` expands env refs for runtime use. When a caller later + persists that config after modifying some unrelated setting, keep the + original on-disk template instead of writing the expanded plaintext + secret back to ``config.yaml``. + + Prefer preserving the raw template when ``current`` still matches either + the value previously returned by ``load_config()`` for this config path or + the current environment expansion of ``raw``. This handles env-var + rotation between load and save while still treating mixed literal/template + string edits as caller-owned once their rendered value diverges. + """ + if isinstance(current, str) and isinstance(raw, str) and re.search(r"\${[^}]+}", raw): + if current == raw: + return raw + if isinstance(loaded_expanded, str) and current == loaded_expanded: + return raw + if _expand_env_vars(raw) == current: + return raw + return current + + if isinstance(current, dict) and isinstance(raw, dict): + return { + key: _preserve_env_ref_templates( + value, + raw.get(key), + loaded_expanded.get(key) if isinstance(loaded_expanded, dict) else None, + ) + for key, value in current.items() + } + + if isinstance(current, list) and isinstance(raw, list): + # Prefer matching named config objects (e.g. 
custom_providers) by name + # so harmless reordering doesn't drop the original template. If names + # are duplicated, fall back to positional matching instead of silently + # shadowing one entry. + current_by_name = _items_by_unique_name(current) + raw_by_name = _items_by_unique_name(raw) + loaded_by_name = _items_by_unique_name(loaded_expanded) + if current_by_name is not None and raw_by_name is not None: + return [ + _preserve_env_ref_templates( + item, + raw_by_name.get(item.get("name")), + loaded_by_name.get(item.get("name")) if loaded_by_name is not None else None, + ) + for item in current + ] + return [ + _preserve_env_ref_templates( + item, + raw[index] if index < len(raw) else None, + loaded_expanded[index] + if isinstance(loaded_expanded, list) and index < len(loaded_expanded) + else None, + ) + for index, item in enumerate(current) + ] + + return current + + def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]: """Move stale root-level provider/base_url into model section. @@ -2490,7 +2931,6 @@ def read_raw_config() -> Dict[str, Any]: def load_config() -> Dict[str, Any]: """Load configuration from ~/.hermes/config.yaml.""" - import copy ensure_hermes_home() config_path = get_config_path() @@ -2511,8 +2951,11 @@ def load_config() -> Dict[str, Any]: config = _deep_merge(config, user_config) except Exception as e: print(f"Warning: Failed to load config: {e}") - - return _expand_env_vars(_normalize_root_model_keys(_normalize_max_turns_config(config))) + + normalized = _normalize_root_model_keys(_normalize_max_turns_config(config)) + expanded = _expand_env_vars(normalized) + _LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(expanded) + return expanded _SECURITY_COMMENT = """ @@ -2547,24 +2990,11 @@ _FALLBACK_COMMENT = """ # minimax (MINIMAX_API_KEY) — MiniMax # minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) # -# For custom OpenAI-compatible endpoints, add base_url and api_key_env. 
+# For custom OpenAI-compatible endpoints, add base_url and key_env. # # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 -# -# ── Smart Model Routing ──────────────────────────────────────────────── -# Optional cheap-vs-strong routing for simple turns. -# Keeps the primary model for complex work, but can route short/simple -# messages to a cheaper model across providers. -# -# smart_model_routing: -# enabled: true -# max_simple_chars: 160 -# max_simple_words: 28 -# cheap_model: -# provider: openrouter -# model: google/gemini-2.5-flash """ @@ -2591,24 +3021,11 @@ _COMMENTED_SECTIONS = """ # minimax (MINIMAX_API_KEY) — MiniMax # minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) # -# For custom OpenAI-compatible endpoints, add base_url and api_key_env. +# For custom OpenAI-compatible endpoints, add base_url and key_env. # # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 -# -# ── Smart Model Routing ──────────────────────────────────────────────── -# Optional cheap-vs-strong routing for simple turns. -# Keeps the primary model for complex work, but can route short/simple -# messages to a cheaper model across providers. 
-# -# smart_model_routing: -# enabled: true -# max_simple_chars: 160 -# max_simple_words: 28 -# cheap_model: -# provider: openrouter -# model: google/gemini-2.5-flash """ @@ -2621,7 +3038,15 @@ def save_config(config: Dict[str, Any]): ensure_hermes_home() config_path = get_config_path() - normalized = _normalize_root_model_keys(_normalize_max_turns_config(config)) + current_normalized = _normalize_root_model_keys(_normalize_max_turns_config(config)) + normalized = current_normalized + raw_existing = _normalize_root_model_keys(_normalize_max_turns_config(read_raw_config())) + if raw_existing: + normalized = _preserve_env_ref_templates( + normalized, + raw_existing, + _LAST_EXPANDED_CONFIG_BY_PATH.get(str(config_path)), + ) # Build optional commented-out sections for features that are off by # default or only relevant when explicitly configured. @@ -2639,6 +3064,7 @@ def save_config(config: Dict[str, Any]): extra_content="".join(parts) if parts else None, ) _secure_file(config_path) + _LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(current_normalized) def load_env() -> Dict[str, str]: @@ -2766,6 +3192,47 @@ def sanitize_env_file() -> int: return fixes +def _check_non_ascii_credential(key: str, value: str) -> str: + """Warn and strip non-ASCII characters from credential values. + + API keys and tokens must be pure ASCII — they are sent as HTTP header + values which httpx/httpcore encode as ASCII. Non-ASCII characters + (commonly introduced by copy-pasting from rich-text editors or PDFs + that substitute lookalike Unicode glyphs for ASCII letters) cause + ``UnicodeEncodeError: 'ascii' codec can't encode character`` at + request time. + + Returns the sanitized (ASCII-only) value. Prints a warning if any + non-ASCII characters were found and removed. 
+ """ + try: + value.encode("ascii") + return value # all ASCII — nothing to do + except UnicodeEncodeError: + pass + + # Build a readable list of the offending characters + bad_chars: list[str] = [] + for i, ch in enumerate(value): + if ord(ch) > 127: + bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})") + sanitized = value.encode("ascii", errors="ignore").decode("ascii") + + import sys + print( + f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n" + f" This usually happens when copy-pasting from a PDF, rich-text editor,\n" + f" or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n" + f"\n" + + "\n".join(f" {line}" for line in bad_chars[:5]) + + ("\n ... and more" if len(bad_chars) > 5 else "") + + f"\n\n The non-ASCII characters have been stripped automatically.\n" + f" If authentication fails, re-copy the key from the provider's dashboard.\n", + file=sys.stderr, + ) + return sanitized + + def save_env_value(key: str, value: str): """Save or update a value in ~/.hermes/.env.""" if is_managed(): @@ -2774,6 +3241,8 @@ def save_env_value(key: str, value: str): if not _ENV_VAR_NAME_RE.match(key): raise ValueError(f"Invalid environment variable name: {key!r}") value = value.replace("\n", "").replace("\r", "") + # API keys / tokens must be ASCII — strip non-ASCII with a warning. + value = _check_non_ascii_credential(key, value) ensure_hermes_home() env_path = get_env_path() @@ -2804,12 +3273,25 @@ def save_env_value(key: str, value: str): lines.append(f"{key}={value}\n") fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_') + # Preserve original permissions so Docker volume mounts aren't clobbered. 
+ original_mode = None + if env_path.exists(): + try: + original_mode = stat.S_IMODE(env_path.stat().st_mode) + except OSError: + pass try: with os.fdopen(fd, 'w', **write_kw) as f: f.writelines(lines) f.flush() os.fsync(f.fileno()) os.replace(tmp_path, env_path) + # Restore original permissions before _secure_file may tighten them. + if original_mode is not None: + try: + os.chmod(env_path, original_mode) + except OSError: + pass except BaseException: try: os.unlink(tmp_path) @@ -2820,13 +3302,6 @@ def save_env_value(key: str, value: str): os.environ[key] = value - # Restrict .env permissions to owner-only (contains API keys) - if not _IS_WINDOWS: - try: - os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR) - except OSError: - pass - def remove_env_value(key: str) -> bool: """Remove a key from ~/.hermes/.env and os.environ. @@ -2855,12 +3330,23 @@ def remove_env_value(key: str) -> bool: if found: fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_') + # Preserve original permissions so Docker volume mounts aren't clobbered. 
+ original_mode = None + try: + original_mode = stat.S_IMODE(env_path.stat().st_mode) + except OSError: + pass try: with os.fdopen(fd, 'w', **write_kw) as f: f.writelines(new_lines) f.flush() os.fsync(f.fileno()) os.replace(tmp_path, env_path) + if original_mode is not None: + try: + os.chmod(env_path, original_mode) + except OSError: + pass except BaseException: try: os.unlink(tmp_path) @@ -3002,6 +3488,10 @@ def show_config(): print(f" Personality: {display.get('personality', 'kawaii')}") print(f" Reasoning: {'on' if display.get('show_reasoning', False) else 'off'}") print(f" Bell: {'on' if display.get('bell_on_complete', False) else 'off'}") + ump = display.get('user_message_preview', {}) if isinstance(display.get('user_message_preview', {}), dict) else {} + ump_first = ump.get('first_lines', 2) + ump_last = ump.get('last_lines', 2) + print(f" User preview: first {ump_first} line(s), last {ump_last} line(s)") # Terminal print() diff --git a/hermes_cli/curses_ui.py b/hermes_cli/curses_ui.py index 4880171fd4..b05295f1e6 100644 --- a/hermes_cli/curses_ui.py +++ b/hermes_cli/curses_ui.py @@ -166,6 +166,7 @@ def curses_radiolist( selected: int = 0, *, cancel_returns: int | None = None, + description: str | None = None, ) -> int: """Curses single-select radio list. Returns the selected index. @@ -174,6 +175,9 @@ def curses_radiolist( items: Display labels for each row. selected: Index that starts selected (pre-selected). cancel_returns: Returned on ESC/q. Defaults to the original *selected*. + description: Optional multi-line text shown between the title and + the item list. Useful for context that should survive the + curses screen clear. 
""" if cancel_returns is None: cancel_returns = selected @@ -181,6 +185,10 @@ def curses_radiolist( if not sys.stdin.isatty(): return cancel_returns + desc_lines: list[str] = [] + if description: + desc_lines = description.splitlines() + try: import curses result_holder: list = [None] @@ -199,22 +207,35 @@ def curses_radiolist( stdscr.clear() max_y, max_x = stdscr.getmaxyx() + row = 0 + # Header try: hattr = curses.A_BOLD if curses.has_colors(): hattr |= curses.color_pair(2) - stdscr.addnstr(0, 0, title, max_x - 1, hattr) + stdscr.addnstr(row, 0, title, max_x - 1, hattr) + row += 1 + + # Description lines + for dline in desc_lines: + if row >= max_y - 1: + break + stdscr.addnstr(row, 0, dline, max_x - 1, curses.A_NORMAL) + row += 1 + stdscr.addnstr( - 1, 0, + row, 0, " \u2191\u2193 navigate ENTER/SPACE select ESC cancel", max_x - 1, curses.A_DIM, ) + row += 1 except curses.error: pass # Scrollable item list - visible_rows = max_y - 4 + items_start = row + 1 + visible_rows = max_y - items_start - 1 if cursor < scroll_offset: scroll_offset = cursor elif cursor >= scroll_offset + visible_rows: @@ -223,7 +244,7 @@ def curses_radiolist( for draw_i, i in enumerate( range(scroll_offset, min(len(items), scroll_offset + visible_rows)) ): - y = draw_i + 3 + y = draw_i + items_start if y >= max_y - 1: break radio = "\u25cf" if i == selected else "\u25cb" diff --git a/hermes_cli/debug.py b/hermes_cli/debug.py index 3607db9231..9dde9d7c16 100644 --- a/hermes_cli/debug.py +++ b/hermes_cli/debug.py @@ -6,7 +6,10 @@ Currently supports: """ import io +import json +import os import sys +import time import urllib.error import urllib.parse import urllib.request @@ -27,6 +30,205 @@ _DPASTE_COM_URL = "https://dpaste.com/api/" # paste.rs caps at ~1 MB; we stay under that with headroom. _MAX_LOG_BYTES = 512_000 +# Auto-delete pastes after this many seconds (6 hours). 
+_AUTO_DELETE_SECONDS = 21600 + + +# --------------------------------------------------------------------------- +# Pending-deletion tracking (replaces the old fork-and-sleep subprocess). +# --------------------------------------------------------------------------- + +def _pending_file() -> Path: + """Path to ``~/.hermes/pastes/pending.json``. + + Each entry: ``{"url": "...", "expire_at": }``. Scheduled + DELETEs used to be handled by spawning a detached Python process per + paste that slept for 6 hours; those accumulated forever if the user + ran ``hermes debug share`` repeatedly. We now persist the schedule + to disk and sweep expired entries on the next debug invocation. + """ + return get_hermes_home() / "pastes" / "pending.json" + + +def _load_pending() -> list[dict]: + path = _pending_file() + if not path.exists(): + return [] + try: + data = json.loads(path.read_text(encoding="utf-8")) + if isinstance(data, list): + # Filter to well-formed entries only + return [ + e for e in data + if isinstance(e, dict) and "url" in e and "expire_at" in e + ] + except (OSError, ValueError, json.JSONDecodeError): + pass + return [] + + +def _save_pending(entries: list[dict]) -> None: + path = _pending_file() + try: + path.parent.mkdir(parents=True, exist_ok=True) + tmp = path.with_suffix(".json.tmp") + tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8") + os.replace(tmp, path) + except OSError: + # Non-fatal — worst case the user has to run ``hermes debug delete`` + # manually. + pass + + +def _record_pending(urls: list[str], delay_seconds: int = _AUTO_DELETE_SECONDS) -> None: + """Record *urls* for deletion at ``now + delay_seconds``. + + Only paste.rs URLs are recorded (dpaste.com auto-expires). Entries + are merged into any existing pending.json. 
+ """ + paste_rs_urls = [u for u in urls if _extract_paste_id(u)] + if not paste_rs_urls: + return + + entries = _load_pending() + # Dedupe by URL: keep the later expire_at if same URL appears twice + by_url: dict[str, float] = {e["url"]: float(e["expire_at"]) for e in entries} + expire_at = time.time() + delay_seconds + for u in paste_rs_urls: + by_url[u] = max(expire_at, by_url.get(u, 0.0)) + merged = [{"url": u, "expire_at": ts} for u, ts in by_url.items()] + _save_pending(merged) + + +def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]: + """Synchronously DELETE any pending pastes whose ``expire_at`` has passed. + + Returns ``(deleted, remaining)``. Best-effort: failed deletes stay in + the pending file and will be retried on the next sweep. Silent — + intended to be called from every ``hermes debug`` invocation with + minimal noise. + """ + entries = _load_pending() + if not entries: + return (0, 0) + + current = time.time() if now is None else now + deleted = 0 + remaining: list[dict] = [] + + for entry in entries: + try: + expire_at = float(entry.get("expire_at", 0)) + except (TypeError, ValueError): + continue # drop malformed entries + if expire_at > current: + remaining.append(entry) + continue + + url = entry.get("url", "") + try: + if delete_paste(url): + deleted += 1 + continue + except Exception: + # Network hiccup, 404 (already gone), etc. — drop the entry + # after a grace period; don't retry forever. + pass + + # Retain failed deletes for up to 24h past expiration, then give up. 
+ if expire_at + 86400 > current: + remaining.append(entry) + else: + deleted += 1 # count as reaped (paste.rs will GC eventually) + + if deleted: + _save_pending(remaining) + + return (deleted, len(remaining)) + + +# --------------------------------------------------------------------------- +# Privacy / delete helpers +# --------------------------------------------------------------------------- + +_PRIVACY_NOTICE = """\ +⚠️ This will upload the following to a public paste service: + • System info (OS, Python version, Hermes version, provider, which API keys + are configured — NOT the actual keys) + • Recent log lines (agent.log, errors.log, gateway.log — may contain + conversation fragments and file paths) + • Full agent.log and gateway.log (up to 512 KB each — likely contains + conversation content, tool outputs, and file paths) + +Pastes auto-delete after 6 hours. +""" + +_GATEWAY_PRIVACY_NOTICE = ( + "⚠️ **Privacy notice:** This uploads system info + recent log tails " + "(may contain conversation fragments) to a public paste service. " + "Full logs are NOT included from the gateway — use `hermes debug share` " + "from the CLI for full log uploads.\n" + "Pastes auto-delete after 6 hours." +) + + +def _extract_paste_id(url: str) -> Optional[str]: + """Extract the paste ID from a paste.rs or dpaste.com URL. + + Returns the ID string, or None if the URL doesn't match a known service. + """ + url = url.strip().rstrip("/") + for prefix in ("https://paste.rs/", "http://paste.rs/"): + if url.startswith(prefix): + return url[len(prefix):] + return None + + +def delete_paste(url: str) -> bool: + """Delete a paste from paste.rs. Returns True on success. + + Only paste.rs supports unauthenticated DELETE. dpaste.com pastes + expire automatically but cannot be deleted via API. + """ + paste_id = _extract_paste_id(url) + if not paste_id: + raise ValueError( + f"Cannot delete: only paste.rs URLs are supported. 
Got: {url}" + ) + + target = f"{_PASTE_RS_URL}{paste_id}" + req = urllib.request.Request( + target, method="DELETE", + headers={"User-Agent": "hermes-agent/debug-share"}, + ) + with urllib.request.urlopen(req, timeout=30) as resp: + return 200 <= resp.status < 300 + + +def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SECONDS): + """Record *urls* for deletion ``delay_seconds`` from now. + + Previously this spawned a detached Python subprocess per call that slept + for 6 hours and then issued DELETE requests. Those subprocesses leaked — + every ``hermes debug share`` invocation added ~20 MB of resident Python + interpreters that never exited until the sleep completed. + + The replacement is stateless: we append to ``~/.hermes/pastes/pending.json`` + and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from + every ``hermes debug`` invocation. If the user never runs ``hermes debug`` + again, paste.rs's own retention policy handles cleanup. + """ + _record_pending(urls, delay_seconds=delay_seconds) + + +def _delete_hint(url: str) -> str: + """Return a one-liner delete command for the given paste URL.""" + paste_id = _extract_paste_id(url) + if paste_id: + return f"hermes debug delete {url}" + # dpaste.com — no API delete, expires on its own. + return "(auto-expires per dpaste.com policy)" + def _upload_paste_rs(content: str) -> str: """Upload to paste.rs. Returns the paste URL. @@ -250,6 +452,9 @@ def run_debug_share(args): expiry = getattr(args, "expire", 7) local_only = getattr(args, "local", False) + if not local_only: + print(_PRIVACY_NOTICE) + print("Collecting debug report...") # Capture dump once — prepended to every paste for context. 
@@ -315,22 +520,66 @@ def run_debug_share(args): if failures: print(f"\n (failed to upload: {', '.join(failures)})") + # Schedule auto-deletion after 6 hours + _schedule_auto_delete(list(urls.values())) + print(f"\n⏱ Pastes will auto-delete in 6 hours.") + + # Manual delete fallback + print(f"To delete now: hermes debug delete ") + print(f"\nShare these links with the Hermes team for support.") +def run_debug_delete(args): + """Delete one or more paste URLs uploaded by /debug.""" + urls = getattr(args, "urls", []) + if not urls: + print("Usage: hermes debug delete [ ...]") + print(" Deletes paste.rs pastes uploaded by 'hermes debug share'.") + return + + for url in urls: + try: + ok = delete_paste(url) + if ok: + print(f" ✓ Deleted: {url}") + else: + print(f" ✗ Failed to delete: {url} (unexpected response)") + except ValueError as exc: + print(f" ✗ {exc}") + except Exception as exc: + print(f" ✗ Could not delete {url}: {exc}") + + def run_debug(args): """Route debug subcommands.""" + # Opportunistic sweep of expired pastes on every ``hermes debug`` call. + # Replaces the old per-paste sleeping subprocess that used to leak as + # one orphaned Python interpreter per scheduled deletion. Silent and + # best-effort — any failure is swallowed so ``hermes debug`` stays + # reliable even when offline. 
+ try: + _sweep_expired_pastes() + except Exception: + pass + subcmd = getattr(args, "debug_command", None) if subcmd == "share": run_debug_share(args) + elif subcmd == "delete": + run_debug_delete(args) else: # Default: show help - print("Usage: hermes debug share [--lines N] [--expire N] [--local]") + print("Usage: hermes debug ") print() print("Commands:") print(" share Upload debug report to a paste service and print URL") + print(" delete Delete a previously uploaded paste") print() - print("Options:") + print("Options (share):") print(" --lines N Number of log lines to include (default: 200)") print(" --expire N Paste expiry in days (default: 7)") print(" --local Print report locally instead of uploading") + print() + print("Options (delete):") + print(" ... One or more paste URLs to delete") diff --git a/hermes_cli/dingtalk_auth.py b/hermes_cli/dingtalk_auth.py new file mode 100644 index 0000000000..e1034c53da --- /dev/null +++ b/hermes_cli/dingtalk_auth.py @@ -0,0 +1,294 @@ +""" +DingTalk Device Flow authorization. + +Implements the same 3-step registration flow as dingtalk-openclaw-connector: + 1. POST /app/registration/init → get nonce + 2. POST /app/registration/begin → get device_code + verification_uri_complete + 3. POST /app/registration/poll → poll until SUCCESS → get client_id + client_secret + +The verification_uri_complete is rendered as a QR code in the terminal so the +user can scan it with DingTalk to authorize, yielding AppKey + AppSecret +automatically. 
+""" + +from __future__ import annotations + +import io +import os +import sys +import time +import logging +from typing import Optional, Tuple + +import requests + +logger = logging.getLogger(__name__) + +# ── Configuration ────────────────────────────────────────────────────────── + +REGISTRATION_BASE_URL = os.environ.get( + "DINGTALK_REGISTRATION_BASE_URL", "https://oapi.dingtalk.com" +).rstrip("/") + +REGISTRATION_SOURCE = os.environ.get("DINGTALK_REGISTRATION_SOURCE", "openClaw") + + +# ── API helpers ──────────────────────────────────────────────────────────── + +class RegistrationError(Exception): + """Raised when a DingTalk registration API call fails.""" + + +def _api_post(path: str, payload: dict) -> dict: + """POST to the registration API and return the parsed JSON body.""" + url = f"{REGISTRATION_BASE_URL}{path}" + try: + resp = requests.post(url, json=payload, timeout=15) + resp.raise_for_status() + data = resp.json() + except requests.RequestException as exc: + raise RegistrationError(f"Network error calling {url}: {exc}") from exc + + errcode = data.get("errcode", -1) + if errcode != 0: + errmsg = data.get("errmsg", "unknown error") + raise RegistrationError(f"API error [{path}]: {errmsg} (errcode={errcode})") + return data + + +# ── Core flow ────────────────────────────────────────────────────────────── + +def begin_registration() -> dict: + """Start a device-flow registration. 
+ + Returns a dict with keys: + device_code, verification_uri_complete, expires_in, interval + """ + # Step 1: init → nonce + init_data = _api_post("/app/registration/init", {"source": REGISTRATION_SOURCE}) + nonce = str(init_data.get("nonce", "")).strip() + if not nonce: + raise RegistrationError("init response missing nonce") + + # Step 2: begin → device_code, verification_uri_complete + begin_data = _api_post("/app/registration/begin", {"nonce": nonce}) + device_code = str(begin_data.get("device_code", "")).strip() + verification_uri_complete = str(begin_data.get("verification_uri_complete", "")).strip() + if not device_code: + raise RegistrationError("begin response missing device_code") + if not verification_uri_complete: + raise RegistrationError("begin response missing verification_uri_complete") + + return { + "device_code": device_code, + "verification_uri_complete": verification_uri_complete, + "expires_in": int(begin_data.get("expires_in", 7200)), + "interval": max(int(begin_data.get("interval", 3)), 2), + } + + +def poll_registration(device_code: str) -> dict: + """Poll the registration status once. + + Returns a dict with keys: status, client_id?, client_secret?, fail_reason? + """ + data = _api_post("/app/registration/poll", {"device_code": device_code}) + status_raw = str(data.get("status", "")).strip().upper() + if status_raw not in ("WAITING", "SUCCESS", "FAIL", "EXPIRED"): + status_raw = "UNKNOWN" + return { + "status": status_raw, + "client_id": str(data.get("client_id", "")).strip() or None, + "client_secret": str(data.get("client_secret", "")).strip() or None, + "fail_reason": str(data.get("fail_reason", "")).strip() or None, + } + + +def wait_for_registration_success( + device_code: str, + interval: int = 3, + expires_in: int = 7200, + on_waiting: Optional[callable] = None, +) -> Tuple[str, str]: + """Block until the registration succeeds or times out. + + Returns (client_id, client_secret). 
+ """ + deadline = time.monotonic() + expires_in + retry_window = 120 # 2 minutes for transient errors + retry_start = 0.0 + + while time.monotonic() < deadline: + time.sleep(interval) + try: + result = poll_registration(device_code) + except RegistrationError: + if retry_start == 0: + retry_start = time.monotonic() + if time.monotonic() - retry_start < retry_window: + continue + raise + + status = result["status"] + if status == "WAITING": + retry_start = 0 + if on_waiting: + on_waiting() + continue + if status == "SUCCESS": + cid = result["client_id"] + csecret = result["client_secret"] + if not cid or not csecret: + raise RegistrationError("authorization succeeded but credentials are missing") + return cid, csecret + # FAIL / EXPIRED / UNKNOWN + if retry_start == 0: + retry_start = time.monotonic() + if time.monotonic() - retry_start < retry_window: + continue + reason = result.get("fail_reason") or status + raise RegistrationError(f"authorization failed: {reason}") + + raise RegistrationError("authorization timed out, please retry") + + +# ── QR code rendering ───────────────────────────────────────────────────── + +def _ensure_qrcode_installed() -> bool: + """Try to import qrcode; if missing, auto-install it via pip/uv.""" + try: + import qrcode # noqa: F401 + return True + except ImportError: + pass + + import subprocess + + # Try uv first (Hermes convention), then pip + for cmd in ( + [sys.executable, "-m", "uv", "pip", "install", "qrcode"], + [sys.executable, "-m", "pip", "install", "-q", "qrcode"], + ): + try: + subprocess.check_call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + import qrcode # noqa: F401,F811 + return True + except (subprocess.CalledProcessError, ImportError, FileNotFoundError): + continue + return False + + +def render_qr_to_terminal(url: str) -> bool: + """Render *url* as a compact QR code in the terminal. + + Returns True if the QR code was printed, False if the library is missing. 
+ """ + try: + import qrcode + except ImportError: + return False + + qr = qrcode.QRCode( + version=1, + error_correction=qrcode.constants.ERROR_CORRECT_L, + box_size=1, + border=1, + ) + qr.add_data(url) + qr.make(fit=True) + + # Use half-block characters for compact rendering (2 rows per character) + matrix = qr.get_matrix() + rows = len(matrix) + lines: list[str] = [] + + TOP_HALF = "\u2580" # ▀ + BOTTOM_HALF = "\u2584" # ▄ + FULL_BLOCK = "\u2588" # █ + EMPTY = " " + + for r in range(0, rows, 2): + line_chars: list[str] = [] + for c in range(len(matrix[r])): + top = matrix[r][c] + bottom = matrix[r + 1][c] if r + 1 < rows else False + if top and bottom: + line_chars.append(FULL_BLOCK) + elif top: + line_chars.append(TOP_HALF) + elif bottom: + line_chars.append(BOTTOM_HALF) + else: + line_chars.append(EMPTY) + lines.append(" " + "".join(line_chars)) + + print("\n".join(lines)) + return True + + +# ── High-level entry point for the setup wizard ─────────────────────────── + +def dingtalk_qr_auth() -> Optional[Tuple[str, str]]: + """Run the interactive QR-code device-flow authorization. + + Returns (client_id, client_secret) on success, or None if the user + cancelled or the flow failed. + """ + from hermes_cli.setup import print_info, print_success, print_warning, print_error + + print() + print_info(" Initializing DingTalk device authorization...") + print_info(" Note: the scan page is branded 'OpenClaw' — DingTalk's") + print_info(" ecosystem onboarding bridge. 
Safe to use.") + + try: + reg = begin_registration() + except RegistrationError as exc: + print_error(f" Authorization init failed: {exc}") + return None + + url = reg["verification_uri_complete"] + + # Ensure qrcode library is available (auto-install if missing) + if not _ensure_qrcode_installed(): + print_warning(" qrcode library install failed, will show link only.") + + print() + print_info(" Please scan the QR code below with DingTalk to authorize:") + print() + + if not render_qr_to_terminal(url): + print_warning(f" QR code render failed, please open the link below to authorize:") + + print() + print_info(f" Or open this link manually: {url}") + print() + print_info(" Waiting for QR scan authorization... (timeout: 2 hours)") + + dot_count = 0 + + def _on_waiting(): + nonlocal dot_count + dot_count += 1 + if dot_count % 10 == 0: + sys.stdout.write(".") + sys.stdout.flush() + + try: + client_id, client_secret = wait_for_registration_success( + device_code=reg["device_code"], + interval=reg["interval"], + expires_in=reg["expires_in"], + on_waiting=_on_waiting, + ) + except RegistrationError as exc: + print() + print_error(f" Authorization failed: {exc}") + return None + + print() + print_success(" QR scan authorization successful!") + print_success(f" Client ID: {client_id}") + print_success(f" Client Secret: {client_secret[:8]}{'*' * (len(client_secret) - 8)}") + + return client_id, client_secret diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 892ff00219..8247d25913 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -8,6 +8,7 @@ import os import sys import subprocess import shutil +from pathlib import Path from hermes_cli.config import get_project_root, get_hermes_home, get_env_path from hermes_constants import display_hermes_home @@ -276,6 +277,86 @@ def run_doctor(args): config_path = HERMES_HOME / 'config.yaml' if config_path.exists(): check_ok(f"{_DHH}/config.yaml exists") + + # Validate model.provider and model.default values + 
try: + import yaml as _yaml + cfg = _yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + model_section = cfg.get("model") or {} + provider_raw = (model_section.get("provider") or "").strip() + provider = provider_raw.lower() + default_model = (model_section.get("default") or model_section.get("model") or "").strip() + + known_providers: set = set() + try: + from hermes_cli.auth import PROVIDER_REGISTRY + known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"} + except Exception: + pass + try: + from hermes_cli.auth import resolve_provider as _resolve_provider + except Exception: + _resolve_provider = None + + canonical_provider = provider + if provider and _resolve_provider is not None and provider != "auto": + try: + canonical_provider = _resolve_provider(provider) + except Exception: + canonical_provider = None + + if provider and provider != "auto": + if canonical_provider is None or (known_providers and canonical_provider not in known_providers): + known_list = ", ".join(sorted(known_providers)) if known_providers else "(unavailable)" + check_fail( + f"model.provider '{provider_raw}' is not a recognised provider", + f"(known: {known_list})", + ) + issues.append( + f"model.provider '{provider_raw}' is unknown. " + f"Valid providers: {known_list}. " + f"Fix: run 'hermes config set model.provider '" + ) + + # Warn if model is set to a provider-prefixed name on a provider that doesn't use them + if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"): + check_warn( + f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'", + "(vendor-prefixed slugs belong to aggregators like openrouter)", + ) + issues.append( + f"model.default '{default_model}' is vendor-prefixed but model.provider is '{provider_raw}'. 
" + "Either set model.provider to 'openrouter', or drop the vendor prefix." + ) + + # Check credentials for the configured provider. + # Limit to API-key providers in PROVIDER_REGISTRY — other provider + # types (OAuth, SDK, openrouter/anthropic/custom/auto) have their + # own env-var checks elsewhere in doctor, and get_auth_status() + # returns a bare {logged_in: False} for anything it doesn't + # explicitly dispatch, which would produce false positives. + if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"): + try: + from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status + pconfig = PROVIDER_REGISTRY.get(canonical_provider) + if pconfig and getattr(pconfig, "auth_type", "") == "api_key": + status = get_auth_status(canonical_provider) or {} + configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key")) + if not configured: + check_fail( + f"model.provider '{canonical_provider}' is set but no API key is configured", + "(check ~/.hermes/.env or run 'hermes setup')", + ) + issues.append( + f"No credentials found for provider '{canonical_provider}'. 
" + f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, " + f"or switch providers with 'hermes config set model.provider '" + ) + except Exception: + pass + + except Exception as e: + check_warn("Could not validate model/provider config", f"({e})") else: fallback_config = PROJECT_ROOT / 'cli-config.yaml' if fallback_config.exists(): @@ -372,7 +453,11 @@ def run_doctor(args): print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD)) try: - from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status + from hermes_cli.auth import ( + get_nous_auth_status, + get_codex_auth_status, + get_gemini_oauth_auth_status, + ) nous_status = get_nous_auth_status() if nous_status.get("logged_in"): @@ -387,6 +472,20 @@ def run_doctor(args): check_warn("OpenAI Codex auth", "(not logged in)") if codex_status.get("error"): check_info(codex_status["error"]) + + gemini_status = get_gemini_oauth_auth_status() + if gemini_status.get("logged_in"): + email = gemini_status.get("email") or "" + project = gemini_status.get("project_id") or "" + pieces = [] + if email: + pieces.append(email) + if project: + pieces.append(f"project={project}") + suffix = f" ({', '.join(pieces)})" if pieces else "" + check_ok("Google Gemini OAuth", f"(logged in{suffix})") + else: + check_warn("Google Gemini OAuth", "(not logged in)") except Exception as e: check_warn("Auth provider status", f"(could not check: {e})") @@ -513,7 +612,87 @@ def run_doctor(args): pass _check_gateway_service_linger(issues) - + + # ========================================================================= + # Check: Command installation (hermes bin symlink) + # ========================================================================= + if sys.platform != "win32": + print() + print(color("◆ Command Installation", Colors.CYAN, Colors.BOLD)) + + # Determine the venv entry point location + _venv_bin = None + for _venv_name in ("venv", ".venv"): + _candidate = PROJECT_ROOT / _venv_name / "bin" / "hermes" + if 
_candidate.exists(): + _venv_bin = _candidate + break + + # Determine the expected command link directory (mirrors install.sh logic) + _prefix = os.environ.get("PREFIX", "") + _is_termux_env = bool(os.environ.get("TERMUX_VERSION")) or "com.termux/files/usr" in _prefix + if _is_termux_env and _prefix: + _cmd_link_dir = Path(_prefix) / "bin" + _cmd_link_display = "$PREFIX/bin" + else: + _cmd_link_dir = Path.home() / ".local" / "bin" + _cmd_link_display = "~/.local/bin" + _cmd_link = _cmd_link_dir / "hermes" + + if _venv_bin is None: + check_warn( + "Venv entry point not found", + "(hermes not in venv/bin/ or .venv/bin/ — reinstall with pip install -e '.[all]')" + ) + manual_issues.append( + f"Reinstall entry point: cd {PROJECT_ROOT} && source venv/bin/activate && pip install -e '.[all]'" + ) + else: + check_ok(f"Venv entry point exists ({_venv_bin.relative_to(PROJECT_ROOT)})") + + # Check the symlink at the command link location + if _cmd_link.is_symlink(): + _target = _cmd_link.resolve() + _expected = _venv_bin.resolve() + if _target == _expected: + check_ok(f"{_cmd_link_display}/hermes → correct target") + else: + check_warn( + f"{_cmd_link_display}/hermes points to wrong target", + f"(→ {_target}, expected → {_expected})" + ) + if should_fix: + _cmd_link.unlink() + _cmd_link.symlink_to(_venv_bin) + check_ok(f"Fixed symlink: {_cmd_link_display}/hermes → {_venv_bin}") + fixed_count += 1 + else: + issues.append(f"Broken symlink at {_cmd_link_display}/hermes — run 'hermes doctor --fix'") + elif _cmd_link.exists(): + # It's a regular file, not a symlink — possibly a wrapper script + check_ok(f"{_cmd_link_display}/hermes exists (non-symlink)") + else: + check_fail( + f"{_cmd_link_display}/hermes not found", + "(hermes command may not work outside the venv)" + ) + if should_fix: + _cmd_link_dir.mkdir(parents=True, exist_ok=True) + _cmd_link.symlink_to(_venv_bin) + check_ok(f"Created symlink: {_cmd_link_display}/hermes → {_venv_bin}") + fixed_count += 1 + + # Check if the 
link dir is on PATH + _path_dirs = os.environ.get("PATH", "").split(os.pathsep) + if str(_cmd_link_dir) not in _path_dirs: + check_warn( + f"{_cmd_link_display} is not on your PATH", + "(add it to your shell config: export PATH=\"$HOME/.local/bin:$PATH\")" + ) + manual_issues.append(f"Add {_cmd_link_display} to your PATH") + else: + issues.append(f"Missing {_cmd_link_display}/hermes symlink — run 'hermes doctor --fix'") + # ========================================================================= # Check: External tools # ========================================================================= @@ -679,6 +858,16 @@ def run_doctor(args): elif response.status_code == 401: print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ") issues.append("Check OPENROUTER_API_KEY in .env") + elif response.status_code == 402: + print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(out of credits — payment required)', Colors.DIM)}") + issues.append( + "OpenRouter account has insufficient credits. 
" + "Fix: run 'hermes config set model.provider ' to switch providers, " + "or fund your OpenRouter account at https://openrouter.ai/settings/credits" + ) + elif response.status_code == 429: + print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(rate limited)', Colors.DIM)} ") + issues.append("OpenRouter rate limit hit — consider switching to a different provider or waiting") else: print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ") except Exception as e: @@ -726,6 +915,7 @@ def run_doctor(args): ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True), ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True), + ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True), ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True), # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does. ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True), @@ -733,7 +923,8 @@ def run_doctor(args): ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True), ("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True), ("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True), - ("OpenCode Go", ("OPENCODE_GO_API_KEY",), "https://opencode.ai/zen/go/v1/models", "OPENCODE_GO_BASE_URL", True), + # OpenCode Go has no shared /models endpoint; skip the health check. 
+ ("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False), ] for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers: _key = "" @@ -778,6 +969,31 @@ def run_doctor(args): except Exception as _e: print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ") + # -- AWS Bedrock -- + # Bedrock uses the AWS SDK credential chain, not API keys. + try: + from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region + if has_aws_credentials(): + _auth_var = resolve_aws_auth_env_var() + _region = resolve_bedrock_region() + _label = "AWS Bedrock".ljust(20) + print(f" Checking AWS Bedrock...", end="", flush=True) + try: + import boto3 + _br_client = boto3.client("bedrock", region_name=_region) + _br_resp = _br_client.list_foundation_models() + _model_count = len(_br_resp.get("modelSummaries", [])) + print(f"\r {color('✓', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ") + except ImportError: + print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'(boto3 not installed — {sys.executable} -m pip install boto3)', Colors.DIM)} ") + issues.append(f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3") + except Exception as _e: + _err_name = type(_e).__name__ + print(f"\r {color('⚠', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ") + issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels") + except ImportError: + pass # bedrock_adapter not available — skip silently + # ========================================================================= # Check: Submodules # ========================================================================= diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py index a520790857..90364a261a 100644 --- a/hermes_cli/dump.py +++ b/hermes_cli/dump.py @@ -43,41 +43,20 @@ def _redact(value: str) -> str: def 
_gateway_status() -> str: """Return a short gateway status string.""" - if sys.platform.startswith("linux"): - from hermes_constants import is_container - if is_container(): - try: - from hermes_cli.gateway import find_gateway_pids - pids = find_gateway_pids() - if pids: - return f"running (docker, pid {pids[0]})" - return "stopped (docker)" - except Exception: - return "stopped (docker)" - try: - from hermes_cli.gateway import get_service_name - svc = get_service_name() - except Exception: - svc = "hermes-gateway" - try: - r = subprocess.run( - ["systemctl", "--user", "is-active", svc], - capture_output=True, text=True, timeout=5, - ) - return "running (systemd)" if r.stdout.strip() == "active" else "stopped" - except Exception: - return "unknown" - elif sys.platform == "darwin": - try: - from hermes_cli.gateway import get_launchd_label - r = subprocess.run( - ["launchctl", "list", get_launchd_label()], - capture_output=True, text=True, timeout=5, - ) - return "loaded (launchd)" if r.returncode == 0 else "not loaded" - except Exception: - return "unknown" - return "N/A" + try: + from hermes_cli.gateway import get_gateway_runtime_snapshot + + snapshot = get_gateway_runtime_snapshot() + if snapshot.running: + mode = snapshot.manager + if snapshot.has_process_service_mismatch: + mode = "manual" + return f"running ({mode}, pid {snapshot.gateway_pids[0]})" + if snapshot.service_installed and not snapshot.service_running: + return f"stopped ({snapshot.manager})" + return f"stopped ({snapshot.manager})" + except Exception: + return "unknown" if sys.platform.startswith(("linux", "darwin")) else "N/A" def _count_skills(hermes_home: Path) -> int: @@ -181,7 +160,6 @@ def _config_overrides(config: dict) -> dict[str, str]: ("display", "streaming"), ("display", "skin"), ("display", "show_reasoning"), - ("smart_model_routing", "enabled"), ("privacy", "redact_pii"), ("tts", "provider"), ] @@ -296,6 +274,7 @@ def run_dump(args): ("DEEPSEEK_API_KEY", "deepseek"), 
("DASHSCOPE_API_KEY", "dashscope"), ("HF_TOKEN", "huggingface"), + ("NVIDIA_API_KEY", "nvidia"), ("AI_GATEWAY_API_KEY", "ai_gateway"), ("OPENCODE_ZEN_API_KEY", "opencode_zen"), ("OPENCODE_GO_API_KEY", "opencode_go"), diff --git a/hermes_cli/env_loader.py b/hermes_cli/env_loader.py index 8d6a1449d9..853f0d2626 100644 --- a/hermes_cli/env_loader.py +++ b/hermes_cli/env_loader.py @@ -8,11 +8,40 @@ from pathlib import Path from dotenv import load_dotenv +# Env var name suffixes that indicate credential values. These are the +# only env vars whose values we sanitize on load — we must not silently +# alter arbitrary user env vars, but credentials are known to require +# pure ASCII (they become HTTP header values). +_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY") + + +def _sanitize_loaded_credentials() -> None: + """Strip non-ASCII characters from credential env vars in os.environ. + + Called after dotenv loads so the rest of the codebase never sees + non-ASCII API keys. Only touches env vars whose names end with + known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.). + """ + for key, value in list(os.environ.items()): + if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES): + continue + try: + value.encode("ascii") + except UnicodeEncodeError: + os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii") + + def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None: try: load_dotenv(dotenv_path=path, override=override, encoding="utf-8") except UnicodeDecodeError: load_dotenv(dotenv_path=path, override=override, encoding="latin-1") + # Strip non-ASCII characters from credential env vars that were just + # loaded. API keys must be pure ASCII since they're sent as HTTP + # header values (httpx encodes headers as ASCII). Non-ASCII chars + # typically come from copy-pasting keys from PDFs or rich-text editors + # that substitute Unicode lookalike glyphs (e.g. ʋ U+028B for v). 
+ _sanitize_loaded_credentials() def _sanitize_env_file_if_needed(path: Path) -> None: diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index fe7bb9bd8e..bc809cadf9 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -10,6 +10,7 @@ import shutil import signal import subprocess import sys +from dataclasses import dataclass from pathlib import Path PROJECT_ROOT = Path(__file__).parent.parent.resolve() @@ -41,6 +42,23 @@ from hermes_cli.colors import Colors, color # Process Management (for manual gateway runs) # ============================================================================= + +@dataclass(frozen=True) +class GatewayRuntimeSnapshot: + manager: str + service_installed: bool = False + service_running: bool = False + gateway_pids: tuple[int, ...] = () + service_scope: str | None = None + + @property + def running(self) -> bool: + return self.service_running or bool(self.gateway_pids) + + @property + def has_process_service_mismatch(self) -> bool: + return self.service_installed and self.running and not self.service_running + def _get_service_pids() -> set: """Return PIDs currently managed by systemd or launchd gateway services. @@ -157,20 +175,22 @@ def _request_gateway_self_restart(pid: int) -> bool: return True -def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = False) -> list: - """Find PIDs of running gateway processes. +def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None: + if pid is None or pid <= 0: + return + if pid == os.getpid() or pid in exclude_pids or pid in pids: + return + pids.append(pid) - Args: - exclude_pids: PIDs to exclude from the result (e.g. service-managed - PIDs that should not be killed during a stale-process sweep). - all_profiles: When ``True``, return gateway PIDs across **all** - profiles (the pre-7923 global behaviour). ``hermes update`` - needs this because a code update affects every profile. 
- When ``False`` (default), only PIDs belonging to the current - Hermes profile are returned. + +def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> list[int]: + """Best-effort process-table scan for gateway PIDs. + + This supplements the profile-scoped PID file so status views can still spot + a live gateway when the PID file is stale/missing, and ``--all`` sweeps can + discover gateways outside the current profile. """ - _exclude = exclude_pids or set() - pids = [pid for pid in _get_service_pids() if pid not in _exclude] + pids: list[int] = [] patterns = [ "hermes_cli.main gateway", "hermes_cli.main --profile", @@ -203,33 +223,39 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals if is_windows(): result = subprocess.run( ["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"], - capture_output=True, text=True, timeout=10 + capture_output=True, + text=True, + timeout=10, ) + if result.returncode != 0: + return [] current_cmd = "" - for line in result.stdout.split('\n'): + for line in result.stdout.split("\n"): line = line.strip() if line.startswith("CommandLine="): current_cmd = line[len("CommandLine="):] elif line.startswith("ProcessId="): pid_str = line[len("ProcessId="):] - if any(p in current_cmd for p in patterns) and (all_profiles or _matches_current_profile(current_cmd)): + if any(p in current_cmd for p in patterns) and ( + all_profiles or _matches_current_profile(current_cmd) + ): try: - pid = int(pid_str) - if pid != os.getpid() and pid not in pids and pid not in _exclude: - pids.append(pid) + _append_unique_pid(pids, int(pid_str), exclude_pids) except ValueError: pass current_cmd = "" else: result = subprocess.run( - ["ps", "eww", "-ax", "-o", "pid=,command="], + ["ps", "-A", "eww", "-o", "pid=,command="], capture_output=True, text=True, timeout=10, ) - for line in result.stdout.split('\n'): + if result.returncode != 0: + return [] + for line in result.stdout.split("\n"): stripped = 
line.strip() - if not stripped or 'grep' in stripped: + if not stripped or "grep" in stripped: continue pid = None @@ -251,16 +277,137 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals if pid is None: continue - if pid == os.getpid() or pid in pids or pid in _exclude: - continue - if any(pattern in command for pattern in patterns) and (all_profiles or _matches_current_profile(command)): - pids.append(pid) + if any(pattern in command for pattern in patterns) and ( + all_profiles or _matches_current_profile(command) + ): + _append_unique_pid(pids, pid, exclude_pids) except (OSError, subprocess.TimeoutExpired): - pass + return [] return pids +def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = False) -> list: + """Find PIDs of running gateway processes. + + Args: + exclude_pids: PIDs to exclude from the result (e.g. service-managed + PIDs that should not be killed during a stale-process sweep). + all_profiles: When ``True``, return gateway PIDs across **all** + profiles (the pre-7923 global behaviour). ``hermes update`` + needs this because a code update affects every profile. + When ``False`` (default), only PIDs belonging to the current + Hermes profile are returned. 
+ """ + _exclude = set(exclude_pids or set()) + pids: list[int] = [] + if not all_profiles: + try: + from gateway.status import get_running_pid + + _append_unique_pid(pids, get_running_pid(), _exclude) + except Exception: + pass + for pid in _get_service_pids(): + _append_unique_pid(pids, pid, _exclude) + for pid in _scan_gateway_pids(_exclude, all_profiles=all_profiles): + _append_unique_pid(pids, pid, _exclude) + return pids + + +def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]: + selected_system = _select_systemd_scope(system) + unit_exists = get_systemd_unit_path(system=selected_system).exists() + if not unit_exists: + return selected_system, False + try: + result = _run_systemctl( + ["is-active", get_service_name()], + system=selected_system, + capture_output=True, + text=True, + timeout=10, + ) + except (RuntimeError, subprocess.TimeoutExpired): + return selected_system, False + return selected_system, result.stdout.strip() == "active" + + +def _probe_launchd_service_running() -> bool: + if not get_launchd_plist_path().exists(): + return False + try: + result = subprocess.run( + ["launchctl", "list", get_launchd_label()], + capture_output=True, + text=True, + timeout=10, + ) + except subprocess.TimeoutExpired: + return False + return result.returncode == 0 + + +def get_gateway_runtime_snapshot(system: bool = False) -> GatewayRuntimeSnapshot: + """Return a unified view of gateway liveness for the current profile.""" + gateway_pids = tuple(find_gateway_pids()) + if is_termux(): + return GatewayRuntimeSnapshot( + manager="Termux / manual process", + gateway_pids=gateway_pids, + ) + + from hermes_constants import is_container + + if is_linux() and is_container(): + return GatewayRuntimeSnapshot( + manager="docker (foreground)", + gateway_pids=gateway_pids, + ) + + if supports_systemd_services(): + selected_system, service_running = _probe_systemd_service_running(system=system) + scope_label = _service_scope_label(selected_system) + 
return GatewayRuntimeSnapshot( + manager=f"systemd ({scope_label})", + service_installed=get_systemd_unit_path(system=selected_system).exists(), + service_running=service_running, + gateway_pids=gateway_pids, + service_scope=scope_label, + ) + + if is_macos(): + return GatewayRuntimeSnapshot( + manager="launchd", + service_installed=get_launchd_plist_path().exists(), + service_running=_probe_launchd_service_running(), + gateway_pids=gateway_pids, + service_scope="launchd", + ) + + return GatewayRuntimeSnapshot( + manager="manual process", + gateway_pids=gateway_pids, + ) + + +def _format_gateway_pids(pids: tuple[int, ...] | list[int], *, limit: int | None = 3) -> str: + rendered = [str(pid) for pid in pids[:limit] if pid > 0] if limit is not None else [str(pid) for pid in pids if pid > 0] + if limit is not None and len(pids) > limit: + rendered.append("...") + return ", ".join(rendered) + + +def _print_gateway_process_mismatch(snapshot: GatewayRuntimeSnapshot) -> None: + if not snapshot.has_process_service_mismatch: + return + print() + print("⚠ Gateway process is running for this profile, but the service is not active") + print(f" PID(s): {_format_gateway_pids(snapshot.gateway_pids, limit=None)}") + print(" This is usually a manual foreground/tmux/nohup run, so `hermes gateway`") + print(" can refuse to start another copy until this process stops.") + + def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None, all_profiles: bool = False) -> int: """Kill any running gateway processes. Returns count killed. @@ -340,25 +487,44 @@ def _wsl_systemd_operational() -> bool: WSL2 with ``systemd=true`` in wsl.conf has working systemd. WSL2 without it (or WSL1) does not — systemctl commands fail. 
""" + return _systemd_operational(system=True) + + +def _systemd_operational(system: bool = False) -> bool: + """Return True when the requested systemd scope is usable.""" try: - result = subprocess.run( - ["systemctl", "is-system-running"], - capture_output=True, text=True, timeout=5, + result = _run_systemctl( + ["is-system-running"], + system=system, + capture_output=True, + text=True, + timeout=5, ) # "running", "degraded", "starting" all mean systemd is PID 1 status = result.stdout.strip().lower() return status in ("running", "degraded", "starting", "initializing") - except (FileNotFoundError, subprocess.TimeoutExpired, OSError): + except (RuntimeError, subprocess.TimeoutExpired, OSError): return False +def _container_systemd_operational() -> bool: + """Return True when a container exposes working user or system systemd.""" + if _systemd_operational(system=False): + return True + if _systemd_operational(system=True): + return True + return False + + def supports_systemd_services() -> bool: - if not is_linux() or is_termux() or is_container(): + if not is_linux() or is_termux(): return False if shutil.which("systemctl") is None: return False if is_wsl(): return _wsl_systemd_operational() + if is_container(): + return _container_systemd_operational() return True @@ -521,6 +687,195 @@ def has_conflicting_systemd_units() -> bool: return len(get_installed_systemd_scopes()) > 1 +# Legacy service names from older Hermes installs that predate the +# hermes-gateway rename. Kept as an explicit allowlist (NOT a glob) so +# profile units (hermes-gateway-*.service) and unrelated third-party +# "hermes" units are never matched. +_LEGACY_SERVICE_NAMES: tuple[str, ...] = ("hermes.service",) + +# ExecStart content markers that identify a unit as running our gateway. +# A legacy unit is only flagged when its file contains one of these. +_LEGACY_UNIT_EXECSTART_MARKERS: tuple[str, ...] 
= ( + "hermes_cli.main gateway", + "hermes_cli/main.py gateway", + "gateway/run.py", + " hermes gateway ", + "/hermes gateway ", +) + + +def _legacy_unit_search_paths() -> list[tuple[bool, Path]]: + """Return ``[(is_system, base_dir), ...]`` — directories to scan for legacy units. + + Factored out so tests can monkeypatch the search roots without touching + real filesystem paths. + """ + return [ + (False, Path.home() / ".config" / "systemd" / "user"), + (True, Path("/etc/systemd/system")), + ] + + +def _find_legacy_hermes_units() -> list[tuple[str, Path, bool]]: + """Return ``[(unit_name, unit_path, is_system)]`` for legacy Hermes gateway units. + + Detects unit files installed by older Hermes versions that used a + different service name (e.g. ``hermes.service`` before the rename to + ``hermes-gateway.service``). When both a legacy unit and the current + ``hermes-gateway.service`` are active, they fight over the same bot + token — the PR #5646 signal-recovery change turns this into a 30-second + SIGTERM flap loop. + + Safety guards: + + * Explicit allowlist of legacy names (no globbing). Profile units such + as ``hermes-gateway-coder.service`` and unrelated third-party + ``hermes-*`` services are never matched. + * ExecStart content check — only flag units that invoke our gateway + entrypoint. A user-created ``hermes.service`` running an unrelated + binary is left untouched. + * Results are returned purely for caller inspection; this function + never mutates or removes anything. 
+ """ + results: list[tuple[str, Path, bool]] = [] + for is_system, base in _legacy_unit_search_paths(): + for name in _LEGACY_SERVICE_NAMES: + unit_path = base / name + try: + if not unit_path.exists(): + continue + text = unit_path.read_text(encoding="utf-8", errors="ignore") + except (OSError, PermissionError): + continue + if not any(marker in text for marker in _LEGACY_UNIT_EXECSTART_MARKERS): + # Not our gateway — leave alone + continue + results.append((name, unit_path, is_system)) + return results + + +def has_legacy_hermes_units() -> bool: + """Return True when any legacy Hermes gateway unit files exist.""" + return bool(_find_legacy_hermes_units()) + + +def print_legacy_unit_warning() -> None: + """Warn about legacy Hermes gateway unit files if any are installed. + + Idempotent: prints nothing when no legacy units are detected. Safe to + call from any status/install/setup path. + """ + legacy = _find_legacy_hermes_units() + if not legacy: + return + print_warning("Legacy Hermes gateway unit(s) detected from an older install:") + for name, path, is_system in legacy: + scope = "system" if is_system else "user" + print_info(f" {path} ({scope} scope)") + print_info(" These run alongside the current hermes-gateway service and") + print_info(" cause SIGTERM flap loops — both try to use the same bot token.") + print_info(" Remove them with:") + print_info(" hermes gateway migrate-legacy") + + +def remove_legacy_hermes_units( + interactive: bool = True, + dry_run: bool = False, +) -> tuple[int, list[Path]]: + """Stop, disable, and remove legacy Hermes gateway unit files. + + Iterates over whatever ``_find_legacy_hermes_units()`` returns — which is + an explicit allowlist of legacy names (not a glob). Profile units and + unrelated third-party services are never touched. + + Args: + interactive: When True, prompt before removing. When False, remove + without asking (used when another prompt has already confirmed, + e.g. from the install flow). 
+ dry_run: When True, list what would be removed and return. + + Returns: + ``(removed_count, remaining_paths)`` — remaining includes units we + couldn't remove (typically system-scope when not running as root). + """ + legacy = _find_legacy_hermes_units() + if not legacy: + print("No legacy Hermes gateway units found.") + return 0, [] + + user_units = [(n, p) for n, p, is_sys in legacy if not is_sys] + system_units = [(n, p) for n, p, is_sys in legacy if is_sys] + + print() + print("Legacy Hermes gateway unit(s) found:") + for name, path, is_system in legacy: + scope = "system" if is_system else "user" + print(f" {path} ({scope} scope)") + print() + + if dry_run: + print("(dry-run — nothing removed)") + return 0, [p for _, p, _ in legacy] + + if interactive and not prompt_yes_no("Remove these legacy units?", True): + print("Skipped. Run again with: hermes gateway migrate-legacy") + return 0, [p for _, p, _ in legacy] + + removed = 0 + remaining: list[Path] = [] + + # User-scope removal + for name, path in user_units: + try: + _run_systemctl(["stop", name], system=False, check=False, timeout=90) + _run_systemctl(["disable", name], system=False, check=False, timeout=30) + path.unlink(missing_ok=True) + print(f" ✓ Removed {path}") + removed += 1 + except (OSError, RuntimeError) as e: + print(f" ⚠ Could not remove {path}: {e}") + remaining.append(path) + + if user_units: + try: + _run_systemctl(["daemon-reload"], system=False, check=False, timeout=30) + except RuntimeError: + pass + + # System-scope removal (needs root) + if system_units: + if os.geteuid() != 0: + print() + print_warning("System-scope legacy units require root to remove.") + print_info(" Re-run with: sudo hermes gateway migrate-legacy") + for _, path in system_units: + remaining.append(path) + else: + for name, path in system_units: + try: + _run_systemctl(["stop", name], system=True, check=False, timeout=90) + _run_systemctl(["disable", name], system=True, check=False, timeout=30) + 
path.unlink(missing_ok=True) + print(f" ✓ Removed {path}") + removed += 1 + except (OSError, RuntimeError) as e: + print(f" ⚠ Could not remove {path}: {e}") + remaining.append(path) + + try: + _run_systemctl(["daemon-reload"], system=True, check=False, timeout=30) + except RuntimeError: + pass + + print() + if remaining: + print_warning(f"{len(remaining)} legacy unit(s) still present — see messages above.") + else: + print_success(f"Removed {removed} legacy unit(s).") + + return removed, remaining + + def print_systemd_scope_conflict_warning() -> None: scopes = get_installed_systemd_scopes() if len(scopes) < 2: @@ -715,7 +1070,9 @@ def _detect_venv_dir() -> Path | None: """Detect the active virtualenv directory. Checks ``sys.prefix`` first (works regardless of the directory name), - then falls back to probing common directory names under PROJECT_ROOT. + then ``VIRTUAL_ENV`` env var (covers uv-managed environments where + sys.prefix == sys.base_prefix), then falls back to probing common + directory names under PROJECT_ROOT. Returns ``None`` when no virtualenv can be found. """ # If we're running inside a virtualenv, sys.prefix points to it. @@ -724,6 +1081,15 @@ def _detect_venv_dir() -> Path | None: if venv.is_dir(): return venv + # uv and some other tools set VIRTUAL_ENV without changing sys.prefix. + # This catches `uv run` where sys.prefix == sys.base_prefix but the + # environment IS a venv. (#8620) + _virtual_env = os.environ.get("VIRTUAL_ENV") + if _virtual_env: + venv = Path(_virtual_env) + if venv.is_dir(): + return venv + # Fallback: check common virtualenv directory names under the project root. for candidate in (".venv", "venv"): venv = PROJECT_ROOT / candidate @@ -1043,6 +1409,19 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str if system: _require_root_for_system_service("install") + # Offer to remove legacy units (hermes.service from pre-rename installs) + # before installing the new hermes-gateway.service. 
If both remain, they + # flap-fight for the Telegram bot token on every gateway startup. + # Only removes units matching _LEGACY_SERVICE_NAMES + our ExecStart + # signature — profile units are never touched. + if has_legacy_hermes_units(): + print() + print_legacy_unit_warning() + print() + if prompt_yes_no("Remove the legacy unit(s) before installing?", True): + remove_legacy_hermes_units(interactive=False) + print() + unit_path = get_systemd_unit_path(system=system) scope_flag = " --system" if system else "" @@ -1081,6 +1460,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str _ensure_linger_enabled() print_systemd_scope_conflict_warning() + print_legacy_unit_warning() def systemd_uninstall(system: bool = False): @@ -1128,7 +1508,62 @@ def systemd_restart(system: bool = False): pid = get_running_pid() if pid is not None and _request_gateway_self_restart(pid): - print(f"✓ {_service_scope_label(system).capitalize()} service restart requested") + # SIGUSR1 sent — the gateway will drain active agents, exit with + # code 75, and systemd will restart it after RestartSec (30s). + # Wait for the old process to die and the new one to become active + # so the CLI doesn't return while the service is still restarting. 
+ import time + scope_label = _service_scope_label(system).capitalize() + svc = get_service_name() + scope_cmd = _systemctl_cmd(system) + + # Phase 1: wait for old process to exit (drain + shutdown) + print(f"⏳ {scope_label} service draining active work...") + deadline = time.time() + 90 + while time.time() < deadline: + try: + os.kill(pid, 0) + time.sleep(1) + except (ProcessLookupError, PermissionError): + break # old process is gone + else: + print(f"⚠ Old process (PID {pid}) still alive after 90s") + + # Phase 2: wait for systemd to start the new process + print(f"⏳ Waiting for {svc} to restart...") + deadline = time.time() + 60 + while time.time() < deadline: + try: + result = subprocess.run( + scope_cmd + ["is-active", svc], + capture_output=True, text=True, timeout=5, + ) + if result.stdout.strip() == "active": + # Verify it's a NEW process, not the old one somehow + new_pid = get_running_pid() + if new_pid and new_pid != pid: + print(f"✓ {scope_label} service restarted (PID {new_pid})") + return + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + time.sleep(2) + + # Timed out — check final state + try: + result = subprocess.run( + scope_cmd + ["is-active", svc], + capture_output=True, text=True, timeout=5, + ) + if result.stdout.strip() == "active": + print(f"✓ {scope_label} service restarted") + return + except Exception: + pass + print( + f"⚠ {scope_label} service did not become active within 60s.\n" + f" Check status: {'sudo ' if system else ''}hermes gateway status\n" + f" Check logs: journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'" + ) return _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90) print(f"✓ {_service_scope_label(system).capitalize()} service restarted") @@ -1149,6 +1584,10 @@ def systemd_status(deep: bool = False, system: bool = False): print_systemd_scope_conflict_warning() print() + if has_legacy_hermes_units(): + print_legacy_unit_warning() + print() + 
if not systemd_unit_is_current(system=system): print("⚠ Installed gateway service definition is outdated") print(f" Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag} # auto-refreshes the unit") @@ -1932,7 +2371,7 @@ _PLATFORMS = [ {"name": "QQ_ALLOWED_USERS", "prompt": "Allowed user OpenIDs (comma-separated, leave empty for open access)", "password": False, "is_allowlist": True, "help": "Optional — restrict DM access to specific user OpenIDs."}, - {"name": "QQ_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False, + {"name": "QQBOT_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False, "help": "OpenID to deliver cron results and notifications to."}, ], }, @@ -2145,9 +2584,62 @@ def _setup_sms(): def _setup_dingtalk(): - """Configure DingTalk via the standard platform setup.""" + """Configure DingTalk — QR scan (recommended) or manual credential entry.""" + from hermes_cli.setup import ( + prompt_choice, prompt_yes_no, print_info, print_success, print_warning, + ) + dingtalk_platform = next(p for p in _PLATFORMS if p["key"] == "dingtalk") - _setup_standard_platform(dingtalk_platform) + emoji = dingtalk_platform["emoji"] + label = dingtalk_platform["label"] + + print() + print(color(f" ─── {emoji} {label} Setup ───", Colors.CYAN)) + + existing = get_env_value("DINGTALK_CLIENT_ID") + if existing: + print() + print_success(f"{label} is already configured (Client ID: {existing}).") + if not prompt_yes_no(f" Reconfigure {label}?", False): + return + + print() + method = prompt_choice( + " Choose setup method", + [ + "QR Code Scan (Recommended, auto-obtain Client ID and Client Secret)", + "Manual Input (Client ID and Client Secret)", + ], + default=0, + ) + + if method == 0: + # ── QR-code device-flow authorization ── + try: + from hermes_cli.dingtalk_auth import dingtalk_qr_auth + except ImportError as exc: + print_warning(f" QR auth module failed to 
load ({exc}), falling back to manual input.") + _setup_standard_platform(dingtalk_platform) + return + + result = dingtalk_qr_auth() + if result is None: + print_warning(" QR auth incomplete, falling back to manual input.") + _setup_standard_platform(dingtalk_platform) + return + + client_id, client_secret = result + save_env_value("DINGTALK_CLIENT_ID", client_id) + save_env_value("DINGTALK_CLIENT_SECRET", client_secret) + save_env_value("DINGTALK_ALLOW_ALL_USERS", "true") + print() + print_success(f"{emoji} {label} configured via QR scan!") + else: + # ── Manual entry ── + _setup_standard_platform(dingtalk_platform) + # Also enable allow-all by default for convenience + if get_env_value("DINGTALK_CLIENT_ID"): + save_env_value("DINGTALK_ALLOW_ALL_USERS", "true") def _setup_wecom(): @@ -2506,6 +2998,215 @@ def _setup_feishu(): print_info(f" Bot: {bot_name}") +def _setup_qqbot(): + """Interactive setup for QQ Bot — scan-to-configure or manual credentials.""" + print() + print(color(" ─── 🐧 QQ Bot Setup ───", Colors.CYAN)) + + existing_app_id = get_env_value("QQ_APP_ID") + existing_secret = get_env_value("QQ_CLIENT_SECRET") + if existing_app_id and existing_secret: + print() + print_success("QQ Bot is already configured.") + if not prompt_yes_no(" Reconfigure QQ Bot?", False): + return + + # ── Choose setup method ── + print() + method_choices = [ + "Scan QR code to add bot automatically (recommended)", + "Enter existing App ID and App Secret manually", + ] + method_idx = prompt_choice(" How would you like to set up QQ Bot?", method_choices, 0) + + credentials = None + used_qr = False + + if method_idx == 0: + # ── QR scan-to-configure ── + try: + credentials = _qqbot_qr_flow() + except KeyboardInterrupt: + print() + print_warning(" QQ Bot setup cancelled.") + return + if credentials: + used_qr = True + if not credentials: + print_info(" QR setup did not complete. 
Continuing with manual input.") + + # ── Manual credential input ── + if not credentials: + print() + print_info(" Go to https://q.qq.com to register a QQ Bot application.") + print_info(" Note your App ID and App Secret from the application page.") + print() + app_id = prompt(" App ID", password=False) + if not app_id: + print_warning(" Skipped — QQ Bot won't work without an App ID.") + return + app_secret = prompt(" App Secret", password=True) + if not app_secret: + print_warning(" Skipped — QQ Bot won't work without an App Secret.") + return + credentials = {"app_id": app_id.strip(), "client_secret": app_secret.strip(), "user_openid": ""} + + # ── Save core credentials ── + save_env_value("QQ_APP_ID", credentials["app_id"]) + save_env_value("QQ_CLIENT_SECRET", credentials["client_secret"]) + + user_openid = credentials.get("user_openid", "") + + # ── DM security policy ── + print() + access_choices = [ + "Use DM pairing approval (recommended)", + "Allow all direct messages", + "Only allow listed user OpenIDs", + ] + access_idx = prompt_choice(" How should direct messages be authorized?", access_choices, 0) + if access_idx == 0: + save_env_value("QQ_ALLOW_ALL_USERS", "false") + if user_openid: + print() + if prompt_yes_no(f" Add yourself ({user_openid}) to the allow list?", True): + save_env_value("QQ_ALLOWED_USERS", user_openid) + print_success(f" Allow list set to {user_openid}") + else: + save_env_value("QQ_ALLOWED_USERS", "") + else: + save_env_value("QQ_ALLOWED_USERS", "") + print_success(" DM pairing enabled.") + print_info(" Unknown users can request access; approve with `hermes pairing approve`.") + elif access_idx == 1: + save_env_value("QQ_ALLOW_ALL_USERS", "true") + save_env_value("QQ_ALLOWED_USERS", "") + print_warning(" Open DM access enabled for QQ Bot.") + else: + default_allow = user_openid or "" + allowlist = prompt(" Allowed user OpenIDs (comma-separated)", default_allow, password=False).replace(" ", "") + save_env_value("QQ_ALLOW_ALL_USERS", 
"false") + save_env_value("QQ_ALLOWED_USERS", allowlist) + print_success(" Allowlist saved.") + + # ── Home channel ── + if user_openid: + print() + if prompt_yes_no(f" Use your QQ user ID ({user_openid}) as the home channel?", True): + save_env_value("QQBOT_HOME_CHANNEL", user_openid) + print_success(f" Home channel set to {user_openid}") + else: + print() + home_channel = prompt(" Home channel OpenID (for cron/notifications, or empty)", password=False) + if home_channel: + save_env_value("QQBOT_HOME_CHANNEL", home_channel.strip()) + print_success(f" Home channel set to {home_channel.strip()}") + + print() + print_success("🐧 QQ Bot configured!") + print_info(f" App ID: {credentials['app_id']}") + + +def _qqbot_render_qr(url: str) -> bool: + """Try to render a QR code in the terminal. Returns True if successful.""" + try: + import qrcode as _qr + qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L) + qr.add_data(url) + qr.make(fit=True) + qr.print_ascii(invert=True) + return True + except Exception: + return False + + +def _qqbot_qr_flow(): + """Run the QR-code scan-to-configure flow. + + Returns a dict with app_id, client_secret, user_openid on success, + or None on failure/cancel. 
+ """ + try: + from gateway.platforms.qqbot import ( + create_bind_task, poll_bind_result, build_connect_url, + decrypt_secret, BindStatus, + ) + from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL + except Exception as exc: + print_error(f" QQBot onboard import failed: {exc}") + return None + + import asyncio + import time + + MAX_REFRESHES = 3 + refresh_count = 0 + + while refresh_count <= MAX_REFRESHES: + loop = asyncio.new_event_loop() + + # ── Create bind task ── + try: + task_id, aes_key = loop.run_until_complete(create_bind_task()) + except Exception as e: + print_warning(f" Failed to create bind task: {e}") + loop.close() + return None + + url = build_connect_url(task_id) + + # ── Display QR code + URL ── + print() + if _qqbot_render_qr(url): + print(f" Scan the QR code above, or open this URL directly:\n {url}") + else: + print(f" Open this URL in QQ on your phone:\n {url}") + print_info(" Tip: pip install qrcode to show a scannable QR code here") + + # ── Poll loop (silent — keep QR visible at bottom) ── + try: + while True: + try: + status, app_id, encrypted_secret, user_openid = loop.run_until_complete( + poll_bind_result(task_id) + ) + except Exception: + time.sleep(ONBOARD_POLL_INTERVAL) + continue + + if status == BindStatus.COMPLETED: + client_secret = decrypt_secret(encrypted_secret, aes_key) + print() + print_success(f" QR scan complete! (App ID: {app_id})") + if user_openid: + print_info(f" Scanner's OpenID: {user_openid}") + return { + "app_id": app_id, + "client_secret": client_secret, + "user_openid": user_openid, + } + + if status == BindStatus.EXPIRED: + refresh_count += 1 + if refresh_count > MAX_REFRESHES: + print() + print_warning(f" QR code expired {MAX_REFRESHES} times — giving up.") + return None + print() + print_warning(f" QR code expired, refreshing... 
({refresh_count}/{MAX_REFRESHES})") + loop.close() + break # outer while creates a new task + + time.sleep(ONBOARD_POLL_INTERVAL) + except KeyboardInterrupt: + loop.close() + raise + finally: + loop.close() + + return None + + def _setup_signal(): """Interactive setup for Signal messenger.""" import shutil @@ -2643,6 +3344,10 @@ def gateway_setup(): print_systemd_scope_conflict_warning() print() + if supports_systemd_services() and has_legacy_hermes_units(): + print_legacy_unit_warning() + print() + if service_installed and service_running: print_success("Gateway service is installed and running.") elif service_installed: @@ -2683,8 +3388,12 @@ def gateway_setup(): _setup_signal() elif platform["key"] == "weixin": _setup_weixin() + elif platform["key"] == "dingtalk": + _setup_dingtalk() elif platform["key"] == "feishu": _setup_feishu() + elif platform["key"] == "qqbot": + _setup_qqbot() else: _setup_standard_platform(platform) @@ -2864,6 +3573,15 @@ def gateway_command(args): elif subcmd == "start": system = getattr(args, 'system', False) + start_all = getattr(args, 'all', False) + + if start_all: + # Kill all stale gateway processes across all profiles before starting + killed = kill_gateway_processes(all_profiles=True) + if killed: + print(f"✓ Killed {killed} stale gateway process(es) across all profiles") + _wait_for_gateway_exit(timeout=10.0, force_after=5.0) + if is_termux(): print("Gateway service start is not supported on Termux because there is no system service manager.") print("Run manually: hermes gateway") @@ -2949,7 +3667,39 @@ def gateway_command(args): # Try service first, fall back to killing and restarting service_available = False system = getattr(args, 'system', False) + restart_all = getattr(args, 'all', False) service_configured = False + + if restart_all: + # --all: stop every gateway process across all profiles, then start fresh + service_stopped = False + if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or 
get_systemd_unit_path(system=True).exists()): + try: + systemd_stop(system=system) + service_stopped = True + except subprocess.CalledProcessError: + pass + elif is_macos() and get_launchd_plist_path().exists(): + try: + launchd_stop() + service_stopped = True + except subprocess.CalledProcessError: + pass + killed = kill_gateway_processes(all_profiles=True) + total = killed + (1 if service_stopped else 0) + if total: + print(f"✓ Stopped {total} gateway process(es) across all profiles") + _wait_for_gateway_exit(timeout=10.0, force_after=5.0) + + # Start the current profile's service fresh + print("Starting gateway...") + if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()): + systemd_start(system=system) + elif is_macos() and get_launchd_plist_path().exists(): + launchd_start() + else: + run_gateway(verbose=0) + return if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()): service_configured = True @@ -3003,15 +3753,18 @@ def gateway_command(args): elif subcmd == "status": deep = getattr(args, 'deep', False) system = getattr(args, 'system', False) + snapshot = get_gateway_runtime_snapshot(system=system) # Check for service first if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()): systemd_status(deep, system=system) + _print_gateway_process_mismatch(snapshot) elif is_macos() and get_launchd_plist_path().exists(): launchd_status(deep) + _print_gateway_process_mismatch(snapshot) else: # Check for manually running processes - pids = find_gateway_pids() + pids = list(snapshot.gateway_pids) if pids: print(f"✓ Gateway is running (PID: {', '.join(map(str, pids))})") print(" (Running manually, not as a system service)") @@ -3052,3 +3805,14 @@ def gateway_command(args): else: print(" hermes gateway install # Install as user service") print(" sudo 
hermes gateway install --system # Install as boot-time system service") + + elif subcmd == "migrate-legacy": + # Stop, disable, and remove legacy Hermes gateway unit files from + # pre-rename installs (e.g. hermes.service). Profile units and + # unrelated third-party services are never touched. + dry_run = getattr(args, 'dry_run', False) + yes = getattr(args, 'yes', False) + if not supports_systemd_services() and not is_macos(): + print("Legacy unit migration only applies to systemd-based Linux hosts.") + return + remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index c73344be4e..61b1d38a61 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -45,11 +45,13 @@ Usage: import argparse import os +import shutil import subprocess import sys from pathlib import Path from typing import Optional + def _require_tty(command_name: str) -> None: """Exit with a clear error if stdin is not a terminal. @@ -71,6 +73,7 @@ def _require_tty(command_name: str) -> None: PROJECT_ROOT = Path(__file__).parent.parent.resolve() sys.path.insert(0, str(PROJECT_ROOT)) + # --------------------------------------------------------------------------- # Profile override — MUST happen before any hermes module import. 
# @@ -101,6 +104,7 @@ def _apply_profile_override() -> None: if profile_name is None: try: from hermes_constants import get_default_hermes_root + active_path = get_default_hermes_root() / "active_profile" if active_path.exists(): name = active_path.read_text().strip() @@ -114,13 +118,17 @@ def _apply_profile_override() -> None: if profile_name is not None: try: from hermes_cli.profiles import resolve_profile_env + hermes_home = resolve_profile_env(profile_name) except (ValueError, FileNotFoundError) as exc: print(f"Error: {exc}", file=sys.stderr) sys.exit(1) except Exception as exc: # A bug in profiles.py must NEVER prevent hermes from starting - print(f"Warning: profile override failed ({exc}), using default", file=sys.stderr) + print( + f"Warning: profile override failed ({exc}), using default", + file=sys.stderr, + ) return os.environ["HERMES_HOME"] = hermes_home # Strip the flag from argv so argparse doesn't choke @@ -128,25 +136,28 @@ def _apply_profile_override() -> None: for i, arg in enumerate(argv): if arg in ("--profile", "-p"): start = i + 1 # +1 because argv is sys.argv[1:] - sys.argv = sys.argv[:start] + sys.argv[start + consume:] + sys.argv = sys.argv[:start] + sys.argv[start + consume :] break elif arg.startswith("--profile="): start = i + 1 - sys.argv = sys.argv[:start] + sys.argv[start + 1:] + sys.argv = sys.argv[:start] + sys.argv[start + 1 :] break + _apply_profile_override() # Load .env from ~/.hermes/.env first, then project root as dev fallback. # User-managed env files should override stale shell exports on restart. from hermes_cli.config import get_hermes_home from hermes_cli.env_loader import load_hermes_dotenv -load_hermes_dotenv(project_env=PROJECT_ROOT / '.env') + +load_hermes_dotenv(project_env=PROJECT_ROOT / ".env") # Initialize centralized file logging early — all `hermes` subcommands # (chat, setup, gateway, config, etc.) write to agent.log + errors.log. 
try: from hermes_logging import setup_logging as _setup_logging + _setup_logging(mode="cli") except Exception: pass # best-effort — don't crash the CLI if logging setup fails @@ -155,6 +166,7 @@ except Exception: try: from hermes_cli.config import load_config as _load_config_early from hermes_constants import apply_ipv4_preference as _apply_ipv4 + _early_cfg = _load_config_early() _net = _early_cfg.get("network", {}) if isinstance(_net, dict) and _net.get("force_ipv4"): @@ -201,6 +213,7 @@ def _has_any_provider_configured() -> bool: # tool credentials (Claude Code, Codex CLI) that shouldn't silently skip # the setup wizard on a fresh install. from hermes_cli.config import DEFAULT_CONFIG + _DEFAULT_MODEL = DEFAULT_CONFIG.get("model", "") cfg = load_config() model_cfg = cfg.get("model") @@ -218,7 +231,13 @@ def _has_any_provider_configured() -> bool: from hermes_cli.auth import PROVIDER_REGISTRY # Collect all provider env vars - provider_env_vars = {"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"} + provider_env_vars = { + "OPENROUTER_API_KEY", + "OPENAI_API_KEY", + "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", + "OPENAI_BASE_URL", + } for pconfig in PROVIDER_REGISTRY.values(): if pconfig.auth_type == "api_key": provider_env_vars.update(pconfig.api_key_env_vars) @@ -256,6 +275,7 @@ def _has_any_provider_configured() -> bool: if auth_file.exists(): try: import json + auth = json.loads(auth_file.read_text()) active = auth.get("active_provider") if active: @@ -265,7 +285,6 @@ def _has_any_provider_configured() -> bool: except Exception: pass - # Check config.yaml — if model is a dict with an explicit provider set, # the user has gone through setup (fresh installs have model as a plain # string). Also covers custom endpoints that store api_key/base_url in @@ -282,9 +301,15 @@ def _has_any_provider_configured() -> bool: # being installed doesn't mean the user wants Hermes to use their tokens. 
if _has_hermes_config: try: - from agent.anthropic_adapter import read_claude_code_credentials, is_claude_code_token_valid + from agent.anthropic_adapter import ( + read_claude_code_credentials, + is_claude_code_token_valid, + ) + creds = read_claude_code_credentials() - if creds and (is_claude_code_token_valid(creds) or creds.get("refreshToken")): + if creds and ( + is_claude_code_token_valid(creds) or creds.get("refreshToken") + ): return True except Exception: pass @@ -346,10 +371,10 @@ def _session_browse_picker(sessions: list) -> Optional[str]: if curses.has_colors(): curses.start_color() curses.use_default_colors() - curses.init_pair(1, curses.COLOR_GREEN, -1) # selected + curses.init_pair(1, curses.COLOR_GREEN, -1) # selected curses.init_pair(2, curses.COLOR_YELLOW, -1) # header - curses.init_pair(3, curses.COLOR_CYAN, -1) # search - curses.init_pair(4, 8, -1) # dim + curses.init_pair(3, curses.COLOR_CYAN, -1) # search + curses.init_pair(4, 8, -1) # dim cursor = 0 scroll_offset = 0 @@ -390,7 +415,9 @@ def _session_browse_picker(sessions: list) -> Optional[str]: name_width = max(20, max_x - fixed_cols) col_header = f" {'Title / Preview':<{name_width}} {'Active':<10} {'Src':<5} {'ID'}" try: - dim_attr = curses.color_pair(4) if curses.has_colors() else curses.A_DIM + dim_attr = ( + curses.color_pair(4) if curses.has_colors() else curses.A_DIM + ) stdscr.addnstr(1, 0, col_header, max_x - 1, dim_attr) except curses.error: pass @@ -417,10 +444,12 @@ def _session_browse_picker(sessions: list) -> Optional[str]: elif cursor >= scroll_offset + visible_rows: scroll_offset = cursor - visible_rows + 1 - for draw_i, i in enumerate(range( - scroll_offset, - min(len(filtered), scroll_offset + visible_rows) - )): + for draw_i, i in enumerate( + range( + scroll_offset, + min(len(filtered), scroll_offset + visible_rows), + ) + ): y = draw_i + 3 if y >= max_y - 1: break @@ -446,18 +475,23 @@ def _session_browse_picker(sessions: list) -> Optional[str]: else: footer = f" 
0/{len(sessions)} sessions" try: - stdscr.addnstr(footer_y, 0, footer, max_x - 1, - curses.color_pair(4) if curses.has_colors() else curses.A_DIM) + stdscr.addnstr( + footer_y, + 0, + footer, + max_x - 1, + curses.color_pair(4) if curses.has_colors() else curses.A_DIM, + ) except curses.error: pass stdscr.refresh() key = stdscr.getch() - if key in (curses.KEY_UP, ): + if key in (curses.KEY_UP,): if filtered: cursor = (cursor - 1) % len(filtered) - elif key in (curses.KEY_DOWN, ): + elif key in (curses.KEY_DOWN,): if filtered: cursor = (cursor + 1) % len(filtered) elif key in (curses.KEY_ENTER, 10, 13): @@ -483,7 +517,7 @@ def _session_browse_picker(sessions: list) -> Optional[str]: filtered = list(sessions) cursor = 0 scroll_offset = 0 - elif key == ord('q') and not search_text: + elif key == ord("q") and not search_text: return elif 32 <= key <= 126: # Printable character → add to search filter @@ -526,12 +560,13 @@ def _session_browse_picker(sessions: list) -> Optional[str]: return None -def _resolve_last_cli_session() -> Optional[str]: - """Look up the most recent CLI session ID from SQLite. Returns None if unavailable.""" +def _resolve_last_session(source: str = "cli") -> Optional[str]: + """Look up the most recent session ID for a source.""" try: from hermes_state import SessionDB + db = SessionDB() - sessions = db.search_sessions(source="cli", limit=1) + sessions = db.search_sessions(source=source, limit=1) db.close() if sessions: return sessions[0]["id"] @@ -579,8 +614,10 @@ def _exec_in_container(container_info: dict, cli_args: list): runtime = shutil.which(backend) if not runtime: - print(f"Error: {backend} not found on PATH. Cannot route to container.", - file=sys.stderr) + print( + f"Error: {backend} not found on PATH. 
Cannot route to container.", + file=sys.stderr, + ) sys.exit(1) # Rootful containers (NixOS systemd service) are invisible to unprivileged @@ -588,14 +625,16 @@ def _exec_in_container(container_info: dict, cli_args: list): # Probe whether the runtime can see the container; if not, try via sudo. sudo_path = None probe = _probe_container( - [runtime, "inspect", "--format", "ok", container_name], backend, + [runtime, "inspect", "--format", "ok", container_name], + backend, ) if probe.returncode != 0: sudo_path = shutil.which("sudo") if sudo_path: probe2 = _probe_container( [sudo_path, "-n", runtime, "inspect", "--format", "ok", container_name], - backend, via_sudo=True, + backend, + via_sudo=True, ) if probe2.returncode != 0: print( @@ -608,10 +647,10 @@ def _exec_in_container(container_info: dict, cli_args: list): f"\n" f"On NixOS:\n" f"\n" - f' security.sudo.extraRules = [{{\n' + f" security.sudo.extraRules = [{{\n" f' users = [ "{os.getenv("USER", "your-user")}" ];\n' f' commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n' - f' }}];\n' + f" }}];\n" f"\n" f"Or run: sudo hermes {' '.join(cli_args)}", file=sys.stderr, @@ -636,7 +675,8 @@ def _exec_in_container(container_info: dict, cli_args: list): cmd_prefix = [sudo_path, "-n", runtime] if sudo_path else [runtime] exec_cmd = ( - cmd_prefix + ["exec"] + cmd_prefix + + ["exec"] + tty_flags + ["-u", exec_user] + env_flags @@ -653,29 +693,336 @@ def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]: - If it looks like a session ID (contains underscore + hex), try direct lookup first. - Otherwise, treat it as a title and use resolve_session_by_title (auto-latest). - Falls back to the other method if the first doesn't match. + - If the resolved session is a compression root, follow the chain forward + to the latest continuation. Users who remember the old root ID (e.g. 
+ from an exit summary printed before the bug fix, or from notes) get + resumed at the live tip instead of a stale parent with no messages. """ try: from hermes_state import SessionDB + db = SessionDB() # Try as exact session ID first session = db.get_session(name_or_id) + resolved_id: Optional[str] = None if session: - db.close() - return session["id"] + resolved_id = session["id"] + else: + # Try as title (with auto-latest for lineage) + resolved_id = db.resolve_session_by_title(name_or_id) + + if resolved_id: + # Project forward through compression chain so resumes land on + # the live tip instead of a dead compressed parent. + try: + resolved_id = db.get_compression_tip(resolved_id) or resolved_id + except Exception: + pass - # Try as title (with auto-latest for lineage) - session_id = db.resolve_session_by_title(name_or_id) db.close() - return session_id + return resolved_id except Exception: pass return None +def _print_tui_exit_summary(session_id: Optional[str]) -> None: + """Print a shell-visible epilogue after TUI exits.""" + target = session_id or _resolve_last_session(source="tui") + if not target: + return + + db = None + try: + from hermes_state import SessionDB + + db = SessionDB() + session = db.get_session(target) + if not session: + return + + title = db.get_session_title(target) + message_count = int(session.get("message_count") or 0) + input_tokens = int(session.get("input_tokens") or 0) + output_tokens = int(session.get("output_tokens") or 0) + cache_read_tokens = int(session.get("cache_read_tokens") or 0) + cache_write_tokens = int(session.get("cache_write_tokens") or 0) + reasoning_tokens = int(session.get("reasoning_tokens") or 0) + total_tokens = ( + input_tokens + + output_tokens + + cache_read_tokens + + cache_write_tokens + + reasoning_tokens + ) + except Exception: + return + finally: + if db is not None: + db.close() + + print() + print("Resume this session with:") + print(f" hermes --tui --resume {target}") + if title: + print(f' 
hermes --tui -c "{title}"') + print() + print(f"Session: {target}") + if title: + print(f"Title: {title}") + print(f"Messages: {message_count}") + print( + "Tokens: " + f"{total_tokens} (in {input_tokens}, out {output_tokens}, " + f"cache {cache_read_tokens + cache_write_tokens}, reasoning {reasoning_tokens})" + ) + + +def _tui_need_npm_install(root: Path) -> bool: + """True when @hermes/ink is missing or node_modules is behind package-lock.json (post-pull).""" + ink = root / "node_modules" / "@hermes" / "ink" / "package.json" + if not ink.is_file(): + return True + lock = root / "package-lock.json" + if not lock.is_file(): + return False + marker = root / "node_modules" / ".package-lock.json" + if not marker.is_file(): + return True + return lock.stat().st_mtime > marker.stat().st_mtime + + +def _find_bundled_tui(tui_dir: Path) -> Optional[Path]: + """Directory whose dist/entry.js we should run: HERMES_TUI_DIR first, else repo ui-tui.""" + env = os.environ.get("HERMES_TUI_DIR") + if env: + p = Path(env) + if (p / "dist" / "entry.js").exists() and not _tui_need_npm_install(p): + return p + if (tui_dir / "dist" / "entry.js").exists() and not _tui_need_npm_install(tui_dir): + return tui_dir + return None + + +def _tui_build_needed(tui_dir: Path) -> bool: + entry = tui_dir / "dist" / "entry.js" + if not entry.exists(): + return True + dist_m = entry.stat().st_mtime + skip = frozenset({"node_modules", "dist"}) + for dirpath, dirnames, filenames in os.walk(tui_dir, topdown=True): + dirnames[:] = [d for d in dirnames if d not in skip] + for fn in filenames: + if fn.endswith((".ts", ".tsx")): + if os.path.getmtime(os.path.join(dirpath, fn)) > dist_m: + return True + for meta in ( + "package.json", + "package-lock.json", + "tsconfig.json", + "tsconfig.build.json", + ): + mp = tui_dir / meta + if mp.exists() and mp.stat().st_mtime > dist_m: + return True + return False + + +def _hermes_ink_bundle_stale(tui_dir: Path) -> bool: + ink_root = tui_dir / "packages" / "hermes-ink" 
+ bundle = ink_root / "dist" / "ink-bundle.js" + if not bundle.exists(): + return True + bm = bundle.stat().st_mtime + skip = frozenset({"node_modules", "dist"}) + for dirpath, dirnames, filenames in os.walk(ink_root, topdown=True): + dirnames[:] = [d for d in dirnames if d not in skip] + for fn in filenames: + if fn.endswith((".ts", ".tsx")): + if os.path.getmtime(os.path.join(dirpath, fn)) > bm: + return True + mp = ink_root / "package.json" + if mp.exists() and mp.stat().st_mtime > bm: + return True + return False + + +def _ensure_tui_node() -> None: + """Make sure `node` + `npm` are on PATH for the TUI. + + If either is missing and scripts/lib/node-bootstrap.sh is available, source + it and call `ensure_node` (fnm/nvm/proto/brew/bundled cascade). After + install, capture the resolved node binary path from the bash subprocess + and prepend its directory to os.environ["PATH"] so shutil.which finds the + new binaries in this Python process — regardless of which version manager + was used (nvm, fnm, proto, brew, or the bundled fallback). + + Idempotent no-op when node+npm are already discoverable. Set + ``HERMES_SKIP_NODE_BOOTSTRAP=1`` to disable auto-install. + """ + if shutil.which("node") and shutil.which("npm"): + return + if os.environ.get("HERMES_SKIP_NODE_BOOTSTRAP"): + return + + helper = PROJECT_ROOT / "scripts" / "lib" / "node-bootstrap.sh" + if not helper.is_file(): + return + + hermes_home = os.environ.get("HERMES_HOME") or str(Path.home() / ".hermes") + try: + # Helper writes logs to stderr; we ask bash to print `command -v node` + # on stdout once ensure_node succeeds. Subshell PATH edits don't leak + # back into Python, so the stdout capture is the bridge. 
+ result = subprocess.run( + [ + "bash", + "-c", + f'source "{helper}" >&2 && ensure_node >&2 && command -v node', + ], + env={**os.environ, "HERMES_HOME": hermes_home}, + capture_output=True, + text=True, + check=False, + ) + except (OSError, subprocess.SubprocessError): + return + + parts = os.environ.get("PATH", "").split(os.pathsep) + extras: list[Path] = [] + + resolved = (result.stdout or "").strip() + if resolved: + extras.append(Path(resolved).resolve().parent) + + extras.extend([Path(hermes_home) / "node" / "bin", Path.home() / ".local" / "bin"]) + + for extra in extras: + s = str(extra) + if extra.is_dir() and s not in parts: + parts.insert(0, s) + os.environ["PATH"] = os.pathsep.join(parts) + + +def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]: + """TUI: --dev → tsx src; else node dist (HERMES_TUI_DIR or ui-tui, build when stale).""" + _ensure_tui_node() + + def _node_bin(bin: str) -> str: + if bin == "node": + env_node = os.environ.get("HERMES_NODE") + if env_node and os.path.isfile(env_node) and os.access(env_node, os.X_OK): + return env_node + path = shutil.which(bin) + if not path: + print(f"{bin} not found — install Node.js to use the TUI.") + sys.exit(1) + return path + + # pre-built dist + node_modules (nix / full HERMES_TUI_DIR) skips npm. 
+ if not tui_dev: + ext_dir = os.environ.get("HERMES_TUI_DIR") + if ext_dir: + p = Path(ext_dir) + if (p / "dist" / "entry.js").exists() and not _tui_need_npm_install(p): + node = _node_bin("node") + return [node, str(p / "dist" / "entry.js")], p + + npm = _node_bin("npm") + if _tui_need_npm_install(tui_dir): + if not os.environ.get("HERMES_QUIET"): + print("Installing TUI dependencies…") + result = subprocess.run( + [npm, "install", "--silent", "--no-fund", "--no-audit", "--progress=false"], + cwd=str(tui_dir), + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True, + env={**os.environ, "CI": "1"}, + ) + if result.returncode != 0: + err = (result.stderr or "").strip() + preview = "\n".join(err.splitlines()[-30:]) + print("npm install failed.") + if preview: + print(preview) + sys.exit(1) + + if tui_dev: + if _hermes_ink_bundle_stale(tui_dir): + result = subprocess.run( + [npm, "run", "build", "--prefix", "packages/hermes-ink"], + cwd=str(tui_dir), + capture_output=True, + text=True, + ) + if result.returncode != 0: + combined = f"{result.stdout or ''}{result.stderr or ''}".strip() + preview = "\n".join(combined.splitlines()[-30:]) + print("@hermes/ink build failed.") + if preview: + print(preview) + sys.exit(1) + tsx = tui_dir / "node_modules" / ".bin" / "tsx" + if tsx.exists(): + return [str(tsx), "src/entry.tsx"], tui_dir + return [npm, "start"], tui_dir + + if _tui_build_needed(tui_dir): + result = subprocess.run( + [npm, "run", "build"], + cwd=str(tui_dir), + capture_output=True, + text=True, + ) + if result.returncode != 0: + combined = f"{result.stdout or ''}{result.stderr or ''}".strip() + preview = "\n".join(combined.splitlines()[-30:]) + print("TUI build failed.") + if preview: + print(preview) + sys.exit(1) + + root = _find_bundled_tui(tui_dir) + if not root: + print("TUI build did not produce dist/entry.js") + sys.exit(1) + + node = _node_bin("node") + return [node, str(root / "dist" / "entry.js")], root + + +def 
_launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False): + """Replace current process with the TUI.""" + tui_dir = PROJECT_ROOT / "ui-tui" + + env = os.environ.copy() + env["HERMES_PYTHON_SRC_ROOT"] = os.environ.get( + "HERMES_PYTHON_SRC_ROOT", str(PROJECT_ROOT) + ) + env.setdefault("HERMES_PYTHON", sys.executable) + env.setdefault("HERMES_CWD", os.getcwd()) + if resume_session_id: + env["HERMES_TUI_RESUME"] = resume_session_id + + argv, cwd = _make_tui_argv(tui_dir, tui_dev) + try: + code = subprocess.call(argv, cwd=str(cwd), env=env) + except KeyboardInterrupt: + code = 130 + + if code in (0, 130): + _print_tui_exit_summary(resume_session_id) + + sys.exit(code) + + def cmd_chat(args): """Run interactive chat CLI.""" - # Resolve --continue into --resume with the latest CLI session or by name + use_tui = getattr(args, "tui", False) or os.environ.get("HERMES_TUI") == "1" + + # Resolve --continue into --resume with the latest session or by name continue_val = getattr(args, "continue_last", None) if continue_val and not getattr(args, "resume", None): if isinstance(continue_val, str): @@ -689,11 +1036,15 @@ def cmd_chat(args): sys.exit(1) else: # -c with no argument — continue the most recent session - last_id = _resolve_last_cli_session() + source = "tui" if use_tui else "cli" + last_id = _resolve_last_session(source=source) + if not last_id and source == "tui": + last_id = _resolve_last_session(source="cli") if last_id: args.resume = last_id else: - print("No previous CLI session found to continue.") + kind = "TUI" if use_tui else "CLI" + print(f"No previous {kind} session found to continue.") sys.exit(1) # Resolve --resume by title if it's not a direct session ID @@ -708,12 +1059,17 @@ def cmd_chat(args): # First-run guard: check if any provider is configured before launching if not _has_any_provider_configured(): print() - print("It looks like Hermes isn't configured yet -- no API keys or providers found.") + print( + "It looks like Hermes isn't 
configured yet -- no API keys or providers found." + ) print() print(" Run: hermes setup") print() - from hermes_cli.setup import is_interactive_stdin, print_noninteractive_setup_guidance + from hermes_cli.setup import ( + is_interactive_stdin, + print_noninteractive_setup_guidance, + ) if not is_interactive_stdin(): print_noninteractive_setup_guidance( @@ -735,6 +1091,7 @@ def cmd_chat(args): # Start update check in background (runs while other init happens) try: from hermes_cli.banner import prefetch_update_check + prefetch_update_check() except Exception: pass @@ -742,6 +1099,7 @@ def cmd_chat(args): # Sync bundled skills on every CLI launch (fast -- skips unchanged skills) try: from tools.skills_sync import sync_skills + sync_skills(quiet=True) except Exception: pass @@ -754,9 +1112,15 @@ def cmd_chat(args): if getattr(args, "source", None): os.environ["HERMES_SESSION_SOURCE"] = args.source + if use_tui: + _launch_tui( + getattr(args, "resume", None), + tui_dev=getattr(args, "tui_dev", False), + ) + # Import and run the CLI from cli import main as cli_main - + # Build kwargs from args kwargs = { "model": args.model, @@ -775,7 +1139,7 @@ def cmd_chat(args): } # Filter out None values kwargs = {k: v for k, v in kwargs.items() if v is not None} - + try: cli_main(**kwargs) except ValueError as e: @@ -786,6 +1150,7 @@ def cmd_chat(args): def cmd_gateway(args): """Gateway management commands.""" from hermes_cli.gateway import gateway_command + gateway_command(args) @@ -808,7 +1173,9 @@ def cmd_whatsapp(args): print() print(" 1. Separate bot number (recommended)") print(" People message the bot's number directly — cleanest experience.") - print(" Requires a second phone number with WhatsApp installed on a device.") + print( + " Requires a second phone number with WhatsApp installed on a device." + ) print() print(" 2. 
Personal number (self-chat)") print(" You message yourself to talk to the agent.") @@ -843,7 +1210,9 @@ def cmd_whatsapp(args): print(" ✓ Mode: personal number (self-chat)") else: wa_mode = current_mode - mode_label = "separate bot number" if wa_mode == "bot" else "personal number (self-chat)" + mode_label = ( + "separate bot number" if wa_mode == "bot" else "personal number (self-chat)" + ) print(f"\n✓ Mode: {mode_label}") # ── Step 2: Enable WhatsApp ────────────────────────────────────────── @@ -865,7 +1234,9 @@ def cmd_whatsapp(args): response = "n" if response.lower() in ("y", "yes"): if wa_mode == "bot": - phone = input(" Phone numbers that can message the bot (comma-separated): ").strip() + phone = input( + " Phone numbers that can message the bot (comma-separated): " + ).strip() else: phone = input(" Your phone number (e.g. 15551234567): ").strip() if phone: @@ -875,7 +1246,9 @@ def cmd_whatsapp(args): print() if wa_mode == "bot": print(" Who should be allowed to message the bot?") - phone = input(" Phone numbers (comma-separated, or * for anyone): ").strip() + phone = input( + " Phone numbers (comma-separated, or * for anyone): " + ).strip() else: phone = input(" Your phone number (e.g. 15551234567): ").strip() if phone: @@ -916,11 +1289,14 @@ def cmd_whatsapp(args): if (session_dir / "creds.json").exists(): print("✓ Existing WhatsApp session found") try: - response = input("\n Re-pair? This will clear the existing session. [y/N] ").strip() + response = input( + "\n Re-pair? This will clear the existing session. 
[y/N] " + ).strip() except (EOFError, KeyboardInterrupt): response = "n" if response.lower() in ("y", "yes"): import shutil + shutil.rmtree(session_dir, ignore_errors=True) session_dir.mkdir(parents=True, exist_ok=True) print(" ✓ Session cleared") @@ -979,6 +1355,7 @@ def cmd_whatsapp(args): def cmd_setup(args): """Interactive setup wizard.""" from hermes_cli.setup import run_setup_wizard + run_setup_wizard(args) @@ -997,9 +1374,15 @@ def select_provider_and_model(args=None): persistence. """ from hermes_cli.auth import ( - resolve_provider, AuthError, format_auth_error, + resolve_provider, + AuthError, + format_auth_error, + ) + from hermes_cli.config import ( + get_compatible_custom_providers, + load_config, + get_env_value, ) - from hermes_cli.config import get_compatible_custom_providers, load_config, get_env_value config = load_config() current_model = config.get("model") @@ -1010,15 +1393,14 @@ def select_provider_and_model(args=None): # Read effective provider the same way the CLI does at startup: # config.yaml model.provider > env var > auto-detect import os + config_provider = None model_cfg = config.get("model") if isinstance(model_cfg, dict): config_provider = model_cfg.get("provider") effective_provider = ( - config_provider - or os.getenv("HERMES_INFERENCE_PROVIDER") - or "auto" + config_provider or os.getenv("HERMES_INFERENCE_PROVIDER") or "auto" ) try: active = resolve_provider(effective_provider) @@ -1075,7 +1457,9 @@ def select_provider_and_model(args=None): return custom_provider_map # Add user-defined custom providers from config.yaml - _custom_provider_map = _named_custom_provider_map(config) # key → {name, base_url, api_key} + _custom_provider_map = _named_custom_provider_map( + config + ) # key → {name, base_url, api_key} for key, provider_info in _custom_provider_map.items(): name = provider_info["name"] base_url = provider_info["base_url"] @@ -1095,13 +1479,17 @@ def select_provider_and_model(args=None): ordered.append((key, label)) 
ordered.append(("custom", "Custom endpoint (enter URL manually)")) - _has_saved_custom_list = isinstance(config.get("custom_providers"), list) and bool(config.get("custom_providers")) + _has_saved_custom_list = isinstance(config.get("custom_providers"), list) and bool( + config.get("custom_providers") + ) if _has_saved_custom_list: ordered.append(("remove-custom", "Remove a saved custom provider")) - ordered.append(("cancel", "Cancel")) + ordered.append(("aux-config", "Configure auxiliary models...")) + ordered.append(("cancel", "Leave unchanged")) provider_idx = _prompt_provider_choice( - [label for _, label in ordered], default=default_idx, + [label for _, label in ordered], + default=default_idx, ) if provider_idx is None or ordered[provider_idx][0] == "cancel": print("No change.") @@ -1109,6 +1497,10 @@ def select_provider_and_model(args=None): selected_provider = ordered[provider_idx][0] + if selected_provider == "aux-config": + _aux_config_menu() + return + # Step 2: Provider-specific setup + model selection if selected_provider == "openrouter": _model_flow_openrouter(config, current_model) @@ -1118,13 +1510,18 @@ def select_provider_and_model(args=None): _model_flow_openai_codex(config, current_model) elif selected_provider == "qwen-oauth": _model_flow_qwen_oauth(config, current_model) + elif selected_provider == "google-gemini-cli": + _model_flow_google_gemini_cli(config, current_model) elif selected_provider == "copilot-acp": _model_flow_copilot_acp(config, current_model) elif selected_provider == "copilot": _model_flow_copilot(config, current_model) elif selected_provider == "custom": _model_flow_custom(config) - elif selected_provider.startswith("custom:") or selected_provider in _custom_provider_map: + elif ( + selected_provider.startswith("custom:") + or selected_provider in _custom_provider_map + ): provider_info = _named_custom_provider_map(load_config()).get(selected_provider) if provider_info is None: print( @@ -1139,15 +1536,38 @@ def 
select_provider_and_model(args=None): _model_flow_anthropic(config, current_model) elif selected_provider == "kimi-coding": _model_flow_kimi(config, current_model) - elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"): + elif selected_provider == "bedrock": + _model_flow_bedrock(config, current_model) + elif selected_provider in ( + "gemini", + "deepseek", + "xai", + "zai", + "kimi-coding-cn", + "minimax", + "minimax-cn", + "kilocode", + "opencode-zen", + "opencode-go", + "ai-gateway", + "alibaba", + "huggingface", + "xiaomi", + "arcee", + "nvidia", + "ollama-cloud", + ): _model_flow_api_key_provider(config, selected_provider, current_model) # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ────────────── # When the user switches to a named provider (anything except "custom"), # a leftover OPENAI_BASE_URL in ~/.hermes/.env can poison auxiliary # clients that use provider:auto. Clear it proactively. 
(#5161) - if selected_provider not in ("custom", "cancel", "remove-custom") \ - and not selected_provider.startswith("custom:"): + if selected_provider not in ( + "custom", + "cancel", + "remove-custom", + ) and not selected_provider.startswith("custom:"): _clear_stale_openai_base_url() @@ -1174,9 +1594,333 @@ def _clear_stale_openai_base_url(): stale_url = get_env_value("OPENAI_BASE_URL") if stale_url: save_env_value("OPENAI_BASE_URL", "") - print(f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url[:40]}...)" - if len(stale_url) > 40 - else f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url})") + print( + f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url[:40]}...)" + if len(stale_url) > 40 + else f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url})" + ) + + +# ───────────────────────────────────────────────────────────────────────────── +# Auxiliary model configuration +# +# Hermes uses lightweight "auxiliary" models for side tasks (vision analysis, +# context compression, web extraction, session search, etc.). Each task has +# its own provider+model pair in config.yaml under `auxiliary.`. +# +# The UI lives behind "Configure auxiliary models..." at the bottom of the +# `hermes model` provider picker. It does NOT re-run credential setup — it +# only routes already-authenticated providers to specific aux tasks. Users +# configure new providers through the normal `hermes model` flow first. 
+# ───────────────────────────────────────────────────────────────────────────── + +# (task_key, display_name, short_description) +_AUX_TASKS: list[tuple[str, str, str]] = [ + ("vision", "Vision", "image/screenshot analysis"), + ("compression", "Compression", "context summarization"), + ("web_extract", "Web extract", "web page summarization"), + ("session_search", "Session search", "past-conversation recall"), + ("approval", "Approval", "smart command approval"), + ("mcp", "MCP", "MCP tool reasoning"), + ("flush_memories", "Flush memories", "memory consolidation"), + ("title_generation", "Title generation", "session titles"), + ("skills_hub", "Skills hub", "skills search/install"), +] + + +def _format_aux_current(task_cfg: dict) -> str: + """Render the current aux config for display in the task menu.""" + if not isinstance(task_cfg, dict): + return "auto" + base_url = str(task_cfg.get("base_url") or "").strip() + provider = str(task_cfg.get("provider") or "auto").strip() or "auto" + model = str(task_cfg.get("model") or "").strip() + if base_url: + short = base_url.replace("https://", "").replace("http://", "").rstrip("/") + return f"custom ({short})" + (f" · {model}" if model else "") + if provider == "auto": + return "auto" + (f" · {model}" if model else "") + if model: + return f"{provider} · {model}" + return provider + + +def _save_aux_choice( + task: str, + *, + provider: str, + model: str = "", + base_url: str = "", + api_key: str = "", +) -> None: + """Persist an auxiliary task's provider/model to config.yaml. + + Only writes the four routing fields — timeout, download_timeout, and any + other task-specific settings are preserved untouched. The main model + config (``model.default``/``model.provider``) is never modified. 
+ """ + from hermes_cli.config import load_config, save_config + + cfg = load_config() + aux = cfg.setdefault("auxiliary", {}) + if not isinstance(aux, dict): + aux = {} + cfg["auxiliary"] = aux + entry = aux.setdefault(task, {}) + if not isinstance(entry, dict): + entry = {} + aux[task] = entry + entry["provider"] = provider + entry["model"] = model or "" + entry["base_url"] = base_url or "" + entry["api_key"] = api_key or "" + save_config(cfg) + + +def _reset_aux_to_auto() -> int: + """Reset every known aux task back to auto/empty. Returns number reset.""" + from hermes_cli.config import load_config, save_config + + cfg = load_config() + aux = cfg.setdefault("auxiliary", {}) + if not isinstance(aux, dict): + aux = {} + cfg["auxiliary"] = aux + count = 0 + for task, _name, _desc in _AUX_TASKS: + entry = aux.setdefault(task, {}) + if not isinstance(entry, dict): + entry = {} + aux[task] = entry + changed = False + if entry.get("provider") not in (None, "", "auto"): + entry["provider"] = "auto" + changed = True + for field in ("model", "base_url", "api_key"): + if entry.get(field): + entry[field] = "" + changed = True + # Preserve timeout/download_timeout — those are user-tuned, not routing + if changed: + count += 1 + save_config(cfg) + return count + + +def _aux_config_menu() -> None: + """Top-level auxiliary-model picker — choose a task to configure. + + Loops until the user picks "Back" so multiple tasks can be configured + without returning to the main provider menu. + """ + from hermes_cli.config import load_config + + while True: + cfg = load_config() + aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {} + + print() + print(" Auxiliary models — side-task routing") + print() + print(" Side tasks (vision, compression, web extraction, etc.) default") + print(" to your main chat model. 
\"auto\" means \"use my main model\" —") + print(" Hermes only falls back to a lightweight backend (OpenRouter,") + print(" Nous Portal) if the main model is unavailable. Override a") + print(" task below if you want it pinned to a specific provider/model.") + print() + + # Build the task menu with current settings inline + name_col = max(len(name) for _, name, _ in _AUX_TASKS) + 2 + desc_col = max(len(desc) for _, _, desc in _AUX_TASKS) + 4 + entries: list[tuple[str, str]] = [] + for task_key, name, desc in _AUX_TASKS: + task_cfg = aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {} + current = _format_aux_current(task_cfg) + label = f"{name.ljust(name_col)}{('(' + desc + ')').ljust(desc_col)}{current}" + entries.append((task_key, label)) + entries.append(("__reset__", "Reset all to auto")) + entries.append(("__back__", "Back")) + + idx = _prompt_provider_choice( + [label for _, label in entries], default=0, + ) + if idx is None: + return + key = entries[idx][0] + if key == "__back__": + return + if key == "__reset__": + n = _reset_aux_to_auto() + if n: + print(f"Reset {n} auxiliary task(s) to auto.") + else: + print("All auxiliary tasks were already set to auto.") + print() + continue + # Otherwise configure the specific task + _aux_select_for_task(key) + + +def _aux_select_for_task(task: str) -> None: + """Pick a provider + model for a single auxiliary task and persist it. + + Uses ``list_authenticated_providers()`` to only show providers the user + has already configured. This avoids re-running OAuth/credential flows + inside the aux picker — users set up new providers through the normal + ``hermes model`` flow, then route aux tasks to them here. 
+ """ + from hermes_cli.config import load_config + from hermes_cli.model_switch import list_authenticated_providers + + cfg = load_config() + aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {} + task_cfg = aux.get(task, {}) if isinstance(aux.get(task), dict) else {} + current_provider = str(task_cfg.get("provider") or "auto").strip() or "auto" + current_model = str(task_cfg.get("model") or "").strip() + current_base_url = str(task_cfg.get("base_url") or "").strip() + + display_name = next((name for key, name, _ in _AUX_TASKS if key == task), task) + + # Gather authenticated providers (has credentials + curated model list) + try: + providers = list_authenticated_providers(current_provider=current_provider) + except Exception as exc: + print(f"Could not detect authenticated providers: {exc}") + providers = [] + + entries: list[tuple[str, str, list[str]]] = [] # (slug, label, models) + # "auto" always first + auto_marker = " ← current" if current_provider == "auto" and not current_base_url else "" + entries.append(("__auto__", f"auto (recommended){auto_marker}", [])) + + for p in providers: + slug = p.get("slug", "") + name = p.get("name") or slug + total = p.get("total_models", 0) + models = p.get("models") or [] + model_hint = f" — {total} models" if total else "" + marker = " ← current" if slug == current_provider and not current_base_url else "" + entries.append((slug, f"{name}{model_hint}{marker}", list(models))) + + # Custom endpoint (raw base_url) + custom_marker = " ← current" if current_base_url else "" + entries.append(("__custom__", f"Custom endpoint (direct URL){custom_marker}", [])) + entries.append(("__back__", "Back", [])) + + print() + print(f" Configure {display_name} — current: {_format_aux_current(task_cfg)}") + print() + + idx = _prompt_provider_choice([label for _, label, _ in entries], default=0) + if idx is None: + return + slug, _label, models = entries[idx] + + if slug == "__back__": + return + + if slug == 
"__auto__": + _save_aux_choice(task, provider="auto", model="", base_url="", api_key="") + print(f"{display_name}: reset to auto.") + return + + if slug == "__custom__": + _aux_flow_custom_endpoint(task, task_cfg) + return + + # Regular provider — pick a model from its curated list + _aux_flow_provider_model(task, slug, models, current_model) + + +def _aux_flow_provider_model( + task: str, + provider_slug: str, + curated_models: list, + current_model: str = "", +) -> None: + """Prompt for a model under an already-authenticated provider, save to aux.""" + from hermes_cli.auth import _prompt_model_selection + from hermes_cli.models import get_pricing_for_provider + + display_name = next((name for key, name, _ in _AUX_TASKS if key == task), task) + + # Fetch live pricing for this provider (non-blocking) + pricing: dict = {} + try: + pricing = get_pricing_for_provider(provider_slug) or {} + except Exception: + pricing = {} + + model_list = list(curated_models) + + # Let the user pick a model. _prompt_model_selection supports "Enter custom + # model name" and cancel. When there's no curated list (rare), fall back + # to a raw input prompt. 
+ if not model_list: + print(f"No curated model list for {provider_slug}.") + print("Enter a model slug manually (blank = use provider default):") + try: + val = input("Model: ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + selected = val or "" + else: + selected = _prompt_model_selection( + model_list, current_model=current_model, pricing=pricing, + ) + if selected is None: + print("No change.") + return + + _save_aux_choice(task, provider=provider_slug, model=selected or "", + base_url="", api_key="") + if selected: + print(f"{display_name}: {provider_slug} · {selected}") + else: + print(f"{display_name}: {provider_slug} (provider default model)") + + +def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None: + """Prompt for a direct OpenAI-compatible base_url + optional api_key/model.""" + import getpass + + display_name = next((name for key, name, _ in _AUX_TASKS if key == task), task) + current_base_url = str(task_cfg.get("base_url") or "").strip() + current_model = str(task_cfg.get("model") or "").strip() + + print() + print(f" Custom endpoint for {display_name}") + print(" Provide an OpenAI-compatible base URL (e.g. http://localhost:11434/v1)") + print() + try: + url_prompt = f"Base URL [{current_base_url}]: " if current_base_url else "Base URL: " + url = input(url_prompt).strip() + except (KeyboardInterrupt, EOFError): + print() + return + url = url or current_base_url + if not url: + print("No URL provided. 
No change.") + return + try: + model_prompt = f"Model slug (optional) [{current_model}]: " if current_model else "Model slug (optional): " + model = input(model_prompt).strip() + except (KeyboardInterrupt, EOFError): + print() + return + model = model or current_model + try: + api_key = getpass.getpass("API key (optional, blank = use OPENAI_API_KEY): ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + + _save_aux_choice( + task, provider="custom", model=model, base_url=url, api_key=api_key, + ) + short_url = url.replace("https://", "").replace("http://", "").rstrip("/") + print(f"{display_name}: custom ({short_url})" + (f" · {model}" if model else "")) def _prompt_provider_choice(choices, *, default=0): @@ -1188,6 +1932,7 @@ def _prompt_provider_choice(choices, *, default=0): """ try: from hermes_cli.setup import _curses_prompt_choice + idx = _curses_prompt_choice("Select provider:", choices, default) if idx >= 0: print() @@ -1219,7 +1964,11 @@ def _prompt_provider_choice(choices, *, default=0): def _model_flow_openrouter(config, current_model=""): """OpenRouter provider: ensure API key, then pick model.""" - from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider + from hermes_cli.auth import ( + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) from hermes_cli.config import get_env_value, save_env_value api_key = get_env_value("OPENROUTER_API_KEY") @@ -1229,6 +1978,7 @@ def _model_flow_openrouter(config, current_model=""): print() try: import getpass + key = getpass.getpass("OpenRouter API key (or Enter to cancel): ").strip() except (KeyboardInterrupt, EOFError): print() @@ -1241,17 +1991,21 @@ def _model_flow_openrouter(config, current_model=""): print() from hermes_cli.models import model_ids, get_pricing_for_provider + openrouter_models = model_ids(force_refresh=True) # Fetch live pricing (non-blocking — returns empty dict on failure) pricing = 
get_pricing_for_provider("openrouter", force_refresh=True) - selected = _prompt_model_selection(openrouter_models, current_model=current_model, pricing=pricing) + selected = _prompt_model_selection( + openrouter_models, current_model=current_model, pricing=pricing + ) if selected: _save_model_choice(selected) # Update config provider and deactivate any OAuth provider from hermes_cli.config import load_config, save_config + cfg = load_config() model = cfg.get("model") if not isinstance(model, dict): @@ -1270,16 +2024,23 @@ def _model_flow_openrouter(config, current_model=""): def _model_flow_nous(config, current_model="", args=None): """Nous Portal provider: ensure logged in, then pick model.""" from hermes_cli.auth import ( - get_provider_auth_state, _prompt_model_selection, _save_model_choice, - _update_config_for_provider, resolve_nous_runtime_credentials, - AuthError, format_auth_error, - _login_nous, PROVIDER_REGISTRY, + get_provider_auth_state, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + resolve_nous_runtime_credentials, + AuthError, + format_auth_error, + _login_nous, + PROVIDER_REGISTRY, ) - from hermes_cli.config import get_env_value, save_config, save_env_value - from hermes_cli.nous_subscription import ( - apply_nous_provider_defaults, - get_nous_subscription_explainer_lines, + from hermes_cli.config import ( + get_env_value, + load_config, + save_config, + save_env_value, ) + from hermes_cli.nous_subscription import prompt_enable_tool_gateway import argparse state = get_provider_auth_state("nous") @@ -1298,9 +2059,12 @@ def _model_flow_nous(config, current_model="", args=None): insecure=bool(getattr(args, "insecure", False)), ) _login_nous(mock_args, PROVIDER_REGISTRY["nous"]) - print() - for line in get_nous_subscription_explainer_lines(): - print(line) + # Offer Tool Gateway enablement for paid subscribers + try: + _refreshed = load_config() or {} + prompt_enable_tool_gateway(_refreshed) + except Exception: + pass 
except SystemExit: print("Login cancelled or failed.") return @@ -1314,9 +2078,13 @@ def _model_flow_nous(config, current_model="", args=None): # The live /models endpoint returns hundreds of models; the curated list # shows only agentic models users recognize from OpenRouter. from hermes_cli.models import ( - _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models, - check_nous_free_tier, partition_nous_models_by_tier, + _PROVIDER_MODELS, + get_pricing_for_provider, + filter_nous_free_models, + check_nous_free_tier, + partition_nous_models_by_tier, ) + model_ids = _PROVIDER_MODELS.get("nous", []) if not model_ids: print("No curated models available for Nous Portal.") @@ -1333,9 +2101,14 @@ def _model_flow_nous(config, current_model="", args=None): print("Re-authenticating with Nous Portal...\n") try: mock_args = argparse.Namespace( - portal_url=None, inference_url=None, client_id=None, - scope=None, no_browser=False, timeout=15.0, - ca_bundle=None, insecure=False, + portal_url=None, + inference_url=None, + client_id=None, + scope=None, + no_browser=False, + timeout=15.0, + ca_bundle=None, + insecure=False, ) _login_nous(mock_args, PROVIDER_REGISTRY["nous"]) except Exception as login_exc: @@ -1356,7 +2129,9 @@ def _model_flow_nous(config, current_model="", args=None): model_ids = filter_nous_free_models(model_ids, pricing) unavailable_models: list[str] = [] if free_tier: - model_ids, unavailable_models = partition_nous_models_by_tier(model_ids, pricing, free_tier=True) + model_ids, unavailable_models = partition_nous_models_by_tier( + model_ids, pricing, free_tier=True + ) if not model_ids and not unavailable_models: print("No models available for Nous Portal after filtering.") @@ -1375,15 +2150,21 @@ def _model_flow_nous(config, current_model="", args=None): print("No free models currently available.") if unavailable_models: from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL + _url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/") 
print(f"Upgrade at {_url} to access paid models.") return - print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.") + print( + f'Showing {len(model_ids)} curated models — use "Enter custom model name" for others.' + ) selected = _prompt_model_selection( - model_ids, current_model=current_model, pricing=pricing, - unavailable_models=unavailable_models, portal_url=_nous_portal_url, + model_ids, + current_model=current_model, + pricing=pricing, + unavailable_models=unavailable_models, + portal_url=_nous_portal_url, ) if selected: _save_model_choice(selected) @@ -1408,18 +2189,10 @@ def _model_flow_nous(config, current_model="", args=None): if get_env_value("OPENAI_BASE_URL"): save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") - changed_defaults = apply_nous_provider_defaults(config) save_config(config) print(f"Default model set to: {selected} (via Nous Portal)") - if "tts" in changed_defaults: - print("TTS provider set to: OpenAI TTS via your Nous subscription") - else: - current_tts = str(config.get("tts", {}).get("provider") or "edge") - if current_tts.lower() not in {"", "edge"}: - print(f"Keeping your existing TTS provider: {current_tts}") - print() - for line in get_nous_subscription_explainer_lines(): - print(line) + # Offer Tool Gateway enablement for paid subscribers + prompt_enable_tool_gateway(config) else: print("No change.") @@ -1427,9 +2200,13 @@ def _model_flow_nous(config, current_model="", args=None): def _model_flow_openai_codex(config, current_model=""): """OpenAI Codex provider: ensure logged in, then pick model.""" from hermes_cli.auth import ( - get_codex_auth_status, _prompt_model_selection, _save_model_choice, - _update_config_for_provider, _login_openai_codex, - PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL, + get_codex_auth_status, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + _login_openai_codex, + PROVIDER_REGISTRY, + DEFAULT_CODEX_BASE_URL, ) 
from hermes_cli.codex_models import get_codex_model_ids import argparse @@ -1460,6 +2237,7 @@ def _model_flow_openai_codex(config, current_model=""): if not _codex_token: try: from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() _codex_token = _codex_creds.get("api_key") except Exception: @@ -1476,7 +2254,6 @@ def _model_flow_openai_codex(config, current_model=""): print("No change.") - _DEFAULT_QWEN_PORTAL_MODELS = [ "qwen3-coder-plus", "qwen3-coder", @@ -1526,6 +2303,80 @@ def _model_flow_qwen_oauth(_config, current_model=""): print("No change.") +def _model_flow_google_gemini_cli(_config, current_model=""): + """Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers. + + Flow: + 1. Show upfront warning about Google's ToS stance (per opencode-gemini-auth). + 2. If creds missing, run PKCE browser OAuth via agent.google_oauth. + 3. Resolve project context (env -> config -> auto-discover -> free tier). + 4. Prompt user to pick a model. + 5. Save to ~/.hermes/config.yaml. + """ + from hermes_cli.auth import ( + DEFAULT_GEMINI_CLOUDCODE_BASE_URL, + get_gemini_oauth_auth_status, + resolve_gemini_oauth_runtime_credentials, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + ) + from hermes_cli.models import _PROVIDER_MODELS + + print() + print("⚠ Google considers using the Gemini CLI OAuth client with third-party") + print(" software a policy violation. Some users have reported account") + print(" restrictions. You can use your own API key via 'gemini' provider") + print(" for the lowest-risk experience.") + print() + try: + proceed = input("Continue with OAuth login? 
[y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + print("Cancelled.") + return + if proceed not in {"y", "yes"}: + print("Cancelled.") + return + + status = get_gemini_oauth_auth_status() + if not status.get("logged_in"): + try: + from agent.google_oauth import resolve_project_id_from_env, start_oauth_flow + + env_project = resolve_project_id_from_env() + start_oauth_flow(force_relogin=True, project_id=env_project) + except Exception as exc: + print(f"OAuth login failed: {exc}") + return + + # Verify creds resolve + trigger project discovery + try: + creds = resolve_gemini_oauth_runtime_credentials(force_refresh=False) + project_id = creds.get("project_id", "") + if project_id: + print(f" Using GCP project: {project_id}") + else: + print( + " No GCP project configured — free tier will be auto-provisioned on first request." + ) + except Exception as exc: + print(f"Failed to resolve Gemini credentials: {exc}") + return + + models = list(_PROVIDER_MODELS.get("google-gemini-cli") or []) + default = current_model or (models[0] if models else "gemini-3-flash-preview") + selected = _prompt_model_selection(models, current_model=default) + if selected: + _save_model_choice(selected) + _update_config_for_provider( + "google-gemini-cli", DEFAULT_GEMINI_CLOUDCODE_BASE_URL + ) + print( + f"Default model set to: {selected} (via Google Gemini OAuth / Code Assist)" + ) + else: + print("No change.") + def _model_flow_custom(config): """Custom endpoint: collect URL, API key, and model name. @@ -1547,9 +2398,14 @@ def _model_flow_custom(config): print() try: - base_url = input(f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: ").strip() + base_url = input( + f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: " + ).strip() import getpass - api_key = getpass.getpass(f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: ").strip() + + api_key = getpass.getpass( + f"API key [{current_key[:8] + '...' 
if current_key else 'optional'}]: " + ).strip() except (KeyboardInterrupt, EOFError): print("\nCancelled.") return @@ -1566,6 +2422,30 @@ def _model_flow_custom(config): effective_key = api_key or current_key + # Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1 + # in the base URL for OpenAI-compatible chat completions. Prompt the + # user if the URL looks like a local server without /v1. + _url_lower = effective_url.rstrip("/").lower() + _looks_local = any( + h in _url_lower + for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000") + ) + if _looks_local and not _url_lower.endswith("/v1"): + print() + print(f" Hint: Did you mean to add /v1 at the end?") + print(f" Most local model servers (Ollama, vLLM, llama.cpp) require it.") + print(f" e.g. {effective_url.rstrip('/')}/v1") + try: + _add_v1 = input(" Add /v1? [Y/n]: ").strip().lower() + except (KeyboardInterrupt, EOFError): + _add_v1 = "n" + if _add_v1 in ("", "y", "yes"): + effective_url = effective_url.rstrip("/") + "/v1" + if base_url: + base_url = effective_url + print(f" Updated URL: {effective_url}") + print() + from hermes_cli.models import probe_api_models probe = probe_api_models(effective_key, effective_url) @@ -1590,7 +2470,9 @@ def _model_flow_custom(config): if probe.get("suggested_base_url"): suggested = probe["suggested_base_url"] if suggested.endswith("/v1"): - print(f" If this server expects /v1 in the path, try base URL: {suggested}") + print( + f" If this server expects /v1 in the path, try base URL: {suggested}" + ) else: print(f" If /v1 should not be in the base URL, try: {suggested}") @@ -1609,7 +2491,9 @@ def _model_flow_custom(config): print(" Available models:") for i, m in enumerate(detected_models, 1): print(f" {i}. 
{m}") - pick = input(f" Select model [1-{len(detected_models)}] or type name: ").strip() + pick = input( + f" Select model [1-{len(detected_models)}] or type name: " + ).strip() if pick.isdigit() and 1 <= int(pick) <= len(detected_models): model_name = detected_models[int(pick) - 1] elif pick: @@ -1617,7 +2501,9 @@ def _model_flow_custom(config): else: model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip() - context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip() + context_length_str = input( + "Context length in tokens [leave blank for auto-detect]: " + ).strip() # Prompt for a display name — shown in the provider menu on future runs default_name = _auto_provider_name(effective_url) @@ -1629,7 +2515,11 @@ def _model_flow_custom(config): context_length = None if context_length_str: try: - context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000")) + context_length = int( + context_length_str.replace(",", "") + .replace("k", "000") + .replace("K", "000") + ) if context_length <= 0: context_length = None except ValueError: @@ -1677,8 +2567,13 @@ def _model_flow_custom(config): print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.") # Auto-save to custom_providers so it appears in the menu next time - _save_custom_provider(effective_url, effective_key, model_name or "", - context_length=context_length, name=display_name) + _save_custom_provider( + effective_url, + effective_key, + model_name or "", + context_length=context_length, + name=display_name, + ) def _auto_provider_name(base_url: str) -> str: @@ -1689,6 +2584,7 @@ def _auto_provider_name(base_url: str) -> str: user for a display name during custom endpoint setup. 
""" import re + clean = base_url.replace("https://", "").replace("http://", "").rstrip("/") clean = re.sub(r"/v1/?$", "", clean) name = clean.split("/")[0] @@ -1701,8 +2597,9 @@ def _auto_provider_name(base_url: str) -> str: return name -def _save_custom_provider(base_url, api_key="", model="", context_length=None, - name=None): +def _save_custom_provider( + base_url, api_key="", model="", context_length=None, name=None +): """Save a custom endpoint to custom_providers in config.yaml. Deduplicates by base_url — if the URL already exists, updates the @@ -1718,7 +2615,9 @@ def _save_custom_provider(base_url, api_key="", model="", context_length=None, # Check if this URL is already saved — update model/context_length if so for entry in providers: - if isinstance(entry, dict) and entry.get("base_url", "").rstrip("/") == base_url.rstrip("/"): + if isinstance(entry, dict) and entry.get("base_url", "").rstrip( + "/" + ) == base_url.rstrip("/"): changed = False if model and entry.get("model") != model: entry["model"] = model @@ -1750,7 +2649,7 @@ def _save_custom_provider(base_url, api_key="", model="", context_length=None, providers.append(entry) cfg["custom_providers"] = providers save_config(cfg) - print(f" 💾 Saved to custom providers as \"{name}\" (edit in config.yaml)") + print(f' 💾 Saved to custom providers as "{name}" (edit in config.yaml)') def _remove_custom_provider(config): @@ -1778,15 +2677,20 @@ def _remove_custom_provider(config): try: from simple_term_menu import TerminalMenu + menu = TerminalMenu( - [f" {c}" for c in choices], cursor_index=0, - menu_cursor="-> ", menu_cursor_style=("fg_red", "bold"), + [f" {c}" for c in choices], + cursor_index=0, + menu_cursor="-> ", + menu_cursor_style=("fg_red", "bold"), menu_highlight_style=("fg_red",), - cycle_cursor=True, clear_screen=False, + cycle_cursor=True, + clear_screen=False, title="Select provider to remove:", ) idx = menu.show() from hermes_cli.curses_ui import flush_stdin + flush_stdin() print() except 
(ImportError, NotImplementedError, OSError, subprocess.SubprocessError): @@ -1806,8 +2710,10 @@ def _remove_custom_provider(config): removed = providers.pop(idx) cfg["custom_providers"] = providers save_config(cfg) - removed_name = removed.get("name", "unnamed") if isinstance(removed, dict) else str(removed) - print(f"✅ Removed \"{removed_name}\" from custom providers.") + removed_name = ( + removed.get("name", "unnamed") if isinstance(removed, dict) else str(removed) + ) + print(f'✅ Removed "{removed_name}" from custom providers.') def _model_flow_named_custom(config, provider_info): @@ -1845,19 +2751,23 @@ def _model_flow_named_custom(config, provider_info): print(f"Found {len(models)} model(s):\n") try: from simple_term_menu import TerminalMenu + menu_items = [ - f" {m} (current)" if m == saved_model else f" {m}" - for m in models + f" {m} (current)" if m == saved_model else f" {m}" for m in models ] + [" Cancel"] menu = TerminalMenu( - menu_items, cursor_index=default_idx, - menu_cursor="-> ", menu_cursor_style=("fg_green", "bold"), + menu_items, + cursor_index=default_idx, + menu_cursor="-> ", + menu_cursor_style=("fg_green", "bold"), menu_highlight_style=("fg_green",), - cycle_cursor=True, clear_screen=False, + cycle_cursor=True, + clear_screen=False, title=f"Select model from {name}:", ) idx = menu.show() from hermes_cli.curses_ui import flush_stdin + flush_stdin() print() if idx is None or idx >= len(models): @@ -1970,7 +2880,11 @@ def _set_reasoning_effort(config, effort: str) -> None: def _prompt_reasoning_effort_selection(efforts, current_effort=""): """Prompt for a reasoning effort. 
Returns effort, 'none', or None to keep current.""" - deduped = list(dict.fromkeys(str(effort).strip().lower() for effort in efforts if str(effort).strip())) + deduped = list( + dict.fromkeys( + str(effort).strip().lower() for effort in efforts if str(effort).strip() + ) + ) canonical_order = ("minimal", "low", "medium", "high", "xhigh") ordered = [effort for effort in canonical_order if effort in deduped] ordered.extend(effort for effort in deduped if effort not in canonical_order) @@ -2012,6 +2926,7 @@ def _prompt_reasoning_effort_selection(efforts, current_effort=""): ) idx = menu.show() from hermes_cli.curses_ui import flush_stdin + flush_stdin() if idx is None: return None @@ -2080,7 +2995,9 @@ def _model_flow_copilot(config, current_model=""): print("No GitHub token configured for GitHub Copilot.") print() print(" Supported token types:") - print(" → OAuth token (gho_*) via `copilot login` or device code flow") + print( + " → OAuth token (gho_*) via `copilot login` or device code flow" + ) print(" → Fine-grained PAT (github_pat_*) with Copilot Requests permission") print(" → GitHub App token (ghu_*) via environment variable") print(" ✗ Classic PAT (ghp_*) NOT supported by Copilot API") @@ -2099,6 +3016,7 @@ def _model_flow_copilot(config, current_model=""): if choice == "1": try: from hermes_cli.copilot_auth import copilot_device_code_login + token = copilot_device_code_login() if token: save_env_value("COPILOT_GITHUB_TOKEN", token) @@ -2113,6 +3031,7 @@ def _model_flow_copilot(config, current_model=""): elif choice == "2": try: import getpass + new_key = getpass.getpass(" Token (COPILOT_GITHUB_TOKEN): ").strip() except (KeyboardInterrupt, EOFError): print() @@ -2123,6 +3042,7 @@ def _model_flow_copilot(config, current_model=""): # Validate token type try: from hermes_cli.copilot_auth import validate_copilot_token + valid, msg = validate_copilot_token(new_key) if not valid: print(f" ✗ {msg}") @@ -2151,23 +3071,34 @@ def _model_flow_copilot(config, 
current_model=""): effective_base = pconfig.inference_base_url catalog = fetch_github_model_catalog(api_key) - live_models = [item.get("id", "") for item in catalog if item.get("id")] if catalog else fetch_api_models(api_key, effective_base) - normalized_current_model = normalize_copilot_model_id( - current_model, - catalog=catalog, - api_key=api_key, - ) or current_model + live_models = ( + [item.get("id", "") for item in catalog if item.get("id")] + if catalog + else fetch_api_models(api_key, effective_base) + ) + normalized_current_model = ( + normalize_copilot_model_id( + current_model, + catalog=catalog, + api_key=api_key, + ) + or current_model + ) if live_models: model_list = [model_id for model_id in live_models if model_id] print(f" Found {len(model_list)} model(s) from GitHub Copilot") else: model_list = _PROVIDER_MODELS.get(provider_id, []) if model_list: - print(" ⚠ Could not auto-detect models from GitHub Copilot — showing defaults.") + print( + " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults." 
+ ) print(' Use "Enter custom model name" if you do not see your model.') if model_list: - selected = _prompt_model_selection(model_list, current_model=normalized_current_model) + selected = _prompt_model_selection( + model_list, current_model=normalized_current_model + ) else: try: selected = input("Model name: ").strip() @@ -2175,11 +3106,14 @@ def _model_flow_copilot(config, current_model=""): selected = None if selected: - selected = normalize_copilot_model_id( - selected, - catalog=catalog, - api_key=api_key, - ) or selected + selected = ( + normalize_copilot_model_id( + selected, + catalog=catalog, + api_key=api_key, + ) + or selected + ) initial_cfg = load_config() current_effort = _current_reasoning_effort(initial_cfg) reasoning_efforts = github_model_reasoning_efforts( @@ -2246,7 +3180,9 @@ def _model_flow_copilot_acp(config, current_model=""): pconfig = PROVIDER_REGISTRY[provider_id] status = get_external_process_provider_status(provider_id) - resolved_command = status.get("resolved_command") or status.get("command") or "copilot" + resolved_command = ( + status.get("resolved_command") or status.get("command") or "copilot" + ) effective_base = status.get("base_url") or pconfig.inference_base_url print(" GitHub Copilot ACP delegates Hermes turns to `copilot --acp`.") @@ -2260,7 +3196,9 @@ def _model_flow_copilot_acp(config, current_model=""): creds = resolve_external_process_provider_credentials(provider_id) except Exception as exc: print(f" ⚠ {exc}") - print(" Set HERMES_COPILOT_ACP_COMMAND or COPILOT_CLI_PATH if Copilot CLI is installed elsewhere.") + print( + " Set HERMES_COPILOT_ACP_COMMAND or COPILOT_CLI_PATH if Copilot CLI is installed elsewhere." 
+ ) return effective_base = creds.get("base_url") or effective_base @@ -2273,11 +3211,14 @@ def _model_flow_copilot_acp(config, current_model=""): pass catalog = fetch_github_model_catalog(catalog_api_key) - normalized_current_model = normalize_copilot_model_id( - current_model, - catalog=catalog, - api_key=catalog_api_key, - ) or current_model + normalized_current_model = ( + normalize_copilot_model_id( + current_model, + catalog=catalog, + api_key=catalog_api_key, + ) + or current_model + ) if catalog: model_list = [item.get("id", "") for item in catalog if item.get("id")] @@ -2285,7 +3226,9 @@ def _model_flow_copilot_acp(config, current_model=""): else: model_list = _PROVIDER_MODELS.get("copilot", []) if model_list: - print(" ⚠ Could not auto-detect models from GitHub Copilot — showing defaults.") + print( + " ⚠ Could not auto-detect models from GitHub Copilot — showing defaults." + ) print(' Use "Enter custom model name" if you do not see your model.') if model_list: @@ -2303,11 +3246,14 @@ def _model_flow_copilot_acp(config, current_model=""): print("No change.") return - selected = normalize_copilot_model_id( - selected, - catalog=catalog, - api_key=catalog_api_key, - ) or selected + selected = ( + normalize_copilot_model_id( + selected, + catalog=catalog, + api_key=catalog_api_key, + ) + or selected + ) _save_model_choice(selected) cfg = load_config() @@ -2333,10 +3279,18 @@ def _model_flow_kimi(config, current_model=""): No manual base URL prompt — endpoint is determined by key prefix. 
""" from hermes_cli.auth import ( - PROVIDER_REGISTRY, KIMI_CODE_BASE_URL, _prompt_model_selection, - _save_model_choice, deactivate_provider, + PROVIDER_REGISTRY, + KIMI_CODE_BASE_URL, + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) + from hermes_cli.config import ( + get_env_value, + save_env_value, + load_config, + save_config, ) - from hermes_cli.config import get_env_value, save_env_value, load_config, save_config provider_id = "kimi-coding" pconfig = PROVIDER_REGISTRY[provider_id] @@ -2355,6 +3309,7 @@ def _model_flow_kimi(config, current_model=""): if key_env: try: import getpass + new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip() except (KeyboardInterrupt, EOFError): print() @@ -2385,10 +3340,10 @@ def _model_flow_kimi(config, current_model=""): # Step 3: Model selection — show appropriate models for the endpoint if is_coding_plan: - # Coding Plan models (kimi-for-coding first) + # Coding Plan models (kimi-k2.5 first) model_list = [ - "kimi-for-coding", "kimi-k2.5", + "kimi-for-coding", "kimi-k2-thinking", "kimi-k2-thinking-turbo", ] @@ -2425,14 +3380,296 @@ def _model_flow_kimi(config, current_model=""): print("No change.") +def _model_flow_bedrock_api_key(config, region, current_model=""): + """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint. + + For developers who don't have an AWS account but received a Bedrock API Key + from their AWS admin. Works like any OpenAI-compatible endpoint. + """ + from hermes_cli.auth import ( + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) + from hermes_cli.config import ( + load_config, + save_config, + get_env_value, + save_env_value, + ) + from hermes_cli.models import _PROVIDER_MODELS + + mantle_base_url = f"https://bedrock-mantle.{region}.api.aws/v1" + + # Prompt for API key + existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or "" + if existing_key: + print(f" Bedrock API Key: {existing_key[:12]}... 
✓") + else: + print(f" Endpoint: {mantle_base_url}") + print() + try: + import getpass + + api_key = getpass.getpass(" Bedrock API Key: ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + if not api_key: + print(" Cancelled.") + return + save_env_value("AWS_BEARER_TOKEN_BEDROCK", api_key) + existing_key = api_key + print(" ✓ API key saved.") + print() + + # Model selection — use static list (mantle doesn't need boto3 for discovery) + model_list = _PROVIDER_MODELS.get("bedrock", []) + print(f" Showing {len(model_list)} curated models") + + if model_list: + selected = _prompt_model_selection(model_list, current_model=current_model) + else: + try: + selected = input(" Model ID: ").strip() + except (KeyboardInterrupt, EOFError): + selected = None + + if selected: + _save_model_choice(selected) + + # Save as custom provider pointing to bedrock-mantle + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = "custom" + model["base_url"] = mantle_base_url + model.pop("api_mode", None) # chat_completions is the default + + # Also save region in bedrock config for reference + bedrock_cfg = cfg.get("bedrock", {}) + if not isinstance(bedrock_cfg, dict): + bedrock_cfg = {} + bedrock_cfg["region"] = region + cfg["bedrock"] = bedrock_cfg + + # Save the API key env var name so hermes knows where to find it + save_env_value("OPENAI_API_KEY", existing_key) + save_env_value("OPENAI_BASE_URL", mantle_base_url) + + save_config(cfg) + deactivate_provider() + + print(f" Default model set to: {selected} (via Bedrock API Key, {region})") + print(f" Endpoint: {mantle_base_url}") + else: + print(" No change.") + + +def _model_flow_bedrock(config, current_model=""): + """AWS Bedrock provider: verify credentials, pick region, discover models. + + Uses the native Converse API via boto3 — not the OpenAI-compatible endpoint. 
+ Auth is handled by the AWS SDK default credential chain (env vars, profile, + instance role), so no API key prompt is needed. + """ + from hermes_cli.auth import ( + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) + from hermes_cli.config import load_config, save_config + from hermes_cli.models import _PROVIDER_MODELS + + # 1. Check for AWS credentials + try: + from agent.bedrock_adapter import ( + has_aws_credentials, + resolve_aws_auth_env_var, + resolve_bedrock_region, + discover_bedrock_models, + ) + except ImportError: + print(" ✗ boto3 is not installed. Install it with:") + print(" pip install boto3") + print() + return + + if not has_aws_credentials(): + print(" ⚠ No AWS credentials detected via environment variables.") + print(" Bedrock will use boto3's default credential chain (IMDS, SSO, etc.)") + print() + + auth_var = resolve_aws_auth_env_var() + if auth_var: + print(f" AWS credentials: {auth_var} ✓") + else: + print(" AWS credentials: boto3 default chain (instance role / SSO)") + print() + + # 2. Region selection + current_region = resolve_bedrock_region() + try: + region_input = input(f" AWS Region [{current_region}]: ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + region = region_input or current_region + + # 2b. Authentication mode + print(" Choose authentication method:") + print() + print(" 1. IAM credential chain (recommended)") + print(" Works with EC2 instance roles, SSO, env vars, aws configure") + print(" 2. Bedrock API Key") + print(" Enter your Bedrock API Key directly — also supports") + print(" team scenarios where an admin distributes keys") + print() + try: + auth_choice = input(" Choice [1]: ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + + if auth_choice == "2": + _model_flow_bedrock_api_key(config, region, current_model) + return + + # 3. 
Model discovery — try live API first, fall back to static list + print(f" Discovering models in {region}...") + live_models = discover_bedrock_models(region) + + if live_models: + _EXCLUDE_PREFIXES = ( + "stability.", + "cohere.embed", + "twelvelabs.", + "us.stability.", + "us.cohere.embed", + "us.twelvelabs.", + "global.cohere.embed", + "global.twelvelabs.", + ) + _EXCLUDE_SUBSTRINGS = ("safeguard", "voxtral", "palmyra-vision") + filtered = [] + for m in live_models: + mid = m["id"] + if any(mid.startswith(p) for p in _EXCLUDE_PREFIXES): + continue + if any(s in mid.lower() for s in _EXCLUDE_SUBSTRINGS): + continue + filtered.append(m) + + # Deduplicate: prefer inference profiles (us.*, global.*) over bare + # foundation model IDs. + profile_base_ids = set() + for m in filtered: + mid = m["id"] + if mid.startswith(("us.", "global.")): + base = mid.split(".", 1)[1] if "." in mid[3:] else mid + profile_base_ids.add(base) + + deduped = [] + for m in filtered: + mid = m["id"] + if not mid.startswith(("us.", "global.")) and mid in profile_base_ids: + continue + deduped.append(m) + + _RECOMMENDED = [ + "us.anthropic.claude-sonnet-4-6", + "us.anthropic.claude-opus-4-6", + "us.anthropic.claude-haiku-4-5", + "us.amazon.nova-pro", + "us.amazon.nova-lite", + "us.amazon.nova-micro", + "deepseek.v3", + "us.meta.llama4-maverick", + "us.meta.llama4-scout", + ] + + def _sort_key(m): + mid = m["id"] + for i, rec in enumerate(_RECOMMENDED): + if mid.startswith(rec): + return (0, i, mid) + if mid.startswith("global."): + return (1, 0, mid) + return (2, 0, mid) + + deduped.sort(key=_sort_key) + model_list = [m["id"] for m in deduped] + print( + f" Found {len(model_list)} text model(s) (filtered from {len(live_models)} total)" + ) + else: + model_list = _PROVIDER_MODELS.get("bedrock", []) + if model_list: + print( + f" Using {len(model_list)} curated models (live discovery unavailable)" + ) + else: + print( + " No models found. Check IAM permissions for bedrock:ListFoundationModels." 
+ ) + return + + # 4. Model selection + if model_list: + selected = _prompt_model_selection(model_list, current_model=current_model) + else: + try: + selected = input(" Model ID: ").strip() + except (KeyboardInterrupt, EOFError): + selected = None + + if selected: + _save_model_choice(selected) + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = "bedrock" + model["base_url"] = f"https://bedrock-runtime.{region}.amazonaws.com" + model.pop("api_mode", None) # bedrock_converse is auto-detected + + bedrock_cfg = cfg.get("bedrock", {}) + if not isinstance(bedrock_cfg, dict): + bedrock_cfg = {} + bedrock_cfg["region"] = region + cfg["bedrock"] = bedrock_cfg + + save_config(cfg) + deactivate_provider() + + print(f" Default model set to: {selected} (via AWS Bedrock, {region})") + else: + print(" No change.") + + def _model_flow_api_key_provider(config, provider_id, current_model=""): """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.).""" from hermes_cli.auth import ( - PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice, + PROVIDER_REGISTRY, + _prompt_model_selection, + _save_model_choice, deactivate_provider, ) - from hermes_cli.config import get_env_value, save_env_value, load_config, save_config - from hermes_cli.models import fetch_api_models, opencode_model_api_mode, normalize_opencode_model_id + from hermes_cli.config import ( + get_env_value, + save_env_value, + load_config, + save_config, + ) + from hermes_cli.models import ( + fetch_api_models, + opencode_model_api_mode, + normalize_opencode_model_id, + ) pconfig = PROVIDER_REGISTRY[provider_id] key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else "" @@ -2450,6 +3687,7 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): if key_env: try: import getpass + new_key = getpass.getpass(f"{key_env} (or Enter to cancel): 
").strip() except (KeyboardInterrupt, EOFError): print() @@ -2477,7 +3715,9 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): override = "" if override and base_url_env: if not override.startswith(("http://", "https://")): - print(" Invalid URL — must start with http:// or https://. Keeping current value.") + print( + " Invalid URL — must start with http:// or https://. Keeping current value." + ) else: save_env_value(base_url_env, override) effective_base = override @@ -2486,37 +3726,58 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): # 1. models.dev registry (cached, filtered for agentic/tool-capable models) # 2. Curated static fallback list (offline insurance) # 3. Live /models endpoint probe (small providers without models.dev data) - curated = _PROVIDER_MODELS.get(provider_id, []) + # + # Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache) + if provider_id == "ollama-cloud": + from hermes_cli.models import fetch_ollama_cloud_models - # Try models.dev first — returns tool-capable models, filtered for noise - mdev_models: list = [] - try: - from agent.models_dev import list_agentic_models - mdev_models = list_agentic_models(provider_id) - except Exception: - pass - - if mdev_models: - model_list = mdev_models - print(f" Found {len(model_list)} model(s) from models.dev registry") - elif curated and len(curated) >= 8: - # Curated list is substantial — use it directly, skip live probe - model_list = curated - print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") - else: api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") - live_models = fetch_api_models(api_key_for_probe, effective_base) - if live_models and len(live_models) >= len(curated): - model_list = live_models - print(f" Found {len(model_list)} model(s) from {pconfig.name} API") - else: + model_list = fetch_ollama_cloud_models( + api_key=api_key_for_probe, 
base_url=effective_base + ) + if model_list: + print(f" Found {len(model_list)} model(s) from Ollama Cloud") + else: + curated = _PROVIDER_MODELS.get(provider_id, []) + + # Try models.dev first — returns tool-capable models, filtered for noise + mdev_models: list = [] + try: + from agent.models_dev import list_agentic_models + + mdev_models = list_agentic_models(provider_id) + except Exception: + pass + + if mdev_models: + model_list = mdev_models + print(f" Found {len(model_list)} model(s) from models.dev registry") + elif curated and len(curated) >= 8: + # Curated list is substantial — use it directly, skip live probe model_list = curated - if model_list: - print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") - # else: no defaults either, will fall through to raw input + print( + f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.' + ) + else: + api_key_for_probe = existing_key or ( + get_env_value(key_env) if key_env else "" + ) + live_models = fetch_api_models(api_key_for_probe, effective_base) + if live_models and len(live_models) >= len(curated): + model_list = live_models + print(f" Found {len(model_list)} model(s) from {pconfig.name} API") + else: + model_list = curated + if model_list: + print( + f' Showing {len(model_list)} curated models — use "Enter custom model name" for others.' 
+ ) + # else: no defaults either, will fall through to raw input if provider_id in {"opencode-zen", "opencode-go"}: - model_list = [normalize_opencode_model_id(provider_id, mid) for mid in model_list] + model_list = [ + normalize_opencode_model_id(provider_id, mid) for mid in model_list + ] current_model = normalize_opencode_model_id(provider_id, current_model) model_list = list(dict.fromkeys(mid for mid in model_list if mid)) @@ -2572,13 +3833,15 @@ def _run_anthropic_oauth_flow(save_env_value): except Exception: creds = None if creds and ( - is_claude_code_token_valid(creds) - or bool(creds.get("refreshToken")) + is_claude_code_token_valid(creds) or bool(creds.get("refreshToken")) ): use_anthropic_claude_code_credentials(save_fn=save_env_value) print(" ✓ Claude Code credentials linked.") from hermes_constants import display_hermes_home as _dhh_fn - print(f" Hermes will use Claude's credential store directly instead of copying a setup-token into {_dhh_fn()}/.env.") + + print( + f" Hermes will use Claude's credential store directly instead of copying a setup-token into {_dhh_fn()}/.env." 
+ ) return True return False @@ -2601,7 +3864,10 @@ def _run_anthropic_oauth_flow(save_env_value): print() try: import getpass - manual_token = getpass.getpass(" Paste setup-token (or Enter to cancel): ").strip() + + manual_token = getpass.getpass( + " Paste setup-token (or Enter to cancel): " + ).strip() except (KeyboardInterrupt, EOFError): print() return False @@ -2629,6 +3895,7 @@ def _run_anthropic_oauth_flow(save_env_value): print() try: import getpass + token = getpass.getpass(" Setup-token (or Enter to cancel): ").strip() except (KeyboardInterrupt, EOFError): print() @@ -2644,21 +3911,29 @@ def _run_anthropic_oauth_flow(save_env_value): def _model_flow_anthropic(config, current_model=""): """Flow for Anthropic provider — OAuth subscription, API key, or Claude Code creds.""" from hermes_cli.auth import ( - _prompt_model_selection, _save_model_choice, + _prompt_model_selection, + _save_model_choice, deactivate_provider, ) from hermes_cli.config import ( - save_env_value, load_config, save_config, + save_env_value, + load_config, + save_config, save_anthropic_api_key, ) from hermes_cli.models import _PROVIDER_MODELS # Check ALL credential sources from hermes_cli.auth import get_anthropic_key + existing_key = get_anthropic_key() cc_available = False try: - from agent.anthropic_adapter import read_claude_code_credentials, is_claude_code_token_valid + from agent.anthropic_adapter import ( + read_claude_code_credentials, + is_claude_code_token_valid, + ) + cc_creds = read_claude_code_credentials() if cc_creds and is_claude_code_token_valid(cc_creds): cc_available = True @@ -2711,10 +3986,11 @@ def _model_flow_anthropic(config, current_model=""): elif choice == "2": print() - print(" Get an API key at: https://console.anthropic.com/settings/keys") + print(" Get an API key at: https://platform.claude.com/settings/keys") print() try: import getpass + api_key = getpass.getpass(" API key (sk-ant-...): ").strip() except (KeyboardInterrupt, EOFError): print() @@ -2765,60 
+4041,70 @@ def _model_flow_anthropic(config, current_model=""): def cmd_login(args): """Authenticate Hermes CLI with a provider.""" from hermes_cli.auth import login_command + login_command(args) def cmd_logout(args): """Clear provider authentication.""" from hermes_cli.auth import logout_command + logout_command(args) def cmd_auth(args): """Manage pooled credentials.""" from hermes_cli.auth_commands import auth_command + auth_command(args) def cmd_status(args): """Show status of all components.""" from hermes_cli.status import show_status + show_status(args) def cmd_cron(args): """Cron job management.""" from hermes_cli.cron import cron_command + cron_command(args) def cmd_webhook(args): """Webhook subscription management.""" from hermes_cli.webhook import webhook_command + webhook_command(args) def cmd_doctor(args): """Check configuration and dependencies.""" from hermes_cli.doctor import run_doctor + run_doctor(args) def cmd_dump(args): """Dump setup summary for support/debugging.""" from hermes_cli.dump import run_dump + run_dump(args) def cmd_debug(args): """Debug tools (share report, etc.).""" from hermes_cli.debug import run_debug + run_debug(args) def cmd_config(args): """Configuration management.""" from hermes_cli.config import config_command + config_command(args) @@ -2826,15 +4112,18 @@ def cmd_backup(args): """Back up Hermes home directory to a zip file.""" if getattr(args, "quick", False): from hermes_cli.backup import run_quick_backup + run_quick_backup(args) else: from hermes_cli.backup import run_backup + run_backup(args) def cmd_import(args): """Restore a Hermes backup from a zip file.""" from hermes_cli.backup import run_import + run_import(args) @@ -2842,13 +4131,14 @@ def cmd_version(args): """Show version.""" print(f"Hermes Agent v{__version__} ({__release_date__})") print(f"Project: {PROJECT_ROOT}") - + # Show Python version print(f"Python: {sys.version.split()[0]}") - + # Check for key dependencies try: import openai + print(f"OpenAI SDK: 
{openai.__version__}") except ImportError: print("OpenAI SDK: Not installed") @@ -2857,6 +4147,7 @@ def cmd_version(args): try: from hermes_cli.banner import check_for_updates from hermes_cli.config import recommended_update_command + behind = check_for_updates() if behind and behind > 0: commits_word = "commit" if behind == 1 else "commits" @@ -2874,6 +4165,7 @@ def cmd_uninstall(args): """Uninstall Hermes Agent.""" _require_tty("uninstall") from hermes_cli.uninstall import run_uninstall + run_uninstall(args) @@ -2891,12 +4183,14 @@ def _clear_bytecode_cache(root: Path) -> int: for dirpath, dirnames, _ in os.walk(root): # Skip venv / node_modules / .git entirely dirnames[:] = [ - d for d in dirnames + d + for d in dirnames if d not in ("venv", ".venv", "node_modules", ".git", ".worktrees") ] if os.path.basename(dirpath) == "__pycache__": try: import shutil as _shutil + _shutil.rmtree(dirpath) removed += 1 except OSError: @@ -2937,6 +4231,7 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0) # Poll for response import time as _time + deadline = _time.monotonic() + timeout while _time.monotonic() < deadline: if response_path.exists(): @@ -2969,6 +4264,7 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: if not (web_dir / "package.json").exists(): return True import shutil + npm = shutil.which("npm") if not npm: if fatal: @@ -2978,15 +4274,19 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: print("→ Building web UI...") r1 = subprocess.run([npm, "install", "--silent"], cwd=web_dir, capture_output=True) if r1.returncode != 0: - print(f" {'✗' if fatal else '⚠'} Web UI npm install failed" - + ("" if fatal else " (hermes web will not be available)")) + print( + f" {'✗' if fatal else '⚠'} Web UI npm install failed" + + ("" if fatal else " (hermes web will not be available)") + ) if fatal: print(" Run manually: cd web && npm install && npm run build") return False r2 = subprocess.run([npm, "run", 
"build"], cwd=web_dir, capture_output=True) if r2.returncode != 0: - print(f" {'✗' if fatal else '⚠'} Web UI build failed" - + ("" if fatal else " (hermes web will not be available)")) + print( + f" {'✗' if fatal else '⚠'} Web UI build failed" + + ("" if fatal else " (hermes web will not be available)") + ) if fatal: print(" Run manually: cd web && npm install && npm run build") return False @@ -2996,34 +4296,41 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: def _update_via_zip(args): """Update Hermes Agent by downloading a ZIP archive. - - Used on Windows when git file I/O is broken (antivirus, NTFS filter + + Used on Windows when git file I/O is broken (antivirus, NTFS filter drivers causing 'Invalid argument' errors on file creation). """ import shutil import tempfile import zipfile from urllib.request import urlretrieve - + branch = "main" - zip_url = f"https://github.com/NousResearch/hermes-agent/archive/refs/heads/{branch}.zip" - + zip_url = ( + f"https://github.com/NousResearch/hermes-agent/archive/refs/heads/{branch}.zip" + ) + print("→ Downloading latest version...") try: tmp_dir = tempfile.mkdtemp(prefix="hermes-update-") zip_path = os.path.join(tmp_dir, f"hermes-agent-{branch}.zip") urlretrieve(zip_url, zip_path) - + print("→ Extracting...") - with zipfile.ZipFile(zip_path, 'r') as zf: + with zipfile.ZipFile(zip_path, "r") as zf: # Validate paths to prevent zip-slip (path traversal) tmp_dir_real = os.path.realpath(tmp_dir) for member in zf.infolist(): member_path = os.path.realpath(os.path.join(tmp_dir, member.filename)) - if not member_path.startswith(tmp_dir_real + os.sep) and member_path != tmp_dir_real: - raise ValueError(f"Zip-slip detected: {member.filename} escapes extraction directory") + if ( + not member_path.startswith(tmp_dir_real + os.sep) + and member_path != tmp_dir_real + ): + raise ValueError( + f"Zip-slip detected: {member.filename} escapes extraction directory" + ) zf.extractall(tmp_dir) - + # GitHub ZIPs extract 
to hermes-agent-/ extracted = os.path.join(tmp_dir, f"hermes-agent-{branch}") if not os.path.isdir(extracted): @@ -3033,9 +4340,9 @@ def _update_via_zip(args): if os.path.isdir(candidate) and d != "__MACOSX": extracted = candidate break - + # Copy updated files over existing installation, preserving venv/node_modules/.git - preserve = {'venv', 'node_modules', '.git', '.env'} + preserve = {"venv", "node_modules", ".git", ".env"} update_count = 0 for item in os.listdir(extracted): if item in preserve: @@ -3049,12 +4356,12 @@ def _update_via_zip(args): else: shutil.copy2(src, dst) update_count += 1 - + print(f"✓ Updated {update_count} items from ZIP") - + # Cleanup shutil.rmtree(tmp_dir, ignore_errors=True) - + except Exception as e: print(f"✗ ZIP update failed: {e}") sys.exit(1) @@ -3062,13 +4369,16 @@ def _update_via_zip(args): # Clear stale bytecode after ZIP extraction removed = _clear_bytecode_cache(PROJECT_ROOT) if removed: - print(f" ✓ Cleared {removed} stale __pycache__ director{'y' if removed == 1 else 'ies'}") - + print( + f" ✓ Cleared {removed} stale __pycache__ director{'y' if removed == 1 else 'ies'}" + ) + # Reinstall Python dependencies. Prefer .[all], but if one optional extra # breaks on this machine, keep base deps and reinstall the remaining extras # individually so update does not silently strip working capabilities. print("→ Updating Python dependencies...") import subprocess + uv_bin = shutil.which("uv") if uv_bin: uv_env = {**os.environ, "VIRTUAL_ENV": str(PROJECT_ROOT / "venv")} @@ -3080,7 +4390,12 @@ def _update_via_zip(args): # ensurepip before trying the editable install. 
pip_cmd = [sys.executable, "-m", "pip"] try: - subprocess.run(pip_cmd + ["--version"], cwd=PROJECT_ROOT, check=True, capture_output=True) + subprocess.run( + pip_cmd + ["--version"], + cwd=PROJECT_ROOT, + check=True, + capture_output=True, + ) except subprocess.CalledProcessError: subprocess.run( [sys.executable, "-m", "ensurepip", "--upgrade", "--default-pip"], @@ -3089,18 +4404,21 @@ def _update_via_zip(args): ) _install_python_dependencies_with_optional_fallback(pip_cmd) - # Build web UI frontend (optional — requires npm) + _update_node_dependencies() _build_web_ui(PROJECT_ROOT / "web") # Sync skills try: from tools.skills_sync import sync_skills + print("→ Syncing bundled skills...") result = sync_skills(quiet=True) if result["copied"]: print(f" + {len(result['copied'])} new: {', '.join(result['copied'])}") if result.get("updated"): - print(f" ↑ {len(result['updated'])} updated: {', '.join(result['updated'])}") + print( + f" ↑ {len(result['updated'])} updated: {', '.join(result['updated'])}" + ) if result.get("user_modified"): print(f" ~ {len(result['user_modified'])} user-modified (kept)") if result.get("cleaned"): @@ -3109,7 +4427,7 @@ def _update_via_zip(args): print(" ✓ Skills are up to date") except Exception: pass - + print() print("✓ Update complete!") @@ -3141,7 +4459,9 @@ def _stash_local_changes_if_needed(git_cmd: list[str], cwd: Path) -> Optional[st from datetime import datetime, timezone - stash_name = datetime.now(timezone.utc).strftime("hermes-update-autostash-%Y%m%d-%H%M%S") + stash_name = datetime.now(timezone.utc).strftime( + "hermes-update-autostash-%Y%m%d-%H%M%S" + ) print("→ Local changes detected — stashing before update...") subprocess.run( git_cmd + ["stash", "push", "--include-untracked", "-m", stash_name], @@ -3158,8 +4478,9 @@ def _stash_local_changes_if_needed(git_cmd: list[str], cwd: Path) -> Optional[st return stash_ref - -def _resolve_stash_selector(git_cmd: list[str], cwd: Path, stash_ref: str) -> Optional[str]: +def 
_resolve_stash_selector( + git_cmd: list[str], cwd: Path, stash_ref: str +) -> Optional[str]: stash_list = subprocess.run( git_cmd + ["stash", "list", "--format=%gd %H"], cwd=cwd, @@ -3174,15 +4495,19 @@ def _resolve_stash_selector(git_cmd: list[str], cwd: Path, stash_ref: str) -> Op return None - -def _print_stash_cleanup_guidance(stash_ref: str, stash_selector: Optional[str] = None) -> None: - print(" Check `git status` first so you don't accidentally reapply the same change twice.") +def _print_stash_cleanup_guidance( + stash_ref: str, stash_selector: Optional[str] = None +) -> None: + print( + " Check `git status` first so you don't accidentally reapply the same change twice." + ) print(" Find the saved entry with: git stash list --format='%gd %H %s'") if stash_selector: print(f" Remove it with: git stash drop {stash_selector}") else: - print(f" Look for commit {stash_ref}, then drop its selector with: git stash drop stash@{{N}}") - + print( + f" Look for commit {stash_ref}, then drop its selector with: git stash drop stash@{{N}}" + ) def _restore_stashed_changes( @@ -3195,7 +4520,9 @@ def _restore_stashed_changes( if prompt_user: print() print("⚠ Local changes were stashed before updating.") - print(" Restoring them may reapply local customizations onto the updated codebase.") + print( + " Restoring them may reapply local customizations onto the updated codebase." + ) print(" Review the result afterward if Hermes behaves unexpectedly.") print("Restore local changes now? [Y/n]") if input_fn is not None: @@ -3259,8 +4586,12 @@ def _restore_stashed_changes( stash_selector = _resolve_stash_selector(git_cmd, cwd, stash_ref) if stash_selector is None: - print("⚠ Local changes were restored, but Hermes couldn't find the stash entry to drop.") - print(" The stash was left in place. You can remove it manually after checking the result.") + print( + "⚠ Local changes were restored, but Hermes couldn't find the stash entry to drop." 
+ ) + print( + " The stash was left in place. You can remove it manually after checking the result." + ) _print_stash_cleanup_guidance(stash_ref) else: drop = subprocess.run( @@ -3270,18 +4601,23 @@ def _restore_stashed_changes( text=True, ) if drop.returncode != 0: - print("⚠ Local changes were restored, but Hermes couldn't drop the saved stash entry.") + print( + "⚠ Local changes were restored, but Hermes couldn't drop the saved stash entry." + ) if drop.stdout.strip(): print(drop.stdout.strip()) if drop.stderr.strip(): print(drop.stderr.strip()) - print(" The stash was left in place. You can remove it manually after checking the result.") + print( + " The stash was left in place. You can remove it manually after checking the result." + ) _print_stash_cleanup_guidance(stash_ref, stash_selector) print("⚠ Local changes were restored on top of the updated codebase.") print(" Review `git diff` / `git status` if Hermes behaves unexpectedly.") return True + # ========================================================================= # Fork detection and upstream management for `hermes update` # ========================================================================= @@ -3376,6 +4712,7 @@ def _count_commits_between(git_cmd: list[str], cwd: Path, base: str, head: str) def _should_skip_upstream_prompt() -> bool: """Check if user previously declined to add upstream.""" from hermes_constants import get_hermes_home + return (get_hermes_home() / SKIP_UPSTREAM_PROMPT_FILE).exists() @@ -3383,6 +4720,7 @@ def _mark_skip_upstream_prompt(): """Create marker file to skip future upstream prompts.""" try: from hermes_constants import get_hermes_home + (get_hermes_home() / SKIP_UPSTREAM_PROMPT_FILE).touch() except Exception: pass @@ -3427,7 +4765,9 @@ def _sync_with_upstream_if_needed(git_cmd: list[str], cwd: Path) -> None: print(" This means you may miss updates from NousResearch/hermes-agent.") print() try: - response = input("Add official repo as 'upstream' remote? 
[Y/n]: ").strip().lower() + response = ( + input("Add official repo as 'upstream' remote? [Y/n]: ").strip().lower() + ) except (EOFError, KeyboardInterrupt): print() response = "n" @@ -3435,13 +4775,17 @@ def _sync_with_upstream_if_needed(git_cmd: list[str], cwd: Path) -> None: if response in ("", "y", "yes"): print("→ Adding upstream remote...") if _add_upstream_remote(git_cmd, cwd): - print(" ✓ Added upstream: https://github.com/NousResearch/hermes-agent.git") + print( + " ✓ Added upstream: https://github.com/NousResearch/hermes-agent.git" + ) has_upstream = True else: print(" ✗ Failed to add upstream remote. Skipping upstream sync.") return else: - print(" Skipped. Run 'git remote add upstream https://github.com/NousResearch/hermes-agent.git' to add later.") + print( + " Skipped. Run 'git remote add upstream https://github.com/NousResearch/hermes-agent.git' to add later." + ) _mark_skip_upstream_prompt() return @@ -3461,7 +4805,9 @@ def _sync_with_upstream_if_needed(git_cmd: list[str], cwd: Path) -> None: # Compare origin/main with upstream/main origin_ahead = _count_commits_between(git_cmd, cwd, "upstream/main", "origin/main") - upstream_ahead = _count_commits_between(git_cmd, cwd, "origin/main", "upstream/main") + upstream_ahead = _count_commits_between( + git_cmd, cwd, "origin/main", "upstream/main" + ) if origin_ahead < 0 or upstream_ahead < 0: print(" ✗ Could not compare branches. Skipping upstream sync.") @@ -3493,7 +4839,9 @@ def _sync_with_upstream_if_needed(git_cmd: list[str], cwd: Path) -> None: check=True, ) except subprocess.CalledProcessError: - print(" ✗ Failed to pull from upstream. You may need to resolve conflicts manually.") + print( + " ✗ Failed to pull from upstream. You may need to resolve conflicts manually." 
+ ) return print(" ✓ Updated from upstream") @@ -3503,7 +4851,9 @@ def _sync_with_upstream_if_needed(git_cmd: list[str], cwd: Path) -> None: if _sync_fork_with_upstream(git_cmd, cwd): print(" ✓ Fork synced with upstream") else: - print(" ℹ Got updates from upstream but couldn't push to fork (no write access?)") + print( + " ℹ Got updates from upstream but couldn't push to fork (no write access?)" + ) print(" Your local repo is updated, but your fork on GitHub may be behind.") @@ -3517,6 +4867,7 @@ def _invalidate_update_cache(): homes = [] # Default profile home (Docker-aware — uses /opt/data in Docker) from hermes_constants import get_default_hermes_root + default_home = get_default_hermes_root() homes.append(default_home) # Named profiles under /profiles/ @@ -3544,6 +4895,7 @@ def _load_installable_optional_extras() -> list[str]: """ try: import tomllib + with (PROJECT_ROOT / "pyproject.toml").open("rb") as handle: project = tomllib.load(handle).get("project", {}) except Exception: @@ -3566,7 +4918,6 @@ def _load_installable_optional_extras() -> list[str]: return referenced - def _install_python_dependencies_with_optional_fallback( install_cmd_prefix: list[str], *, @@ -3582,7 +4933,9 @@ def _install_python_dependencies_with_optional_fallback( ) return except subprocess.CalledProcessError: - print(" ⚠ Optional extras failed, reinstalling base dependencies and retrying extras individually...") + print( + " ⚠ Optional extras failed, reinstalling base dependencies and retrying extras individually..." 
+ ) subprocess.run( install_cmd_prefix + ["install", "-e", ".", "--quiet"], @@ -3606,14 +4959,230 @@ def _install_python_dependencies_with_optional_fallback( failed_extras.append(extra) if installed_extras: - print(f" ✓ Reinstalled optional extras individually: {', '.join(installed_extras)}") + print( + f" ✓ Reinstalled optional extras individually: {', '.join(installed_extras)}" + ) if failed_extras: - print(f" ⚠ Skipped optional extras that still failed: {', '.join(failed_extras)}") + print( + f" ⚠ Skipped optional extras that still failed: {', '.join(failed_extras)}" + ) + + +def _update_node_dependencies() -> None: + npm = shutil.which("npm") + if not npm: + return + + paths = ( + ("repo root", PROJECT_ROOT), + ("ui-tui", PROJECT_ROOT / "ui-tui"), + ) + if not any((path / "package.json").exists() for _, path in paths): + return + + print("→ Updating Node.js dependencies...") + for label, path in paths: + if not (path / "package.json").exists(): + continue + + result = subprocess.run( + [npm, "install", "--silent", "--no-fund", "--no-audit", "--progress=false"], + cwd=path, + capture_output=True, + text=True, + check=False, + ) + if result.returncode == 0: + print(f" ✓ {label}") + continue + + print(f" ⚠ npm install failed in {label}") + stderr = (result.stderr or "").strip() + if stderr: + print(f" {stderr.splitlines()[-1]}") + + +class _UpdateOutputStream: + """Stream wrapper used during ``hermes update`` to survive terminal loss. + + Wraps the process's original stdout/stderr so that: + + * Every write is also mirrored to an append-only log file + (``~/.hermes/logs/update.log``) that users can inspect after the + terminal disconnects. + * Writes to the original stream that fail with ``BrokenPipeError`` / + ``OSError`` / ``ValueError`` (closed file) no longer cascade into + process exit — the update keeps going, only the on-screen output + stops. 
+ + Combined with ``SIGHUP -> SIG_IGN`` installed by + ``_install_hangup_protection``, this makes ``hermes update`` safe to + run in a plain SSH session that might disconnect mid-install. + """ + + def __init__(self, original, log_file): + self._original = original + self._log = log_file + self._original_broken = False + + def write(self, data): + # Mirror to the log file first — it's the most reliable destination. + if self._log is not None: + try: + self._log.write(data) + except Exception: + # Log errors should never abort the update. + pass + + if self._original_broken: + return len(data) if isinstance(data, (str, bytes)) else 0 + + try: + return self._original.write(data) + except (BrokenPipeError, OSError, ValueError): + # Terminal vanished (SSH disconnect, shell close). Stop trying + # to write to it, but keep the update running. + self._original_broken = True + return len(data) if isinstance(data, (str, bytes)) else 0 + + def flush(self): + if self._log is not None: + try: + self._log.flush() + except Exception: + pass + if self._original_broken: + return + try: + self._original.flush() + except (BrokenPipeError, OSError, ValueError): + self._original_broken = True + + def isatty(self): + if self._original_broken: + return False + try: + return self._original.isatty() + except Exception: + return False + + def fileno(self): + # Some tools probe fileno(); defer to the underlying stream and let + # callers handle failures (same behaviour as the unwrapped stream). + return self._original.fileno() + + def __getattr__(self, name): + return getattr(self._original, name) + + +def _install_hangup_protection(gateway_mode: bool = False): + """Protect ``cmd_update`` from SIGHUP and broken terminal pipes. + + Users commonly run ``hermes update`` in an SSH session or a terminal + that may close mid-install. 
Without protection, ``SIGHUP`` from the + terminal kills the Python process during ``pip install`` and leaves + the venv half-installed; the documented workaround ("use screen / + tmux") shouldn't be required for something as routine as an update. + + Protections installed: + + 1. ``SIGHUP`` is set to ``SIG_IGN``. POSIX preserves ``SIG_IGN`` + across ``exec()``, so pip and git subprocesses also stop dying on + hangup. + 2. ``sys.stdout`` / ``sys.stderr`` are wrapped to mirror output to + ``~/.hermes/logs/update.log`` and to silently absorb + ``BrokenPipeError`` when the terminal vanishes. + + ``SIGINT`` (Ctrl-C) and ``SIGTERM`` (systemd shutdown) are + **intentionally left alone** — those are legitimate cancellation + signals the user or OS sent on purpose. + + In gateway mode (``hermes update --gateway``) the update is already + spawned detached from a terminal, so this function is a no-op. + + Returns a dict that ``cmd_update`` can pass to + ``_finalize_update_output`` on exit. Returning a dict rather than a + tuple keeps the call site forward-compatible with future additions. + """ + state = { + "prev_stdout": sys.stdout, + "prev_stderr": sys.stderr, + "log_file": None, + "installed": False, + } + + if gateway_mode: + return state + + import signal as _signal + + # (1) Ignore SIGHUP for the remainder of this process. + if hasattr(_signal, "SIGHUP"): + try: + _signal.signal(_signal.SIGHUP, _signal.SIG_IGN) + except (ValueError, OSError): + # Called from a non-main thread — not fatal. The update still + # runs, just without hangup protection. + pass + + # (2) Mirror output to update.log and wrap stdio for broken-pipe + # tolerance. Any failure here is non-fatal; we just skip the wrap. 
+ try: + from hermes_cli.config import get_hermes_home + + logs_dir = get_hermes_home() / "logs" + logs_dir.mkdir(parents=True, exist_ok=True) + log_path = logs_dir / "update.log" + log_file = open(log_path, "a", buffering=1, encoding="utf-8") + + import datetime as _dt + + log_file.write( + f"\n=== hermes update started " + f"{_dt.datetime.now().isoformat(timespec='seconds')} ===\n" + ) + + state["log_file"] = log_file + sys.stdout = _UpdateOutputStream(state["prev_stdout"], log_file) + sys.stderr = _UpdateOutputStream(state["prev_stderr"], log_file) + state["installed"] = True + except Exception: + # Leave stdio untouched on any setup failure. Update continues + # without mirroring. + state["log_file"] = None + + return state + + +def _finalize_update_output(state): + """Restore stdio and close the update.log handle opened by ``_install_hangup_protection``.""" + if not state: + return + if state.get("installed"): + try: + sys.stdout = state.get("prev_stdout", sys.stdout) + except Exception: + pass + try: + sys.stderr = state.get("prev_stderr", sys.stderr) + except Exception: + pass + log_file = state.get("log_file") + if log_file is not None: + try: + log_file.flush() + log_file.close() + except Exception: + pass def cmd_update(args): - """Update Hermes Agent to the latest version.""" - import shutil + """Update Hermes Agent to the latest version. + + Thin wrapper around ``_cmd_update_impl``: installs hangup protection, + runs the update, then restores stdio on the way out (even on + ``sys.exit`` or unhandled exceptions). + """ from hermes_cli.config import is_managed, managed_error if is_managed(): @@ -3621,31 +5190,60 @@ def cmd_update(args): return gateway_mode = getattr(args, "gateway", False) + + # Protect against mid-update terminal disconnects (SIGHUP) and tolerate + # writes to a closed stdout. No-op in gateway mode. See + # _install_hangup_protection for rationale. 
+ _update_io_state = _install_hangup_protection(gateway_mode=gateway_mode) + try: + _cmd_update_impl(args, gateway_mode=gateway_mode) + finally: + _finalize_update_output(_update_io_state) + + +def _cmd_update_impl(args, gateway_mode: bool): + """Body of ``cmd_update`` — kept separate so the wrapper can always + restore stdio even on ``sys.exit``.""" # In gateway mode, use file-based IPC for prompts instead of stdin - gw_input_fn = (lambda prompt, default="": _gateway_prompt(prompt, default)) if gateway_mode else None - + gw_input_fn = ( + (lambda prompt, default="": _gateway_prompt(prompt, default)) + if gateway_mode + else None + ) + print("⚕ Updating Hermes Agent...") print() - + # Try git-based update first, fall back to ZIP download on Windows # when git file I/O is broken (antivirus, NTFS filter drivers, etc.) use_zip_update = False - git_dir = PROJECT_ROOT / '.git' - + git_dir = PROJECT_ROOT / ".git" + if not git_dir.exists(): if sys.platform == "win32": use_zip_update = True else: print("✗ Not a git repository. Please reinstall:") - print(" curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash") + print( + " curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash" + ) sys.exit(1) - + # On Windows, git can fail with "unable to write loose object file: Invalid argument" # due to filesystem atomicity issues. Set the recommended workaround. if sys.platform == "win32" and git_dir.exists(): subprocess.run( - ["git", "-c", "windows.appendAtomically=false", "config", "windows.appendAtomically", "false"], - cwd=PROJECT_ROOT, check=False, capture_output=True + [ + "git", + "-c", + "windows.appendAtomically=false", + "config", + "windows.appendAtomically", + "false", + ], + cwd=PROJECT_ROOT, + check=False, + capture_output=True, ) # Build git command once — reused for fork detection and the update itself. 
@@ -3682,8 +5280,12 @@ def cmd_update(args): if "Could not resolve host" in stderr or "unable to access" in stderr: print("✗ Network error — cannot reach the remote repository.") print(f" {stderr.splitlines()[0]}" if stderr else "") - elif "Authentication failed" in stderr or "could not read Username" in stderr: - print("✗ Authentication failed — check your git credentials or SSH key.") + elif ( + "Authentication failed" in stderr or "could not read Username" in stderr + ): + print( + "✗ Authentication failed — check your git credentials or SSH key." + ) else: print(f"✗ Failed to fetch updates from origin.") if stderr: @@ -3705,7 +5307,11 @@ def cmd_update(args): # If user is on a non-main branch or detached HEAD, switch to main if current_branch != "main": - label = "detached HEAD" if current_branch == "HEAD" else f"branch '{current_branch}'" + label = ( + "detached HEAD" + if current_branch == "HEAD" + else f"branch '{current_branch}'" + ) print(f" ⚠ Currently on {label} — switching to main for update...") # Stash before checkout so uncommitted work isn't lost auto_stash_ref = _stash_local_changes_if_needed(git_cmd, PROJECT_ROOT) @@ -3738,14 +5344,19 @@ def cmd_update(args): # Restore stash and switch back to original branch if we moved if auto_stash_ref is not None: _restore_stashed_changes( - git_cmd, PROJECT_ROOT, auto_stash_ref, + git_cmd, + PROJECT_ROOT, + auto_stash_ref, prompt_user=prompt_for_restore, input_fn=gw_input_fn, ) if current_branch not in ("main", "HEAD"): subprocess.run( git_cmd + ["checkout", current_branch], - cwd=PROJECT_ROOT, capture_output=True, text=True, check=False, + cwd=PROJECT_ROOT, + capture_output=True, + text=True, + check=False, ) print("✓ Already up to date!") return @@ -3765,7 +5376,9 @@ def cmd_update(args): # ff-only failed — local and remote have diverged (e.g. upstream # force-pushed or rebase). Since local changes are already # stashed, reset to match the remote exactly. 
- print(" ⚠ Fast-forward not possible (history diverged), resetting to match remote...") + print( + " ⚠ Fast-forward not possible (history diverged), resetting to match remote..." + ) reset_result = subprocess.run( git_cmd + ["reset", "--hard", f"origin/{branch}"], cwd=PROJECT_ROOT, @@ -3776,7 +5389,9 @@ def cmd_update(args): print(f"✗ Failed to reset to origin/{branch}.") if reset_result.stderr.strip(): print(f" {reset_result.stderr.strip()}") - print(" Try manually: git fetch origin && git reset --hard origin/main") + print( + " Try manually: git fetch origin && git reset --hard origin/main" + ) sys.exit(1) update_succeeded = True finally: @@ -3784,7 +5399,9 @@ def cmd_update(args): # Don't attempt stash restore if the code update itself failed — # working tree is in an unknown state. if not update_succeeded: - print(f" ℹ️ Local changes preserved in stash (ref: {auto_stash_ref})") + print( + f" ℹ️ Local changes preserved in stash (ref: {auto_stash_ref})" + ) print(f" Restore manually with: git stash apply") else: _restore_stashed_changes( @@ -3794,7 +5411,7 @@ def cmd_update(args): prompt_user=prompt_for_restore, input_fn=gw_input_fn, ) - + _invalidate_update_cache() # Clear stale .pyc bytecode cache — prevents ImportError on gateway @@ -3802,12 +5419,14 @@ def cmd_update(args): # the old bytecode (e.g. get_hermes_home added to hermes_constants). removed = _clear_bytecode_cache(PROJECT_ROOT) if removed: - print(f" ✓ Cleared {removed} stale __pycache__ director{'y' if removed == 1 else 'ies'}") + print( + f" ✓ Cleared {removed} stale __pycache__ director{'y' if removed == 1 else 'ies'}" + ) # Fork upstream sync logic (only for main branch on forks) if is_fork and branch == "main": _sync_with_upstream_if_needed(git_cmd, PROJECT_ROOT) - + # Reinstall Python dependencies. Prefer .[all], but if one optional extra # breaks on this machine, keep base deps and reinstall the remaining extras # individually so update does not silently strip working capabilities. 
@@ -3815,7 +5434,9 @@ def cmd_update(args): uv_bin = shutil.which("uv") if uv_bin: uv_env = {**os.environ, "VIRTUAL_ENV": str(PROJECT_ROOT / "venv")} - _install_python_dependencies_with_optional_fallback([uv_bin, "pip"], env=uv_env) + _install_python_dependencies_with_optional_fallback( + [uv_bin, "pip"], env=uv_env + ) else: # Use sys.executable to explicitly call the venv's pip module, # avoiding PEP 668 'externally-managed-environment' errors on Debian/Ubuntu. @@ -3823,7 +5444,12 @@ def cmd_update(args): # ensurepip before trying the editable install. pip_cmd = [sys.executable, "-m", "pip"] try: - subprocess.run(pip_cmd + ["--version"], cwd=PROJECT_ROOT, check=True, capture_output=True) + subprocess.run( + pip_cmd + ["--version"], + cwd=PROJECT_ROOT, + check=True, + capture_output=True, + ) except subprocess.CalledProcessError: subprocess.run( [sys.executable, "-m", "ensurepip", "--upgrade", "--default-pip"], @@ -3831,20 +5457,13 @@ def cmd_update(args): check=True, ) _install_python_dependencies_with_optional_fallback(pip_cmd) - - # Check for Node.js deps - if (PROJECT_ROOT / "package.json").exists(): - import shutil - if shutil.which("npm"): - print("→ Updating Node.js dependencies...") - subprocess.run(["npm", "install", "--silent"], cwd=PROJECT_ROOT, check=False) - # Build web UI frontend (optional — requires npm) + _update_node_dependencies() _build_web_ui(PROJECT_ROOT / "web") print() print("✓ Code updated!") - + # After git pull, source files on disk are newer than cached Python # modules in this process. 
Reload hermes_constants so that any lazy # import executed below (skills sync, gateway restart) sees new @@ -3852,20 +5471,24 @@ def cmd_update(args): try: import importlib import hermes_constants as _hc + importlib.reload(_hc) except Exception: pass # non-fatal — worst case a lazy import fails gracefully - + # Sync bundled skills (copies new, updates changed, respects user deletions) try: from tools.skills_sync import sync_skills + print() print("→ Syncing bundled skills...") result = sync_skills(quiet=True) if result["copied"]: print(f" + {len(result['copied'])} new: {', '.join(result['copied'])}") if result.get("updated"): - print(f" ↑ {len(result['updated'])} updated: {', '.join(result['updated'])}") + print( + f" ↑ {len(result['updated'])} updated: {', '.join(result['updated'])}" + ) if result.get("user_modified"): print(f" ~ {len(result['user_modified'])} user-modified (kept)") if result.get("cleaned"): @@ -3877,7 +5500,12 @@ def cmd_update(args): # Sync bundled skills to all other profiles try: - from hermes_cli.profiles import list_profiles, get_active_profile_name, seed_profile_skills + from hermes_cli.profiles import ( + list_profiles, + get_active_profile_name, + seed_profile_skills, + ) + active = get_active_profile_name() other_profiles = [p for p in list_profiles() if p.name != active] if other_profiles: @@ -3891,9 +5519,12 @@ def cmd_update(args): updated = len(r.get("updated", [])) modified = len(r.get("user_modified", [])) parts = [] - if copied: parts.append(f"+{copied} new") - if updated: parts.append(f"↑{updated} updated") - if modified: parts.append(f"~{modified} user-modified") + if copied: + parts.append(f"+{copied} new") + if updated: + parts.append(f"↑{updated} updated") + if modified: + parts.append(f"~{modified} user-modified") status = ", ".join(parts) if parts else "up to date" else: status = "sync failed" @@ -3906,6 +5537,7 @@ def cmd_update(args): # Sync Honcho host blocks to all profiles try: from plugins.memory.honcho.cli import 
sync_honcho_profiles_quiet + synced = sync_honcho_profiles_quiet() if synced: print(f"\n-> Honcho: synced {synced} profile(s)") @@ -3915,46 +5547,60 @@ def cmd_update(args): # Check for config migrations print() print("→ Checking configuration for new options...") - + from hermes_cli.config import ( - get_missing_env_vars, get_missing_config_fields, - check_config_version, migrate_config + get_missing_env_vars, + get_missing_config_fields, + check_config_version, + migrate_config, ) - + missing_env = get_missing_env_vars(required_only=True) missing_config = get_missing_config_fields() current_ver, latest_ver = check_config_version() - + needs_migration = missing_env or missing_config or current_ver < latest_ver - + if needs_migration: print() if missing_env: - print(f" ⚠️ {len(missing_env)} new required setting(s) need configuration") + print( + f" ⚠️ {len(missing_env)} new required setting(s) need configuration" + ) if missing_config: print(f" ℹ️ {len(missing_config)} new config option(s) available") - + print() if gateway_mode: - response = _gateway_prompt( - "Would you like to configure new options now? [Y/n]", "n" - ).strip().lower() + response = ( + _gateway_prompt( + "Would you like to configure new options now? [Y/n]", "n" + ) + .strip() + .lower() + ) elif not (sys.stdin.isatty() and sys.stdout.isatty()): print(" ℹ Non-interactive session — skipping config migration prompt.") - print(" Run 'hermes config migrate' later to apply any new config/env options.") + print( + " Run 'hermes config migrate' later to apply any new config/env options." + ) response = "n" else: try: - response = input("Would you like to configure them now? [Y/n]: ").strip().lower() + response = ( + input("Would you like to configure them now? 
[Y/n]: ") + .strip() + .lower() + ) except EOFError: response = "n" - - if response in ('', 'y', 'yes'): + + if response in ("", "y", "yes"): print() # In gateway mode, run auto-migrations only (no input() prompts # for API keys which would hang the detached process). results = migrate_config(interactive=not gateway_mode, quiet=False) - + if results["env_added"] or results["config_added"]: print() print("✓ Configuration updated!") @@ -3965,10 +5611,10 @@ def cmd_update(args): print("Skipped. Run 'hermes config migrate' later to configure.") else: print(" ✓ Configuration is up to date") - + print() print("✓ Update complete!") - + # Write exit code *before* the gateway restart attempt. # When running as ``hermes update --gateway`` (spawned by the gateway's # /update command), this process lives inside the gateway's systemd @@ -3988,13 +5634,15 @@ def cmd_update(args): _exit_code_path.write_text("0") except OSError: pass - + # Auto-restart ALL gateways after update. # The code update (git pull) is shared across all profiles, so every # running gateway needs restarting to pick up the new code. 
try: from hermes_cli.gateway import ( - is_macos, supports_systemd_services, _ensure_user_systemd_env, + is_macos, + supports_systemd_services, + _ensure_user_systemd_env, find_gateway_pids, _get_service_pids, ) @@ -4011,39 +5659,60 @@ def cmd_update(args): except Exception: pass - for scope, scope_cmd in [("user", ["systemctl", "--user"]), ("system", ["systemctl"])]: + for scope, scope_cmd in [ + ("user", ["systemctl", "--user"]), + ("system", ["systemctl"]), + ]: try: result = subprocess.run( - scope_cmd + ["list-units", "hermes-gateway*", "--plain", "--no-legend", "--no-pager"], - capture_output=True, text=True, timeout=10, + scope_cmd + + [ + "list-units", + "hermes-gateway*", + "--plain", + "--no-legend", + "--no-pager", + ], + capture_output=True, + text=True, + timeout=10, ) for line in result.stdout.strip().splitlines(): parts = line.split() if not parts: continue - unit = parts[0] # e.g. hermes-gateway.service or hermes-gateway-coder.service + unit = parts[ + 0 + ] # e.g. hermes-gateway.service or hermes-gateway-coder.service if not unit.endswith(".service"): continue svc_name = unit.removesuffix(".service") # Check if active check = subprocess.run( scope_cmd + ["is-active", svc_name], - capture_output=True, text=True, timeout=5, + capture_output=True, + text=True, + timeout=5, ) if check.stdout.strip() == "active": restart = subprocess.run( scope_cmd + ["restart", svc_name], - capture_output=True, text=True, timeout=15, + capture_output=True, + text=True, + timeout=15, ) if restart.returncode == 0: # Verify the service actually survived the # restart. systemctl restart returns 0 even # if the new process crashes immediately. 
import time as _time + _time.sleep(3) verify = subprocess.run( scope_cmd + ["is-active", svc_name], - capture_output=True, text=True, timeout=5, + capture_output=True, + text=True, + timeout=5, ) if verify.stdout.strip() == "active": restarted_services.append(svc_name) @@ -4051,15 +5720,21 @@ def cmd_update(args): # Retry once — transient startup failures # (stale module cache, import race) often # resolve on the second attempt. - print(f" ⚠ {svc_name} died after restart, retrying...") + print( + f" ⚠ {svc_name} died after restart, retrying..." + ) retry = subprocess.run( scope_cmd + ["restart", svc_name], - capture_output=True, text=True, timeout=15, + capture_output=True, + text=True, + timeout=15, ) _time.sleep(3) verify2 = subprocess.run( scope_cmd + ["is-active", svc_name], - capture_output=True, text=True, timeout=5, + capture_output=True, + text=True, + timeout=5, ) if verify2.stdout.strip() == "active": restarted_services.append(svc_name) @@ -4071,19 +5746,28 @@ def cmd_update(args): f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}" ) else: - print(f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}") + print( + f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}" + ) except (FileNotFoundError, subprocess.TimeoutExpired): pass # --- Launchd services (macOS) --- if is_macos(): try: - from hermes_cli.gateway import launchd_restart, get_launchd_label, get_launchd_plist_path + from hermes_cli.gateway import ( + launchd_restart, + get_launchd_label, + get_launchd_plist_path, + ) + plist_path = get_launchd_plist_path() if plist_path.exists(): check = subprocess.run( ["launchctl", "list", get_launchd_label()], - capture_output=True, text=True, timeout=5, + capture_output=True, + text=True, + timeout=5, ) if check.returncode == 0: try: @@ -4100,7 +5784,9 @@ def cmd_update(args): # Exclude PIDs that belong to just-restarted services so we don't # immediately kill the process that systemd/launchd just spawned. 
service_pids = _get_service_pids() - manual_pids = find_gateway_pids(exclude_pids=service_pids, all_profiles=True) + manual_pids = find_gateway_pids( + exclude_pids=service_pids, all_profiles=True + ) for pid in manual_pids: try: os.kill(pid, _signal.SIGTERM) @@ -4117,7 +5803,9 @@ def cmd_update(args): print(" Restart manually: hermes gateway run") # Also restart for each profile if needed if len(killed_pids) > 1: - print(" (or: hermes -p gateway run for each profile)") + print( + " (or: hermes -p gateway run for each profile)" + ) if not restarted_services and not killed_pids: # No gateways were running — nothing to do @@ -4125,11 +5813,40 @@ def cmd_update(args): except Exception as e: logger.debug("Gateway restart during update failed: %s", e) - + + # Warn if legacy Hermes gateway unit files are still installed. + # When both hermes.service (from a pre-rename install) and the + # current hermes-gateway.service are enabled, they SIGTERM-fight + # for the same bot token (see PR #11909). Flagging here means + # every `hermes update` surfaces the issue until the user migrates. + try: + from hermes_cli.gateway import ( + has_legacy_hermes_units, + _find_legacy_hermes_units, + supports_systemd_services, + ) + + if supports_systemd_services() and has_legacy_hermes_units(): + print() + print("⚠ Legacy Hermes gateway unit(s) detected:") + for name, path, is_sys in _find_legacy_hermes_units(): + scope = "system" if is_sys else "user" + print(f" {path} ({scope} scope)") + print() + print(" These pre-rename units (hermes.service) fight the current") + print(" hermes-gateway.service for the bot token and cause SIGTERM") + print(" flap loops. 
Remove them with:") + print() + print(" hermes gateway migrate-legacy") + print() + print(" (add `sudo` if any are in system scope)") + except Exception as e: + logger.debug("Legacy unit check during update failed: %s", e) + print() print("Tip: You can now select a provider and model:") print(" hermes model # Select provider and model") - + except subprocess.CalledProcessError as e: if sys.platform == "win32": print(f"⚠ Git update failed: {e}") @@ -4153,12 +5870,41 @@ def _coalesce_session_name_args(argv: list) -> list: or a known top-level subcommand. """ _SUBCOMMANDS = { - "chat", "model", "gateway", "setup", "whatsapp", "login", "logout", "auth", - "status", "cron", "doctor", "config", "pairing", "skills", "tools", - "mcp", "sessions", "insights", "version", "update", "uninstall", - "profile", "dashboard", - "honcho", "claw", "plugins", "acp", - "webhook", "memory", "dump", "debug", "backup", "import", "completion", "logs", + "chat", + "model", + "gateway", + "setup", + "whatsapp", + "login", + "logout", + "auth", + "status", + "cron", + "doctor", + "config", + "pairing", + "skills", + "tools", + "mcp", + "sessions", + "insights", + "version", + "update", + "uninstall", + "profile", + "dashboard", + "honcho", + "claw", + "plugins", + "acp", + "webhook", + "memory", + "dump", + "debug", + "backup", + "import", + "completion", + "logs", } _SESSION_FLAGS = {"-c", "--continue", "-r", "--resume"} @@ -4171,7 +5917,11 @@ def _coalesce_session_name_args(argv: list) -> list: i += 1 # Collect subsequent non-flag, non-subcommand tokens as one name parts: list = [] - while i < len(argv) and not argv[i].startswith("-") and argv[i] not in _SUBCOMMANDS: + while ( + i < len(argv) + and not argv[i].startswith("-") + and argv[i] not in _SUBCOMMANDS + ): parts.append(argv[i]) i += 1 if parts: @@ -4185,10 +5935,17 @@ def _coalesce_session_name_args(argv: list) -> list: def cmd_profile(args): """Profile management — create, delete, list, switch, alias.""" from hermes_cli.profiles 
import ( - list_profiles, create_profile, delete_profile, seed_profile_skills, - set_active_profile, get_active_profile_name, - check_alias_collision, create_wrapper_script, remove_wrapper_script, - _is_wrapper_dir_in_path, _get_wrapper_dir, + list_profiles, + create_profile, + delete_profile, + seed_profile_skills, + set_active_profile, + get_active_profile_name, + check_alias_collision, + create_wrapper_script, + remove_wrapper_script, + _is_wrapper_dir_in_path, + _get_wrapper_dir, ) from hermes_constants import display_hermes_home @@ -4205,8 +5962,13 @@ def cmd_profile(args): for p in profiles: if p.name == profile_name or (profile_name == "default" and p.is_default): if p.model: - print(f"Model: {p.model}" + (f" ({p.provider})" if p.provider else "")) - print(f"Gateway: {'running' if p.gateway_running else 'stopped'}") + print( + f"Model: {p.model}" + + (f" ({p.provider})" if p.provider else "") + ) + print( + f"Gateway: {'running' if p.gateway_running else 'stopped'}" + ) print(f"Skills: {p.skill_count} installed") if p.alias_path: print(f"Alias: {p.name} → hermes -p {p.name}") @@ -4227,7 +5989,11 @@ def cmd_profile(args): print(f" {'─' * 15} {'─' * 27} {'─' * 11} {'─' * 12}") for p in profiles: - marker = " ◆" if (p.name == active or (active == "default" and p.is_default)) else " " + marker = ( + " ◆" + if (p.name == active or (active == "default" and p.is_default)) + else " " + ) name = p.name model = (p.model or "—")[:26] gw = "running" if p.gateway_running else "stopped" @@ -4268,7 +6034,9 @@ def cmd_profile(args): print(f"\nProfile '{name}' created at {profile_dir}") if clone or clone_all: - source_label = getattr(args, "clone_from", None) or get_active_profile_name() + source_label = ( + getattr(args, "clone_from", None) or get_active_profile_name() + ) if clone_all: print(f"Full copy from {source_label}.") else: @@ -4278,6 +6046,7 @@ def cmd_profile(args): if clone or clone_all: try: from plugins.memory.honcho.cli import clone_honcho_for_profile + if 
clone_honcho_for_profile(name): print(f"Honcho config cloned (peer: {name})") except Exception: @@ -4290,14 +6059,20 @@ def cmd_profile(args): copied = len(result.get("copied", [])) print(f"{copied} bundled skills synced.") else: - print("⚠ Skills could not be seeded. Run `{} update` to retry.".format(name)) + print( + "⚠ Skills could not be seeded. Run `{} update` to retry.".format( + name + ) + ) # Create wrapper alias if not no_alias: collision = check_alias_collision(name) if collision: print(f"\n⚠ Cannot create alias '{name}' — {collision}") - print(f" Choose a custom alias: hermes profile alias {name} --name ") + print( + f" Choose a custom alias: hermes profile alias {name} --name " + ) print(f" Or access via flag: hermes -p {name} chat") else: wrapper_path = create_wrapper_script(name) @@ -4305,7 +6080,9 @@ def cmd_profile(args): print(f"Wrapper created: {wrapper_path}") if not _is_wrapper_dir_in_path(): print(f"\n⚠ {_get_wrapper_dir()} is not in your PATH.") - print(f' Add to your shell config (~/.bashrc or ~/.zshrc):') + print( + f" Add to your shell config (~/.bashrc or ~/.zshrc):" + ) print(f' export PATH="$HOME/.local/bin:$PATH"') # Profile dir for display @@ -4323,7 +6100,9 @@ def cmd_profile(args): print(f"\n Edit {profile_dir_display}/.env for different API keys") print(f" Edit {profile_dir_display}/SOUL.md for different personality") else: - print(f"\n ⚠ This profile has no API keys yet. Run '{name} setup' first,") + print( + f"\n ⚠ This profile has no API keys yet. 
Run '{name} setup' first," + ) print(f" or it will inherit keys from your shell environment.") print(f" Edit {profile_dir_display}/SOUL.md to customize personality") print() @@ -4343,7 +6122,14 @@ def cmd_profile(args): elif action == "show": name = args.profile_name - from hermes_cli.profiles import get_profile_dir, profile_exists, _read_config_model, _check_gateway_running, _count_skills + from hermes_cli.profiles import ( + get_profile_dir, + profile_exists, + _read_config_model, + _check_gateway_running, + _count_skills, + ) + if not profile_exists(name): print(f"Error: Profile '{name}' does not exist.") sys.exit(1) @@ -4359,8 +6145,12 @@ def cmd_profile(args): print(f"Model: {model}" + (f" ({provider})" if provider else "")) print(f"Gateway: {'running' if gw else 'stopped'}") print(f"Skills: {skills}") - print(f".env: {'exists' if (profile_dir / '.env').exists() else 'not configured'}") - print(f"SOUL.md: {'exists' if (profile_dir / 'SOUL.md').exists() else 'not configured'}") + print( + f".env: {'exists' if (profile_dir / '.env').exists() else 'not configured'}" + ) + print( + f"SOUL.md: {'exists' if (profile_dir / 'SOUL.md').exists() else 'not configured'}" + ) if wrapper.exists(): print(f"Alias: {wrapper}") print() @@ -4371,6 +6161,7 @@ def cmd_profile(args): custom_name = getattr(args, "alias_name", None) from hermes_cli.profiles import profile_exists + if not profile_exists(name): print(f"Error: Profile '{name}' does not exist.") sys.exit(1) @@ -4398,6 +6189,7 @@ def cmd_profile(args): elif action == "rename": from hermes_cli.profiles import rename_profile + try: new_dir = rename_profile(args.old_name, args.new_name) print(f"\nProfile renamed: {args.old_name} → {args.new_name}") @@ -4408,6 +6200,7 @@ def cmd_profile(args): elif action == "export": from hermes_cli.profiles import export_profile + name = args.profile_name output = args.output or f"{name}.tar.gz" try: @@ -4419,8 +6212,11 @@ def cmd_profile(args): elif action == "import": from 
hermes_cli.profiles import import_profile + try: - profile_dir = import_profile(args.archive, name=getattr(args, "import_name", None)) + profile_dir = import_profile( + args.archive, name=getattr(args, "import_name", None) + ) name = profile_dir.name print(f"✓ Imported profile '{name}' at {profile_dir}") @@ -4443,13 +6239,15 @@ def cmd_dashboard(args): import uvicorn # noqa: F401 except ImportError: print("Web UI dependencies not installed.") - print("Install them with: pip install hermes-agent[web]") + print(f"Install them with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'") sys.exit(1) - if not _build_web_ui(PROJECT_ROOT / "web", fatal=True): - sys.exit(1) + if "HERMES_WEB_DIST" not in os.environ: + if not _build_web_ui(PROJECT_ROOT / "web", fatal=True): + sys.exit(1) from hermes_cli.web_server import start_server + start_server( host=args.host, port=args.port, @@ -4461,6 +6259,7 @@ def cmd_dashboard(args): def cmd_completion(args, parser=None): """Print shell completion script.""" from hermes_cli.completion import generate_bash, generate_zsh, generate_fish + shell = getattr(args, "shell", "bash") if shell == "zsh": print(generate_zsh(parser)) @@ -4530,152 +6329,200 @@ Examples: For more help on a command: hermes --help -""" +""", ) - + parser.add_argument( - "--version", "-V", - action="store_true", - help="Show version and exit" + "--version", "-V", action="store_true", help="Show version and exit" ) parser.add_argument( - "--resume", "-r", + "--resume", + "-r", metavar="SESSION", default=None, - help="Resume a previous session by ID or title" + help="Resume a previous session by ID or title", ) parser.add_argument( - "--continue", "-c", + "--continue", + "-c", dest="continue_last", nargs="?", const=True, default=None, metavar="SESSION_NAME", - help="Resume a session by name, or the most recent if no name given" + help="Resume a session by name, or the most recent if no name given", ) parser.add_argument( - "--worktree", "-w", + "--worktree", + 
"-w", action="store_true", default=False, - help="Run in an isolated git worktree (for parallel agents)" + help="Run in an isolated git worktree (for parallel agents)", ) parser.add_argument( - "--skills", "-s", + "--skills", + "-s", action="append", default=None, - help="Preload one or more skills for the session (repeat flag or comma-separate)" + help="Preload one or more skills for the session (repeat flag or comma-separate)", ) parser.add_argument( "--yolo", action="store_true", default=False, - help="Bypass all dangerous command approval prompts (use at your own risk)" + help="Bypass all dangerous command approval prompts (use at your own risk)", ) parser.add_argument( "--pass-session-id", action="store_true", default=False, - help="Include the session ID in the agent's system prompt" + help="Include the session ID in the agent's system prompt", ) - + parser.add_argument( + "--tui", + action="store_true", + default=False, + help="Launch the modern TUI instead of the classic REPL", + ) + parser.add_argument( + "--dev", + dest="tui_dev", + action="store_true", + default=False, + help="With --tui: run TypeScript sources via tsx (skip dist build)", + ) + subparsers = parser.add_subparsers(dest="command", help="Command to run") - + # ========================================================================= # chat command # ========================================================================= chat_parser = subparsers.add_parser( "chat", help="Interactive chat with the agent", - description="Start an interactive chat session with Hermes Agent" + description="Start an interactive chat session with Hermes Agent", ) chat_parser.add_argument( - "-q", "--query", - help="Single query (non-interactive mode)" + "-q", "--query", help="Single query (non-interactive mode)" ) chat_parser.add_argument( - "--image", - help="Optional local image path to attach to a single query" + "--image", help="Optional local image path to attach to a single query" ) 
chat_parser.add_argument( - "-m", "--model", - help="Model to use (e.g., anthropic/claude-sonnet-4)" + "-m", "--model", help="Model to use (e.g., anthropic/claude-sonnet-4)" ) chat_parser.add_argument( - "-t", "--toolsets", - help="Comma-separated toolsets to enable" + "-t", "--toolsets", help="Comma-separated toolsets to enable" ) chat_parser.add_argument( - "-s", "--skills", + "-s", + "--skills", action="append", default=argparse.SUPPRESS, - help="Preload one or more skills for the session (repeat flag or comma-separate)" + help="Preload one or more skills for the session (repeat flag or comma-separate)", ) chat_parser.add_argument( "--provider", - choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"], + choices=[ + "auto", + "openrouter", + "nous", + "openai-codex", + "copilot-acp", + "copilot", + "anthropic", + "gemini", + "xai", + "ollama-cloud", + "huggingface", + "zai", + "kimi-coding", + "kimi-coding-cn", + "minimax", + "minimax-cn", + "kilocode", + "xiaomi", + "arcee", + "nvidia", + ], default=None, - help="Inference provider (default: auto)" + help="Inference provider (default: auto)", ) chat_parser.add_argument( - "-v", "--verbose", + "-v", "--verbose", action="store_true", help="Verbose output" + ) + chat_parser.add_argument( + "-Q", + "--quiet", action="store_true", - help="Verbose output" + help="Quiet mode for programmatic use: suppress banner, spinner, and tool previews. Only output the final response and session info.", ) chat_parser.add_argument( - "-Q", "--quiet", - action="store_true", - help="Quiet mode for programmatic use: suppress banner, spinner, and tool previews. Only output the final response and session info." 
- ) - chat_parser.add_argument( - "--resume", "-r", + "--resume", + "-r", metavar="SESSION_ID", default=argparse.SUPPRESS, - help="Resume a previous session by ID (shown on exit)" + help="Resume a previous session by ID (shown on exit)", ) chat_parser.add_argument( - "--continue", "-c", + "--continue", + "-c", dest="continue_last", nargs="?", const=True, default=argparse.SUPPRESS, metavar="SESSION_NAME", - help="Resume a session by name, or the most recent if no name given" + help="Resume a session by name, or the most recent if no name given", ) chat_parser.add_argument( - "--worktree", "-w", + "--worktree", + "-w", action="store_true", default=argparse.SUPPRESS, - help="Run in an isolated git worktree (for parallel agents on the same repo)" + help="Run in an isolated git worktree (for parallel agents on the same repo)", ) chat_parser.add_argument( "--checkpoints", action="store_true", default=False, - help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)" + help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)", ) chat_parser.add_argument( "--max-turns", type=int, default=None, metavar="N", - help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)" + help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)", ) chat_parser.add_argument( "--yolo", action="store_true", default=argparse.SUPPRESS, - help="Bypass all dangerous command approval prompts (use at your own risk)" + help="Bypass all dangerous command approval prompts (use at your own risk)", ) chat_parser.add_argument( "--pass-session-id", action="store_true", default=argparse.SUPPRESS, - help="Include the session ID in the agent's system prompt" + help="Include the session ID in the agent's system prompt", ) chat_parser.add_argument( "--source", default=None, - help="Session source tag for filtering (default: cli). 
Use 'tool' for third-party integrations that should not appear in user session lists." + help="Session source tag for filtering (default: cli). Use 'tool' for third-party integrations that should not appear in user session lists.", + ) + chat_parser.add_argument( + "--tui", + action="store_true", + default=False, + help="Launch the modern TUI instead of the classic REPL", + ) + chat_parser.add_argument( + "--dev", + dest="tui_dev", + action="store_true", + default=False, + help="With --tui: run TypeScript sources via tsx (skip dist build)", ) chat_parser.set_defaults(func=cmd_chat) @@ -4685,45 +6532,42 @@ For more help on a command: model_parser = subparsers.add_parser( "model", help="Select default model and provider", - description="Interactively select your inference provider and default model" + description="Interactively select your inference provider and default model", ) model_parser.add_argument( "--portal-url", - help="Portal base URL for Nous login (default: production portal)" + help="Portal base URL for Nous login (default: production portal)", ) model_parser.add_argument( "--inference-url", - help="Inference API base URL for Nous login (default: production inference API)" + help="Inference API base URL for Nous login (default: production inference API)", ) model_parser.add_argument( "--client-id", default=None, - help="OAuth client id to use for Nous login (default: hermes-cli)" + help="OAuth client id to use for Nous login (default: hermes-cli)", ) model_parser.add_argument( - "--scope", - default=None, - help="OAuth scope to request for Nous login" + "--scope", default=None, help="OAuth scope to request for Nous login" ) model_parser.add_argument( "--no-browser", action="store_true", - help="Do not attempt to open the browser automatically during Nous login" + help="Do not attempt to open the browser automatically during Nous login", ) model_parser.add_argument( "--timeout", type=float, default=15.0, - help="HTTP request timeout in seconds for Nous 
login (default: 15)" + help="HTTP request timeout in seconds for Nous login (default: 15)", ) model_parser.add_argument( - "--ca-bundle", - help="Path to CA bundle PEM file for Nous TLS verification" + "--ca-bundle", help="Path to CA bundle PEM file for Nous TLS verification" ) model_parser.add_argument( "--insecure", action="store_true", - help="Disable TLS verification for Nous login (testing only)" + help="Disable TLS verification for Nous login (testing only)", ) model_parser.set_defaults(func=cmd_model) @@ -4733,52 +6577,138 @@ For more help on a command: gateway_parser = subparsers.add_parser( "gateway", help="Messaging gateway management", - description="Manage the messaging gateway (Telegram, Discord, WhatsApp)" + description="Manage the messaging gateway (Telegram, Discord, WhatsApp)", ) gateway_subparsers = gateway_parser.add_subparsers(dest="gateway_command") - + # gateway run (default) - gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground (recommended for WSL, Docker, Termux)") - gateway_run.add_argument("-v", "--verbose", action="count", default=0, - help="Increase stderr log verbosity (-v=INFO, -vv=DEBUG)") - gateway_run.add_argument("-q", "--quiet", action="store_true", - help="Suppress all stderr log output") - gateway_run.add_argument("--replace", action="store_true", - help="Replace any existing gateway instance (useful for systemd)") - + gateway_run = gateway_subparsers.add_parser( + "run", help="Run gateway in foreground (recommended for WSL, Docker, Termux)" + ) + gateway_run.add_argument( + "-v", + "--verbose", + action="count", + default=0, + help="Increase stderr log verbosity (-v=INFO, -vv=DEBUG)", + ) + gateway_run.add_argument( + "-q", "--quiet", action="store_true", help="Suppress all stderr log output" + ) + gateway_run.add_argument( + "--replace", + action="store_true", + help="Replace any existing gateway instance (useful for systemd)", + ) + # gateway start - gateway_start = 
gateway_subparsers.add_parser("start", help="Start the installed systemd/launchd background service") - gateway_start.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service") - + gateway_start = gateway_subparsers.add_parser( + "start", help="Start the installed systemd/launchd background service" + ) + gateway_start.add_argument( + "--system", + action="store_true", + help="Target the Linux system-level gateway service", + ) + gateway_start.add_argument( + "--all", + action="store_true", + help="Kill ALL stale gateway processes across all profiles before starting", + ) + # gateway stop gateway_stop = gateway_subparsers.add_parser("stop", help="Stop gateway service") - gateway_stop.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service") - gateway_stop.add_argument("--all", action="store_true", help="Stop ALL gateway processes across all profiles") - + gateway_stop.add_argument( + "--system", + action="store_true", + help="Target the Linux system-level gateway service", + ) + gateway_stop.add_argument( + "--all", + action="store_true", + help="Stop ALL gateway processes across all profiles", + ) + # gateway restart - gateway_restart = gateway_subparsers.add_parser("restart", help="Restart gateway service") - gateway_restart.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service") - + gateway_restart = gateway_subparsers.add_parser( + "restart", help="Restart gateway service" + ) + gateway_restart.add_argument( + "--system", + action="store_true", + help="Target the Linux system-level gateway service", + ) + gateway_restart.add_argument( + "--all", + action="store_true", + help="Kill ALL gateway processes across all profiles before restarting", + ) + # gateway status gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status") gateway_status.add_argument("--deep", action="store_true", help="Deep status check") - 
gateway_status.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service") - + gateway_status.add_argument( + "--system", + action="store_true", + help="Target the Linux system-level gateway service", + ) + # gateway install - gateway_install = gateway_subparsers.add_parser("install", help="Install gateway as a systemd/launchd background service") + gateway_install = gateway_subparsers.add_parser( + "install", help="Install gateway as a systemd/launchd background service" + ) gateway_install.add_argument("--force", action="store_true", help="Force reinstall") - gateway_install.add_argument("--system", action="store_true", help="Install as a Linux system-level service (starts at boot)") - gateway_install.add_argument("--run-as-user", dest="run_as_user", help="User account the Linux system service should run as") - + gateway_install.add_argument( + "--system", + action="store_true", + help="Install as a Linux system-level service (starts at boot)", + ) + gateway_install.add_argument( + "--run-as-user", + dest="run_as_user", + help="User account the Linux system service should run as", + ) + # gateway uninstall - gateway_uninstall = gateway_subparsers.add_parser("uninstall", help="Uninstall gateway service") - gateway_uninstall.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service") + gateway_uninstall = gateway_subparsers.add_parser( + "uninstall", help="Uninstall gateway service" + ) + gateway_uninstall.add_argument( + "--system", + action="store_true", + help="Target the Linux system-level gateway service", + ) # gateway setup gateway_subparsers.add_parser("setup", help="Configure messaging platforms") + # gateway migrate-legacy + gateway_migrate_legacy = gateway_subparsers.add_parser( + "migrate-legacy", + help="Remove legacy hermes.service units from pre-rename installs", + description=( + "Stop, disable, and remove legacy Hermes gateway unit files " + "(e.g. 
hermes.service) left over from older installs. Profile " + "units (hermes-gateway-.service) and unrelated " + "third-party services are never touched." + ), + ) + gateway_migrate_legacy.add_argument( + "--dry-run", + dest="dry_run", + action="store_true", + help="List what would be removed without doing it", + ) + gateway_migrate_legacy.add_argument( + "-y", + "--yes", + dest="yes", + action="store_true", + help="Skip the confirmation prompt", + ) + gateway_parser.set_defaults(func=cmd_gateway) - + # ========================================================================= # setup command # ========================================================================= @@ -4786,24 +6716,22 @@ For more help on a command: "setup", help="Interactive setup wizard", description="Configure Hermes Agent with an interactive wizard. " - "Run a specific section: hermes setup model|tts|terminal|gateway|tools|agent" + "Run a specific section: hermes setup model|tts|terminal|gateway|tools|agent", ) setup_parser.add_argument( "section", nargs="?", choices=["model", "tts", "terminal", "gateway", "tools", "agent"], default=None, - help="Run a specific setup section instead of the full wizard" + help="Run a specific setup section instead of the full wizard", ) setup_parser.add_argument( "--non-interactive", action="store_true", - help="Non-interactive mode (use defaults/env vars)" + help="Non-interactive mode (use defaults/env vars)", ) setup_parser.add_argument( - "--reset", - action="store_true", - help="Reset configuration to defaults" + "--reset", action="store_true", help="Reset configuration to defaults" ) setup_parser.set_defaults(func=cmd_setup) @@ -4813,7 +6741,7 @@ For more help on a command: whatsapp_parser = subparsers.add_parser( "whatsapp", help="Set up WhatsApp integration", - description="Configure WhatsApp and pair via QR code" + description="Configure WhatsApp and pair via QR code", ) whatsapp_parser.set_defaults(func=cmd_whatsapp) @@ -4823,51 +6751,43 @@ For more help 
on a command: login_parser = subparsers.add_parser( "login", help="Authenticate with an inference provider", - description="Run OAuth device authorization flow for Hermes CLI" + description="Run OAuth device authorization flow for Hermes CLI", ) login_parser.add_argument( "--provider", choices=["nous", "openai-codex"], default=None, - help="Provider to authenticate with (default: nous)" + help="Provider to authenticate with (default: nous)", ) login_parser.add_argument( - "--portal-url", - help="Portal base URL (default: production portal)" + "--portal-url", help="Portal base URL (default: production portal)" ) login_parser.add_argument( "--inference-url", - help="Inference API base URL (default: production inference API)" + help="Inference API base URL (default: production inference API)", ) login_parser.add_argument( - "--client-id", - default=None, - help="OAuth client id to use (default: hermes-cli)" - ) - login_parser.add_argument( - "--scope", - default=None, - help="OAuth scope to request" + "--client-id", default=None, help="OAuth client id to use (default: hermes-cli)" ) + login_parser.add_argument("--scope", default=None, help="OAuth scope to request") login_parser.add_argument( "--no-browser", action="store_true", - help="Do not attempt to open the browser automatically" + help="Do not attempt to open the browser automatically", ) login_parser.add_argument( "--timeout", type=float, default=15.0, - help="HTTP request timeout in seconds (default: 15)" + help="HTTP request timeout in seconds (default: 15)", ) login_parser.add_argument( - "--ca-bundle", - help="Path to CA bundle PEM file for TLS verification" + "--ca-bundle", help="Path to CA bundle PEM file for TLS verification" ) login_parser.add_argument( "--insecure", action="store_true", - help="Disable TLS verification (testing only)" + help="Disable TLS verification (testing only)", ) login_parser.set_defaults(func=cmd_login) @@ -4877,13 +6797,13 @@ For more help on a command: logout_parser = 
subparsers.add_parser( "logout", help="Clear authentication for an inference provider", - description="Remove stored credentials and reset provider config" + description="Remove stored credentials and reset provider config", ) logout_parser.add_argument( "--provider", choices=["nous", "openai-codex"], default=None, - help="Provider to log out from (default: active provider)" + help="Provider to log out from (default: active provider)", ) logout_parser.set_defaults(func=cmd_logout) @@ -4893,24 +6813,50 @@ For more help on a command: ) auth_subparsers = auth_parser.add_subparsers(dest="auth_action") auth_add = auth_subparsers.add_parser("add", help="Add a pooled credential") - auth_add.add_argument("provider", help="Provider id (for example: anthropic, openai-codex, openrouter)") - auth_add.add_argument("--type", dest="auth_type", choices=["oauth", "api-key", "api_key"], help="Credential type to add") + auth_add.add_argument( + "provider", + help="Provider id (for example: anthropic, openai-codex, openrouter)", + ) + auth_add.add_argument( + "--type", + dest="auth_type", + choices=["oauth", "api-key", "api_key"], + help="Credential type to add", + ) auth_add.add_argument("--label", help="Optional display label") - auth_add.add_argument("--api-key", help="API key value (otherwise prompted securely)") + auth_add.add_argument( + "--api-key", help="API key value (otherwise prompted securely)" + ) auth_add.add_argument("--portal-url", help="Nous portal base URL") auth_add.add_argument("--inference-url", help="Nous inference base URL") auth_add.add_argument("--client-id", help="OAuth client id") auth_add.add_argument("--scope", help="OAuth scope override") - auth_add.add_argument("--no-browser", action="store_true", help="Do not auto-open a browser for OAuth login") - auth_add.add_argument("--timeout", type=float, help="OAuth/network timeout in seconds") - auth_add.add_argument("--insecure", action="store_true", help="Disable TLS verification for OAuth login") + 
auth_add.add_argument( + "--no-browser", + action="store_true", + help="Do not auto-open a browser for OAuth login", + ) + auth_add.add_argument( + "--timeout", type=float, help="OAuth/network timeout in seconds" + ) + auth_add.add_argument( + "--insecure", + action="store_true", + help="Disable TLS verification for OAuth login", + ) auth_add.add_argument("--ca-bundle", help="Custom CA bundle for OAuth login") auth_list = auth_subparsers.add_parser("list", help="List pooled credentials") auth_list.add_argument("provider", nargs="?", help="Optional provider filter") - auth_remove = auth_subparsers.add_parser("remove", help="Remove a pooled credential by index, id, or label") + auth_remove = auth_subparsers.add_parser( + "remove", help="Remove a pooled credential by index, id, or label" + ) auth_remove.add_argument("provider", help="Provider id") - auth_remove.add_argument("target", help="Credential index, entry id, or exact label") - auth_reset = auth_subparsers.add_parser("reset", help="Clear exhaustion status for all credentials for a provider") + auth_remove.add_argument( + "target", help="Credential index, entry id, or exact label" + ) + auth_reset = auth_subparsers.add_parser( + "reset", help="Clear exhaustion status for all credentials for a provider" + ) auth_reset.add_argument("provider", help="Provider id") auth_parser.set_defaults(func=cmd_auth) @@ -4920,57 +6866,92 @@ For more help on a command: status_parser = subparsers.add_parser( "status", help="Show status of all components", - description="Display status of Hermes Agent components" + description="Display status of Hermes Agent components", ) status_parser.add_argument( - "--all", - action="store_true", - help="Show all details (redacted for sharing)" + "--all", action="store_true", help="Show all details (redacted for sharing)" ) status_parser.add_argument( - "--deep", - action="store_true", - help="Run deep checks (may take longer)" + "--deep", action="store_true", help="Run deep checks (may take 
longer)" ) status_parser.set_defaults(func=cmd_status) - + # ========================================================================= # cron command # ========================================================================= cron_parser = subparsers.add_parser( - "cron", - help="Cron job management", - description="Manage scheduled tasks" + "cron", help="Cron job management", description="Manage scheduled tasks" ) cron_subparsers = cron_parser.add_subparsers(dest="cron_command") - + # cron list cron_list = cron_subparsers.add_parser("list", help="List scheduled jobs") cron_list.add_argument("--all", action="store_true", help="Include disabled jobs") # cron create/add - cron_create = cron_subparsers.add_parser("create", aliases=["add"], help="Create a scheduled job") - cron_create.add_argument("schedule", help="Schedule like '30m', 'every 2h', or '0 9 * * *'") - cron_create.add_argument("prompt", nargs="?", help="Optional self-contained prompt or task instruction") + cron_create = cron_subparsers.add_parser( + "create", aliases=["add"], help="Create a scheduled job" + ) + cron_create.add_argument( + "schedule", help="Schedule like '30m', 'every 2h', or '0 9 * * *'" + ) + cron_create.add_argument( + "prompt", nargs="?", help="Optional self-contained prompt or task instruction" + ) cron_create.add_argument("--name", help="Optional human-friendly job name") - cron_create.add_argument("--deliver", help="Delivery target: origin, local, telegram, discord, signal, or platform:chat_id") + cron_create.add_argument( + "--deliver", + help="Delivery target: origin, local, telegram, discord, signal, or platform:chat_id", + ) cron_create.add_argument("--repeat", type=int, help="Optional repeat count") - cron_create.add_argument("--skill", dest="skills", action="append", help="Attach a skill. 
Repeat to add multiple skills.") - cron_create.add_argument("--script", help="Path to a Python script whose stdout is injected into the prompt each run") + cron_create.add_argument( + "--skill", + dest="skills", + action="append", + help="Attach a skill. Repeat to add multiple skills.", + ) + cron_create.add_argument( + "--script", + help="Path to a Python script whose stdout is injected into the prompt each run", + ) # cron edit - cron_edit = cron_subparsers.add_parser("edit", help="Edit an existing scheduled job") + cron_edit = cron_subparsers.add_parser( + "edit", help="Edit an existing scheduled job" + ) cron_edit.add_argument("job_id", help="Job ID to edit") cron_edit.add_argument("--schedule", help="New schedule") cron_edit.add_argument("--prompt", help="New prompt/task instruction") cron_edit.add_argument("--name", help="New job name") cron_edit.add_argument("--deliver", help="New delivery target") cron_edit.add_argument("--repeat", type=int, help="New repeat count") - cron_edit.add_argument("--skill", dest="skills", action="append", help="Replace the job's skills with this set. Repeat to attach multiple skills.") - cron_edit.add_argument("--add-skill", dest="add_skills", action="append", help="Append a skill without replacing the existing list. Repeatable.") - cron_edit.add_argument("--remove-skill", dest="remove_skills", action="append", help="Remove a specific attached skill. Repeatable.") - cron_edit.add_argument("--clear-skills", action="store_true", help="Remove all attached skills from the job") - cron_edit.add_argument("--script", help="Path to a Python script whose stdout is injected into the prompt each run. Pass empty string to clear.") + cron_edit.add_argument( + "--skill", + dest="skills", + action="append", + help="Replace the job's skills with this set. Repeat to attach multiple skills.", + ) + cron_edit.add_argument( + "--add-skill", + dest="add_skills", + action="append", + help="Append a skill without replacing the existing list. 
Repeatable.", + ) + cron_edit.add_argument( + "--remove-skill", + dest="remove_skills", + action="append", + help="Remove a specific attached skill. Repeatable.", + ) + cron_edit.add_argument( + "--clear-skills", + action="store_true", + help="Remove all attached skills from the job", + ) + cron_edit.add_argument( + "--script", + help="Path to a Python script whose stdout is injected into the prompt each run. Pass empty string to clear.", + ) # lifecycle actions cron_pause = cron_subparsers.add_parser("pause", help="Pause a scheduled job") @@ -4979,10 +6960,14 @@ For more help on a command: cron_resume = cron_subparsers.add_parser("resume", help="Resume a paused job") cron_resume.add_argument("job_id", help="Job ID to resume") - cron_run = cron_subparsers.add_parser("run", help="Run a job on the next scheduler tick") + cron_run = cron_subparsers.add_parser( + "run", help="Run a job on the next scheduler tick" + ) cron_run.add_argument("job_id", help="Job ID to trigger") - cron_remove = cron_subparsers.add_parser("remove", aliases=["rm", "delete"], help="Remove a scheduled job") + cron_remove = cron_subparsers.add_parser( + "remove", aliases=["rm", "delete"], help="Remove a scheduled job" + ) cron_remove.add_argument("job_id", help="Job ID to remove") # cron status @@ -5003,24 +6988,57 @@ For more help on a command: ) webhook_subparsers = webhook_parser.add_subparsers(dest="webhook_action") - wh_sub = webhook_subparsers.add_parser("subscribe", aliases=["add"], help="Create a webhook subscription") + wh_sub = webhook_subparsers.add_parser( + "subscribe", aliases=["add"], help="Create a webhook subscription" + ) wh_sub.add_argument("name", help="Route name (used in URL: /webhooks/)") - wh_sub.add_argument("--prompt", default="", help="Prompt template with {dot.notation} payload refs") - wh_sub.add_argument("--events", default="", help="Comma-separated event types to accept") + wh_sub.add_argument( + "--prompt", default="", help="Prompt template with {dot.notation} 
payload refs" + ) + wh_sub.add_argument( + "--events", default="", help="Comma-separated event types to accept" + ) wh_sub.add_argument("--description", default="", help="What this subscription does") - wh_sub.add_argument("--skills", default="", help="Comma-separated skill names to load") - wh_sub.add_argument("--deliver", default="log", help="Delivery target: log, telegram, discord, slack, etc.") - wh_sub.add_argument("--deliver-chat-id", default="", help="Target chat ID for cross-platform delivery") - wh_sub.add_argument("--secret", default="", help="HMAC secret (auto-generated if omitted)") + wh_sub.add_argument( + "--skills", default="", help="Comma-separated skill names to load" + ) + wh_sub.add_argument( + "--deliver", + default="log", + help="Delivery target: log, telegram, discord, slack, etc.", + ) + wh_sub.add_argument( + "--deliver-chat-id", + default="", + help="Target chat ID for cross-platform delivery", + ) + wh_sub.add_argument( + "--secret", default="", help="HMAC secret (auto-generated if omitted)" + ) + wh_sub.add_argument( + "--deliver-only", + action="store_true", + help="Skip the agent — deliver the rendered prompt directly as the " + "message. Zero LLM cost. 
Requires --deliver to be a real target " + "(not 'log').", + ) - webhook_subparsers.add_parser("list", aliases=["ls"], help="List all dynamic subscriptions") + webhook_subparsers.add_parser( + "list", aliases=["ls"], help="List all dynamic subscriptions" + ) - wh_rm = webhook_subparsers.add_parser("remove", aliases=["rm"], help="Remove a subscription") + wh_rm = webhook_subparsers.add_parser( + "remove", aliases=["rm"], help="Remove a subscription" + ) wh_rm.add_argument("name", help="Subscription name to remove") - wh_test = webhook_subparsers.add_parser("test", help="Send a test POST to a webhook route") + wh_test = webhook_subparsers.add_parser( + "test", help="Send a test POST to a webhook route" + ) wh_test.add_argument("name", help="Subscription name to test") - wh_test.add_argument("--payload", default="", help="JSON payload to send (default: test payload)") + wh_test.add_argument( + "--payload", default="", help="JSON payload to send (default: test payload)" + ) webhook_parser.set_defaults(func=cmd_webhook) @@ -5030,12 +7048,10 @@ For more help on a command: doctor_parser = subparsers.add_parser( "doctor", help="Check configuration and dependencies", - description="Diagnose issues with Hermes Agent setup" + description="Diagnose issues with Hermes Agent setup", ) doctor_parser.add_argument( - "--fix", - action="store_true", - help="Attempt to fix issues automatically" + "--fix", action="store_true", help="Attempt to fix issues automatically" ) doctor_parser.set_defaults(func=cmd_doctor) @@ -5046,12 +7062,12 @@ For more help on a command: "dump", help="Dump setup summary for support/debugging", description="Output a compact, plain-text summary of your Hermes setup " - "that can be copy-pasted into Discord/GitHub for support context" + "that can be copy-pasted into Discord/GitHub for support context", ) dump_parser.add_argument( "--show-keys", action="store_true", - help="Show redacted API key prefixes (first/last 4 chars) instead of just set/not set" + 
help="Show redacted API key prefixes (first/last 4 chars) instead of just set/not set", ) dump_parser.set_defaults(func=cmd_dump) @@ -5062,8 +7078,8 @@ For more help on a command: "debug", help="Debug tools — upload logs and system info for support", description="Debug utilities for Hermes Agent. Use 'hermes debug share' to " - "upload a debug report (system info + recent logs) to a paste " - "service and get a shareable URL.", + "upload a debug report (system info + recent logs) to a paste " + "service and get a shareable URL.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog="""\ Examples: @@ -5071,6 +7087,7 @@ Examples: hermes debug share --lines 500 Include more log lines hermes debug share --expire 30 Keep paste for 30 days hermes debug share --local Print report locally (no upload) + hermes debug delete Delete a previously uploaded paste """, ) debug_sub = debug_parser.add_subparsers(dest="debug_command") @@ -5079,17 +7096,32 @@ Examples: help="Upload debug report to a paste service and print a shareable URL", ) share_parser.add_argument( - "--lines", type=int, default=200, + "--lines", + type=int, + default=200, help="Number of log lines to include per log file (default: 200)", ) share_parser.add_argument( - "--expire", type=int, default=7, + "--expire", + type=int, + default=7, help="Paste expiry in days (default: 7)", ) share_parser.add_argument( - "--local", action="store_true", + "--local", + action="store_true", help="Print the report locally instead of uploading", ) + delete_parser = debug_sub.add_parser( + "delete", + help="Delete a paste uploaded by 'hermes debug share'", + ) + delete_parser.add_argument( + "urls", + nargs="*", + default=[], + help="One or more paste URLs to delete (e.g. 
https://paste.rs/abc123)", + ) debug_parser.set_defaults(func=cmd_debug) # ========================================================================= @@ -5099,21 +7131,22 @@ Examples: "backup", help="Back up Hermes home directory to a zip file", description="Create a zip archive of your entire Hermes configuration, " - "skills, sessions, and data (excludes the hermes-agent codebase). " - "Use --quick for a fast snapshot of just critical state files." + "skills, sessions, and data (excludes the hermes-agent codebase). " + "Use --quick for a fast snapshot of just critical state files.", ) backup_parser.add_argument( - "-o", "--output", - help="Output path for the zip file (default: ~/hermes-backup-.zip)" + "-o", + "--output", + help="Output path for the zip file (default: ~/hermes-backup-.zip)", ) backup_parser.add_argument( - "-q", "--quick", + "-q", + "--quick", action="store_true", - help="Quick snapshot: only critical state files (config, state.db, .env, auth, cron)" + help="Quick snapshot: only critical state files (config, state.db, .env, auth, cron)", ) backup_parser.add_argument( - "-l", "--label", - help="Label for the snapshot (only used with --quick)" + "-l", "--label", help="Label for the snapshot (only used with --quick)" ) backup_parser.set_defaults(func=cmd_backup) @@ -5124,17 +7157,15 @@ Examples: "import", help="Restore a Hermes backup from a zip file", description="Extract a previously created Hermes backup into your " - "Hermes home directory, restoring configuration, skills, " - "sessions, and data" + "Hermes home directory, restoring configuration, skills, " + "sessions, and data", ) + import_parser.add_argument("zipfile", help="Path to the backup zip file") import_parser.add_argument( - "zipfile", - help="Path to the backup zip file" - ) - import_parser.add_argument( - "--force", "-f", + "--force", + "-f", action="store_true", - help="Overwrite existing files without confirmation" + help="Overwrite existing files without confirmation", ) 
import_parser.set_defaults(func=cmd_import) @@ -5144,49 +7175,55 @@ Examples: config_parser = subparsers.add_parser( "config", help="View and edit configuration", - description="Manage Hermes Agent configuration" + description="Manage Hermes Agent configuration", ) config_subparsers = config_parser.add_subparsers(dest="config_command") - + # config show (default) config_subparsers.add_parser("show", help="Show current configuration") - + # config edit config_subparsers.add_parser("edit", help="Open config file in editor") - + # config set config_set = config_subparsers.add_parser("set", help="Set a configuration value") - config_set.add_argument("key", nargs="?", help="Configuration key (e.g., model, terminal.backend)") + config_set.add_argument( + "key", nargs="?", help="Configuration key (e.g., model, terminal.backend)" + ) config_set.add_argument("value", nargs="?", help="Value to set") - + # config path config_subparsers.add_parser("path", help="Print config file path") - + # config env-path config_subparsers.add_parser("env-path", help="Print .env file path") - + # config check config_subparsers.add_parser("check", help="Check for missing/outdated config") - + # config migrate config_subparsers.add_parser("migrate", help="Update config with new options") - + config_parser.set_defaults(func=cmd_config) - + # ========================================================================= # pairing command # ========================================================================= pairing_parser = subparsers.add_parser( "pairing", help="Manage DM pairing codes for user authorization", - description="Approve or revoke user access via pairing codes" + description="Approve or revoke user access via pairing codes", ) pairing_sub = pairing_parser.add_subparsers(dest="pairing_action") pairing_sub.add_parser("list", help="Show pending + approved users") - pairing_approve_parser = pairing_sub.add_parser("approve", help="Approve a pairing code") - 
pairing_approve_parser.add_argument("platform", help="Platform name (telegram, discord, slack, whatsapp)") + pairing_approve_parser = pairing_sub.add_parser( + "approve", help="Approve a pairing code" + ) + pairing_approve_parser.add_argument( + "platform", help="Platform name (telegram, discord, slack, whatsapp)" + ) pairing_approve_parser.add_argument("code", help="Pairing code to approve") pairing_revoke_parser = pairing_sub.add_parser("revoke", help="Revoke user access") @@ -5197,6 +7234,7 @@ Examples: def cmd_pairing(args): from hermes_cli.pairing import pairing_command + pairing_command(args) pairing_parser.set_defaults(func=cmd_pairing) @@ -5207,58 +7245,158 @@ Examples: skills_parser = subparsers.add_parser( "skills", help="Search, install, configure, and manage skills", - description="Search, install, inspect, audit, configure, and manage skills from skills.sh, well-known agent skill endpoints, GitHub, ClawHub, and other registries." + description="Search, install, inspect, audit, configure, and manage skills from skills.sh, well-known agent skill endpoints, GitHub, ClawHub, and other registries.", ) skills_subparsers = skills_parser.add_subparsers(dest="skills_action") - skills_browse = skills_subparsers.add_parser("browse", help="Browse all available skills (paginated)") - skills_browse.add_argument("--page", type=int, default=1, help="Page number (default: 1)") - skills_browse.add_argument("--size", type=int, default=20, help="Results per page (default: 20)") - skills_browse.add_argument("--source", default="all", - choices=["all", "official", "skills-sh", "well-known", "github", "clawhub", "lobehub"], - help="Filter by source (default: all)") + skills_browse = skills_subparsers.add_parser( + "browse", help="Browse all available skills (paginated)" + ) + skills_browse.add_argument( + "--page", type=int, default=1, help="Page number (default: 1)" + ) + skills_browse.add_argument( + "--size", type=int, default=20, help="Results per page (default: 20)" + ) 
+ skills_browse.add_argument( + "--source", + default="all", + choices=[ + "all", + "official", + "skills-sh", + "well-known", + "github", + "clawhub", + "lobehub", + ], + help="Filter by source (default: all)", + ) - skills_search = skills_subparsers.add_parser("search", help="Search skill registries") + skills_search = skills_subparsers.add_parser( + "search", help="Search skill registries" + ) skills_search.add_argument("query", help="Search query") - skills_search.add_argument("--source", default="all", choices=["all", "official", "skills-sh", "well-known", "github", "clawhub", "lobehub"]) + skills_search.add_argument( + "--source", + default="all", + choices=[ + "all", + "official", + "skills-sh", + "well-known", + "github", + "clawhub", + "lobehub", + ], + ) skills_search.add_argument("--limit", type=int, default=10, help="Max results") skills_install = skills_subparsers.add_parser("install", help="Install a skill") - skills_install.add_argument("identifier", help="Skill identifier (e.g. openai/skills/skill-creator)") - skills_install.add_argument("--category", default="", help="Category folder to install into") - skills_install.add_argument("--force", action="store_true", help="Install despite blocked scan verdict") - skills_install.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompt (needed in TUI mode)") + skills_install.add_argument( + "identifier", help="Skill identifier (e.g. 
openai/skills/skill-creator)" + ) + skills_install.add_argument( + "--category", default="", help="Category folder to install into" + ) + skills_install.add_argument( + "--force", action="store_true", help="Install despite blocked scan verdict" + ) + skills_install.add_argument( + "--yes", + "-y", + action="store_true", + help="Skip confirmation prompt (needed in TUI mode)", + ) - skills_inspect = skills_subparsers.add_parser("inspect", help="Preview a skill without installing") + skills_inspect = skills_subparsers.add_parser( + "inspect", help="Preview a skill without installing" + ) skills_inspect.add_argument("identifier", help="Skill identifier") skills_list = skills_subparsers.add_parser("list", help="List installed skills") - skills_list.add_argument("--source", default="all", choices=["all", "hub", "builtin", "local"]) + skills_list.add_argument( + "--source", default="all", choices=["all", "hub", "builtin", "local"] + ) - skills_check = skills_subparsers.add_parser("check", help="Check installed hub skills for updates") - skills_check.add_argument("name", nargs="?", help="Specific skill to check (default: all)") + skills_check = skills_subparsers.add_parser( + "check", help="Check installed hub skills for updates" + ) + skills_check.add_argument( + "name", nargs="?", help="Specific skill to check (default: all)" + ) - skills_update = skills_subparsers.add_parser("update", help="Update installed hub skills") - skills_update.add_argument("name", nargs="?", help="Specific skill to update (default: all outdated skills)") + skills_update = skills_subparsers.add_parser( + "update", help="Update installed hub skills" + ) + skills_update.add_argument( + "name", + nargs="?", + help="Specific skill to update (default: all outdated skills)", + ) - skills_audit = skills_subparsers.add_parser("audit", help="Re-scan installed hub skills") - skills_audit.add_argument("name", nargs="?", help="Specific skill to audit (default: all)") + skills_audit = 
skills_subparsers.add_parser( + "audit", help="Re-scan installed hub skills" + ) + skills_audit.add_argument( + "name", nargs="?", help="Specific skill to audit (default: all)" + ) - skills_uninstall = skills_subparsers.add_parser("uninstall", help="Remove a hub-installed skill") + skills_uninstall = skills_subparsers.add_parser( + "uninstall", help="Remove a hub-installed skill" + ) skills_uninstall.add_argument("name", help="Skill name to remove") - skills_publish = skills_subparsers.add_parser("publish", help="Publish a skill to a registry") - skills_publish.add_argument("skill_path", help="Path to skill directory") - skills_publish.add_argument("--to", default="github", choices=["github", "clawhub"], help="Target registry") - skills_publish.add_argument("--repo", default="", help="Target GitHub repo (e.g. openai/skills)") + skills_reset = skills_subparsers.add_parser( + "reset", + help="Reset a bundled skill — clears 'user-modified' tracking so updates work again", + description=( + "Clear a bundled skill's entry from the sync manifest (~/.hermes/skills/.bundled_manifest) " + "so future 'hermes update' runs stop marking it as user-modified. Pass --restore to also " + "replace the current copy with the bundled version." + ), + ) + skills_reset.add_argument( + "name", help="Skill name to reset (e.g. 
google-workspace)" + ) + skills_reset.add_argument( + "--restore", + action="store_true", + help="Also delete the current copy and re-copy the bundled version", + ) + skills_reset.add_argument( + "--yes", + "-y", + action="store_true", + help="Skip confirmation prompt when using --restore", + ) - skills_snapshot = skills_subparsers.add_parser("snapshot", help="Export/import skill configurations") + skills_publish = skills_subparsers.add_parser( + "publish", help="Publish a skill to a registry" + ) + skills_publish.add_argument("skill_path", help="Path to skill directory") + skills_publish.add_argument( + "--to", default="github", choices=["github", "clawhub"], help="Target registry" + ) + skills_publish.add_argument( + "--repo", default="", help="Target GitHub repo (e.g. openai/skills)" + ) + + skills_snapshot = skills_subparsers.add_parser( + "snapshot", help="Export/import skill configurations" + ) snapshot_subparsers = skills_snapshot.add_subparsers(dest="snapshot_action") - snap_export = snapshot_subparsers.add_parser("export", help="Export installed skills to a file") + snap_export = snapshot_subparsers.add_parser( + "export", help="Export installed skills to a file" + ) snap_export.add_argument("output", help="Output JSON file path (use - for stdout)") - snap_import = snapshot_subparsers.add_parser("import", help="Import and install skills from a file") + snap_import = snapshot_subparsers.add_parser( + "import", help="Import and install skills from a file" + ) snap_import.add_argument("input", help="Input JSON file path") - snap_import.add_argument("--force", action="store_true", help="Force install despite caution verdict") + snap_import.add_argument( + "--force", action="store_true", help="Force install despite caution verdict" + ) skills_tap = skills_subparsers.add_parser("tap", help="Manage skill sources") tap_subparsers = skills_tap.add_subparsers(dest="tap_action") @@ -5269,16 +7407,21 @@ Examples: tap_rm.add_argument("name", help="Tap name to remove") 
# config sub-action: interactive enable/disable - skills_subparsers.add_parser("config", help="Interactive skill configuration — enable/disable individual skills") + skills_subparsers.add_parser( + "config", + help="Interactive skill configuration — enable/disable individual skills", + ) def cmd_skills(args): # Route 'config' action to skills_config module - if getattr(args, 'skills_action', None) == 'config': + if getattr(args, "skills_action", None) == "config": _require_tty("skills config") from hermes_cli.skills_config import skills_command as skills_config_command + skills_config_command(args) else: from hermes_cli.skills_hub import skills_command + skills_command(args) skills_parser.set_defaults(func=cmd_skills) @@ -5301,9 +7444,22 @@ Examples: help="Git URL or owner/repo shorthand (e.g. anpicasso/hermes-plugin-chrome-profiles)", ) plugins_install.add_argument( - "--force", "-f", action="store_true", + "--force", + "-f", + action="store_true", help="Remove existing plugin and reinstall", ) + _install_enable_group = plugins_install.add_mutually_exclusive_group() + _install_enable_group.add_argument( + "--enable", + action="store_true", + help="Auto-enable the plugin after install (skip confirmation prompt)", + ) + _install_enable_group.add_argument( + "--no-enable", + action="store_true", + help="Install disabled (skip confirmation prompt); enable later with `hermes plugins enable `", + ) plugins_update = plugins_subparsers.add_parser( "update", help="Pull latest changes for an installed plugin" @@ -5329,6 +7485,7 @@ Examples: def cmd_plugins(args): from hermes_cli.plugins_cmd import plugins_command + plugins_command(args) plugins_parser.set_defaults(func=cmd_plugins) @@ -5340,6 +7497,7 @@ Examples: # ========================================================================= try: from plugins.memory import discover_plugin_cli_commands + for cmd_info in discover_plugin_cli_commands(): plugin_parser = subparsers.add_parser( cmd_info["name"], @@ -5350,6 +7508,7 @@ 
Examples: cmd_info["setup_fn"](plugin_parser) except Exception as _exc: import logging as _log + _log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc) # ========================================================================= @@ -5367,14 +7526,33 @@ Examples: ), ) memory_sub = memory_parser.add_subparsers(dest="memory_command") - memory_sub.add_parser("setup", help="Interactive provider selection and configuration") + memory_sub.add_parser( + "setup", help="Interactive provider selection and configuration" + ) memory_sub.add_parser("status", help="Show current memory provider config") memory_sub.add_parser("off", help="Disable external provider (built-in only)") + _reset_parser = memory_sub.add_parser( + "reset", + help="Erase all built-in memory (MEMORY.md and USER.md)", + ) + _reset_parser.add_argument( + "--yes", + "-y", + action="store_true", + help="Skip confirmation prompt", + ) + _reset_parser.add_argument( + "--target", + choices=["all", "memory", "user"], + default="all", + help="Which store to reset: 'all' (default), 'memory', or 'user'", + ) def cmd_memory(args): sub = getattr(args, "memory_command", None) if sub == "off": from hermes_cli.config import load_config, save_config + config = load_config() if not isinstance(config.get("memory"), dict): config["memory"] = {} @@ -5382,8 +7560,54 @@ Examples: save_config(config) print("\n ✓ Memory provider: built-in only") print(" Saved to config.yaml\n") + elif sub == "reset": + from hermes_constants import get_hermes_home, display_hermes_home + + mem_dir = get_hermes_home() / "memories" + target = getattr(args, "target", "all") + files_to_reset = [] + if target in ("all", "memory"): + files_to_reset.append(("MEMORY.md", "agent notes")) + if target in ("all", "user"): + files_to_reset.append(("USER.md", "user profile")) + + # Check what exists + existing = [ + (f, desc) for f, desc in files_to_reset if (mem_dir / f).exists() + ] + if not existing: + print( + f"\n Nothing to reset — no memory 
files found in {display_hermes_home()}/memories/\n" + ) + return + + print(f"\n This will permanently erase the following memory files:") + for f, desc in existing: + path = mem_dir / f + size = path.stat().st_size + print(f" ◆ {f} ({desc}) — {size:,} bytes") + + if not getattr(args, "yes", False): + try: + answer = input("\n Type 'yes' to confirm: ").strip().lower() + except (EOFError, KeyboardInterrupt): + print("\n Cancelled.\n") + return + if answer != "yes": + print(" Cancelled.\n") + return + + for f, desc in existing: + (mem_dir / f).unlink() + print(f" ✓ Deleted {f} ({desc})") + + print( + f"\n Memory reset complete. New sessions will start with a blank slate." + ) + print(f" Files were in: {display_hermes_home()}/memories/\n") else: from hermes_cli.memory_setup import memory_command + memory_command(args) memory_parser.set_defaults(func=cmd_memory) @@ -5404,7 +7628,7 @@ Examples: tools_parser.add_argument( "--summary", action="store_true", - help="Print a summary of enabled tools per platform and exit" + help="Print a summary of enabled tools per platform and exit", ) tools_sub = tools_parser.add_subparsers(dest="tools_action") @@ -5414,7 +7638,8 @@ Examples: help="Show all tools and their enabled/disabled status", ) tools_list_p.add_argument( - "--platform", default="cli", + "--platform", + default="cli", help="Platform to show (default: cli)", ) @@ -5424,11 +7649,14 @@ Examples: help="Disable toolsets or MCP tools", ) tools_disable_p.add_argument( - "names", nargs="+", metavar="NAME", + "names", + nargs="+", + metavar="NAME", help="Toolset name (e.g. 
web) or MCP tool in server:tool form", ) tools_disable_p.add_argument( - "--platform", default="cli", + "--platform", + default="cli", help="Platform to apply to (default: cli)", ) @@ -5438,11 +7666,14 @@ Examples: help="Enable toolsets or MCP tools", ) tools_enable_p.add_argument( - "names", nargs="+", metavar="NAME", + "names", + nargs="+", + metavar="NAME", help="Toolset name or MCP tool in server:tool form", ) tools_enable_p.add_argument( - "--platform", default="cli", + "--platform", + default="cli", help="Platform to apply to (default: cli)", ) @@ -5450,10 +7681,12 @@ Examples: action = getattr(args, "tools_action", None) if action in ("list", "disable", "enable"): from hermes_cli.tools_config import tools_disable_enable_command + tools_disable_enable_command(args) else: _require_tty("tools") from hermes_cli.tools_config import tools_command + tools_command(args) tools_parser.set_defaults(func=cmd_tools) @@ -5477,18 +7710,29 @@ Examples: help="Run Hermes as an MCP server (expose conversations to other agents)", ) mcp_serve_p.add_argument( - "-v", "--verbose", action="store_true", + "-v", + "--verbose", + action="store_true", help="Enable verbose logging on stderr", ) - mcp_add_p = mcp_sub.add_parser("add", help="Add an MCP server (discovery-first install)") + mcp_add_p = mcp_sub.add_parser( + "add", help="Add an MCP server (discovery-first install)" + ) mcp_add_p.add_argument("name", help="Server name (used as config key)") mcp_add_p.add_argument("--url", help="HTTP/SSE endpoint URL") mcp_add_p.add_argument("--command", help="Stdio command (e.g. 
npx)") - mcp_add_p.add_argument("--args", nargs="*", default=[], help="Arguments for stdio command") + mcp_add_p.add_argument( + "--args", nargs="*", default=[], help="Arguments for stdio command" + ) mcp_add_p.add_argument("--auth", choices=["oauth", "header"], help="Auth method") mcp_add_p.add_argument("--preset", help="Known MCP preset name") - mcp_add_p.add_argument("--env", nargs="*", default=[], help="Environment variables for stdio servers (KEY=VALUE)") + mcp_add_p.add_argument( + "--env", + nargs="*", + default=[], + help="Environment variables for stdio servers (KEY=VALUE)", + ) mcp_rm_p = mcp_sub.add_parser("remove", aliases=["rm"], help="Remove an MCP server") mcp_rm_p.add_argument("name", help="Server name to remove") @@ -5498,11 +7742,20 @@ Examples: mcp_test_p = mcp_sub.add_parser("test", help="Test MCP server connection") mcp_test_p.add_argument("name", help="Server name to test") - mcp_cfg_p = mcp_sub.add_parser("configure", aliases=["config"], help="Toggle tool selection") + mcp_cfg_p = mcp_sub.add_parser( + "configure", aliases=["config"], help="Toggle tool selection" + ) mcp_cfg_p.add_argument("name", help="Server name to configure") + mcp_login_p = mcp_sub.add_parser( + "login", + help="Force re-authentication for an OAuth-based MCP server", + ) + mcp_login_p.add_argument("name", help="Server name to re-authenticate") + def cmd_mcp(args): from hermes_cli.mcp_config import mcp_command + mcp_command(args) mcp_parser.set_defaults(func=cmd_mcp) @@ -5513,31 +7766,52 @@ Examples: sessions_parser = subparsers.add_parser( "sessions", help="Manage session history (list, rename, export, prune, delete)", - description="View and manage the SQLite session store" + description="View and manage the SQLite session store", ) sessions_subparsers = sessions_parser.add_subparsers(dest="sessions_action") sessions_list = sessions_subparsers.add_parser("list", help="List recent sessions") - sessions_list.add_argument("--source", help="Filter by source (cli, telegram, 
discord, etc.)") - sessions_list.add_argument("--limit", type=int, default=20, help="Max sessions to show") + sessions_list.add_argument( + "--source", help="Filter by source (cli, telegram, discord, etc.)" + ) + sessions_list.add_argument( + "--limit", type=int, default=20, help="Max sessions to show" + ) - sessions_export = sessions_subparsers.add_parser("export", help="Export sessions to a JSONL file") - sessions_export.add_argument("output", help="Output JSONL file path (use - for stdout)") + sessions_export = sessions_subparsers.add_parser( + "export", help="Export sessions to a JSONL file" + ) + sessions_export.add_argument( + "output", help="Output JSONL file path (use - for stdout)" + ) sessions_export.add_argument("--source", help="Filter by source") sessions_export.add_argument("--session-id", help="Export a specific session") - sessions_delete = sessions_subparsers.add_parser("delete", help="Delete a specific session") + sessions_delete = sessions_subparsers.add_parser( + "delete", help="Delete a specific session" + ) sessions_delete.add_argument("session_id", help="Session ID to delete") - sessions_delete.add_argument("--yes", "-y", action="store_true", help="Skip confirmation") + sessions_delete.add_argument( + "--yes", "-y", action="store_true", help="Skip confirmation" + ) sessions_prune = sessions_subparsers.add_parser("prune", help="Delete old sessions") - sessions_prune.add_argument("--older-than", type=int, default=90, help="Delete sessions older than N days (default: 90)") + sessions_prune.add_argument( + "--older-than", + type=int, + default=90, + help="Delete sessions older than N days (default: 90)", + ) sessions_prune.add_argument("--source", help="Only prune sessions from this source") - sessions_prune.add_argument("--yes", "-y", action="store_true", help="Skip confirmation") + sessions_prune.add_argument( + "--yes", "-y", action="store_true", help="Skip confirmation" + ) sessions_subparsers.add_parser("stats", help="Show session store 
statistics") - sessions_rename = sessions_subparsers.add_parser("rename", help="Set or change a session's title") + sessions_rename = sessions_subparsers.add_parser( + "rename", help="Set or change a session's title" + ) sessions_rename.add_argument("session_id", help="Session ID to rename") sessions_rename.add_argument("title", nargs="+", help="New title for the session") @@ -5545,8 +7819,12 @@ Examples: "browse", help="Interactive session picker — browse, search, and resume sessions", ) - sessions_browse.add_argument("--source", help="Filter by source (cli, telegram, discord, etc.)") - sessions_browse.add_argument("--limit", type=int, default=50, help="Max sessions to load (default: 50)") + sessions_browse.add_argument( + "--source", help="Filter by source (cli, telegram, discord, etc.)" + ) + sessions_browse.add_argument( + "--limit", type=int, default=50, help="Max sessions to load (default: 50)" + ) def _confirm_prompt(prompt: str) -> bool: """Prompt for y/N confirmation, safe against non-TTY environments.""" @@ -5557,8 +7835,10 @@ Examples: def cmd_sessions(args): import json as _json + try: from hermes_state import SessionDB + db = SessionDB() except Exception as e: print(f"Error: Could not open session database: {e}") @@ -5571,7 +7851,9 @@ Examples: _exclude = None if _source else ["tool"] if action == "list": - sessions = db.list_sessions_rich(source=args.source, exclude_sources=_exclude, limit=args.limit) + sessions = db.list_sessions_rich( + source=args.source, exclude_sources=_exclude, limit=args.limit + ) if not sessions: print("No sessions found.") return @@ -5584,7 +7866,11 @@ Examples: print("─" * 95) for s in sessions: last_active = _relative_time(s.get("last_active")) - preview = s.get("preview", "")[:38] if has_titles else s.get("preview", "")[:48] + preview = ( + s.get("preview", "")[:38] + if has_titles + else s.get("preview", "")[:48] + ) if has_titles: title = (s.get("title") or "—")[:30] sid = s["id"] @@ -5606,6 +7892,7 @@ Examples: line = 
_json.dumps(data, ensure_ascii=False) + "\n" if args.output == "-": import sys + sys.stdout.write(line) else: with open(args.output, "w", encoding="utf-8") as f: @@ -5615,6 +7902,7 @@ Examples: sessions = db.export_all(source=args.source) if args.output == "-": import sys + for s in sessions: sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n") else: @@ -5629,7 +7917,9 @@ Examples: print(f"Session '{args.session_id}' not found.") return if not args.yes: - if not _confirm_prompt(f"Delete session '{resolved_session_id}' and all its messages? [y/N] "): + if not _confirm_prompt( + f"Delete session '{resolved_session_id}' and all its messages? [y/N] " + ): print("Cancelled.") return if db.delete_session(resolved_session_id): @@ -5641,7 +7931,9 @@ Examples: days = args.older_than source_msg = f" from '{args.source}'" if args.source else "" if not args.yes: - if not _confirm_prompt(f"Delete all ended sessions older than {days} days{source_msg}? [y/N] "): + if not _confirm_prompt( + f"Delete all ended sessions older than {days} days{source_msg}? 
[y/N] " + ): print("Cancelled.") return count = db.prune_sessions(older_than_days=days, source=args.source) @@ -5665,7 +7957,9 @@ Examples: limit = getattr(args, "limit", 50) or 50 source = getattr(args, "source", None) _browse_exclude = None if source else ["tool"] - sessions = db.list_sessions_rich(source=source, exclude_sources=_browse_exclude, limit=limit) + sessions = db.list_sessions_rich( + source=source, exclude_sources=_browse_exclude, limit=limit + ) db.close() if not sessions: print("No sessions found.") @@ -5679,6 +7973,7 @@ Examples: # Launch hermes --resume by replacing the current process print(f"Resuming session: {selected_id}") import shutil + hermes_bin = shutil.which("hermes") if hermes_bin: os.execvp(hermes_bin, ["hermes", "--resume", selected_id]) @@ -5717,10 +8012,14 @@ Examples: insights_parser = subparsers.add_parser( "insights", help="Show usage insights and analytics", - description="Analyze session history to show token usage, costs, tool patterns, and activity trends" + description="Analyze session history to show token usage, costs, tool patterns, and activity trends", + ) + insights_parser.add_argument( + "--days", type=int, default=30, help="Number of days to analyze (default: 30)" + ) + insights_parser.add_argument( + "--source", help="Filter by platform (cli, telegram, discord, etc.)" ) - insights_parser.add_argument("--days", type=int, default=30, help="Number of days to analyze (default: 30)") - insights_parser.add_argument("--source", help="Filter by platform (cli, telegram, discord, etc.)") def cmd_insights(args): try: @@ -5743,7 +8042,7 @@ Examples: claw_parser = subparsers.add_parser( "claw", help="OpenClaw migration tools", - description="Migrate settings, memories, skills, and API keys from OpenClaw to Hermes" + description="Migrate settings, memories, skills, and API keys from OpenClaw to Hermes", ) claw_subparsers = claw_parser.add_subparsers(dest="claw_action") @@ -5752,47 +8051,43 @@ Examples: "migrate", help="Migrate 
from OpenClaw to Hermes", description="Import settings, memories, skills, and API keys from an OpenClaw installation. " - "Always shows a preview before making changes." + "Always shows a preview before making changes.", ) claw_migrate.add_argument( - "--source", - help="Path to OpenClaw directory (default: ~/.openclaw)" + "--source", help="Path to OpenClaw directory (default: ~/.openclaw)" ) claw_migrate.add_argument( "--dry-run", action="store_true", - help="Preview only — stop after showing what would be migrated" + help="Preview only — stop after showing what would be migrated", ) claw_migrate.add_argument( "--preset", choices=["user-data", "full"], default="full", - help="Migration preset (default: full). 'user-data' excludes secrets" + help="Migration preset (default: full). 'user-data' excludes secrets", ) claw_migrate.add_argument( "--overwrite", action="store_true", - help="Overwrite existing files (default: skip conflicts)" + help="Overwrite existing files (default: skip conflicts)", ) claw_migrate.add_argument( "--migrate-secrets", action="store_true", - help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.)" + help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.)", ) claw_migrate.add_argument( - "--workspace-target", - help="Absolute path to copy workspace instructions into" + "--workspace-target", help="Absolute path to copy workspace instructions into" ) claw_migrate.add_argument( "--skill-conflict", choices=["skip", "overwrite", "rename"], default="skip", - help="How to handle skill name conflicts (default: skip)" + help="How to handle skill name conflicts (default: skip)", ) claw_migrate.add_argument( - "--yes", "-y", - action="store_true", - help="Skip confirmation prompts" + "--yes", "-y", action="store_true", help="Skip confirmation prompts" ) # claw cleanup @@ -5800,25 +8095,23 @@ Examples: "cleanup", aliases=["clean"], help="Archive leftover OpenClaw directories after migration", - description="Scan for and 
archive leftover OpenClaw directories to prevent state fragmentation" + description="Scan for and archive leftover OpenClaw directories to prevent state fragmentation", ) claw_cleanup.add_argument( - "--source", - help="Path to a specific OpenClaw directory to clean up" + "--source", help="Path to a specific OpenClaw directory to clean up" ) claw_cleanup.add_argument( "--dry-run", action="store_true", - help="Preview what would be archived without making changes" + help="Preview what would be archived without making changes", ) claw_cleanup.add_argument( - "--yes", "-y", - action="store_true", - help="Skip confirmation prompts" + "--yes", "-y", action="store_true", help="Skip confirmation prompts" ) def cmd_claw(args): from hermes_cli.claw import claw_command + claw_command(args) claw_parser.set_defaults(func=cmd_claw) @@ -5826,43 +8119,40 @@ Examples: # ========================================================================= # version command # ========================================================================= - version_parser = subparsers.add_parser( - "version", - help="Show version information" - ) + version_parser = subparsers.add_parser("version", help="Show version information") version_parser.set_defaults(func=cmd_version) - + # ========================================================================= # update command # ========================================================================= update_parser = subparsers.add_parser( "update", help="Update Hermes Agent to the latest version", - description="Pull the latest changes from git and reinstall dependencies" + description="Pull the latest changes from git and reinstall dependencies", ) update_parser.add_argument( - "--gateway", action="store_true", default=False, - help="Gateway mode: use file-based IPC for prompts instead of stdin (used internally by /update)" + "--gateway", + action="store_true", + default=False, + help="Gateway mode: use file-based IPC for prompts instead of stdin (used 
internally by /update)", ) update_parser.set_defaults(func=cmd_update) - + # ========================================================================= # uninstall command # ========================================================================= uninstall_parser = subparsers.add_parser( "uninstall", help="Uninstall Hermes Agent", - description="Remove Hermes Agent from your system. Can keep configs/data for reinstall." + description="Remove Hermes Agent from your system. Can keep configs/data for reinstall.", ) uninstall_parser.add_argument( "--full", action="store_true", - help="Full uninstall - remove everything including configs and data" + help="Full uninstall - remove everything including configs and data", ) uninstall_parser.add_argument( - "--yes", "-y", - action="store_true", - help="Skip confirmation prompts" + "--yes", "-y", action="store_true", help="Skip confirmation prompts" ) uninstall_parser.set_defaults(func=cmd_uninstall) @@ -5879,6 +8169,7 @@ Examples: """Launch Hermes Agent as an ACP server.""" try: from acp_adapter.entry import main as acp_main + acp_main() except ImportError: print("ACP dependencies not installed.") @@ -5897,48 +8188,81 @@ Examples: profile_subparsers = profile_parser.add_subparsers(dest="profile_action") profile_subparsers.add_parser("list", help="List all profiles") - profile_use = profile_subparsers.add_parser("use", help="Set sticky default profile") + profile_use = profile_subparsers.add_parser( + "use", help="Set sticky default profile" + ) profile_use.add_argument("profile_name", help="Profile name (or 'default')") - profile_create = profile_subparsers.add_parser("create", help="Create a new profile") - profile_create.add_argument("profile_name", help="Profile name (lowercase, alphanumeric)") - profile_create.add_argument("--clone", action="store_true", - help="Copy config.yaml, .env, SOUL.md from active profile") - profile_create.add_argument("--clone-all", action="store_true", - help="Full copy of active profile (all 
state)") - profile_create.add_argument("--clone-from", metavar="SOURCE", - help="Source profile to clone from (default: active)") - profile_create.add_argument("--no-alias", action="store_true", - help="Skip wrapper script creation") + profile_create = profile_subparsers.add_parser( + "create", help="Create a new profile" + ) + profile_create.add_argument( + "profile_name", help="Profile name (lowercase, alphanumeric)" + ) + profile_create.add_argument( + "--clone", + action="store_true", + help="Copy config.yaml, .env, SOUL.md from active profile", + ) + profile_create.add_argument( + "--clone-all", + action="store_true", + help="Full copy of active profile (all state)", + ) + profile_create.add_argument( + "--clone-from", + metavar="SOURCE", + help="Source profile to clone from (default: active)", + ) + profile_create.add_argument( + "--no-alias", action="store_true", help="Skip wrapper script creation" + ) profile_delete = profile_subparsers.add_parser("delete", help="Delete a profile") profile_delete.add_argument("profile_name", help="Profile to delete") - profile_delete.add_argument("-y", "--yes", action="store_true", - help="Skip confirmation prompt") + profile_delete.add_argument( + "-y", "--yes", action="store_true", help="Skip confirmation prompt" + ) profile_show = profile_subparsers.add_parser("show", help="Show profile details") profile_show.add_argument("profile_name", help="Profile to show") - profile_alias = profile_subparsers.add_parser("alias", help="Manage wrapper scripts") + profile_alias = profile_subparsers.add_parser( + "alias", help="Manage wrapper scripts" + ) profile_alias.add_argument("profile_name", help="Profile name") - profile_alias.add_argument("--remove", action="store_true", - help="Remove the wrapper script") - profile_alias.add_argument("--name", dest="alias_name", metavar="NAME", - help="Custom alias name (default: profile name)") + profile_alias.add_argument( + "--remove", action="store_true", help="Remove the wrapper script" + 
) + profile_alias.add_argument( + "--name", + dest="alias_name", + metavar="NAME", + help="Custom alias name (default: profile name)", + ) profile_rename = profile_subparsers.add_parser("rename", help="Rename a profile") profile_rename.add_argument("old_name", help="Current profile name") profile_rename.add_argument("new_name", help="New profile name") - profile_export = profile_subparsers.add_parser("export", help="Export a profile to archive") + profile_export = profile_subparsers.add_parser( + "export", help="Export a profile to archive" + ) profile_export.add_argument("profile_name", help="Profile to export") - profile_export.add_argument("-o", "--output", default=None, - help="Output file (default: .tar.gz)") + profile_export.add_argument( + "-o", "--output", default=None, help="Output file (default: .tar.gz)" + ) - profile_import = profile_subparsers.add_parser("import", help="Import a profile from archive") + profile_import = profile_subparsers.add_parser( + "import", help="Import a profile from archive" + ) profile_import.add_argument("archive", help="Path to .tar.gz archive") - profile_import.add_argument("--name", dest="import_name", metavar="NAME", - help="Profile name (default: inferred from archive)") + profile_import.add_argument( + "--name", + dest="import_name", + metavar="NAME", + help="Profile name (default: inferred from archive)", + ) profile_parser.set_defaults(func=cmd_profile) @@ -5950,7 +8274,10 @@ Examples: help="Print shell completion script (bash, zsh, or fish)", ) completion_parser.add_argument( - "shell", nargs="?", default="bash", choices=["bash", "zsh", "fish"], + "shell", + nargs="?", + default="bash", + choices=["bash", "zsh", "fish"], help="Shell type (default: bash)", ) completion_parser.set_defaults(func=lambda args: cmd_completion(args, parser)) @@ -5963,11 +8290,18 @@ Examples: help="Start the web UI dashboard", description="Launch the Hermes Agent web dashboard for managing config, API keys, and sessions", ) - 
dashboard_parser.add_argument("--port", type=int, default=9119, help="Port (default 9119)") - dashboard_parser.add_argument("--host", default="127.0.0.1", help="Host (default 127.0.0.1)") - dashboard_parser.add_argument("--no-open", action="store_true", help="Don't open browser automatically") dashboard_parser.add_argument( - "--insecure", action="store_true", + "--port", type=int, default=9119, help="Port (default 9119)" + ) + dashboard_parser.add_argument( + "--host", default="127.0.0.1", help="Host (default 127.0.0.1)" + ) + dashboard_parser.add_argument( + "--no-open", action="store_true", help="Don't open browser automatically" + ) + dashboard_parser.add_argument( + "--insecure", + action="store_true", help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)", ) dashboard_parser.set_defaults(func=cmd_dashboard) @@ -5995,31 +8329,42 @@ Examples: """, ) logs_parser.add_argument( - "log_name", nargs="?", default="agent", + "log_name", + nargs="?", + default="agent", help="Log to view: agent (default), errors, gateway, or 'list' to show available files", ) logs_parser.add_argument( - "-n", "--lines", type=int, default=50, + "-n", + "--lines", + type=int, + default=50, help="Number of lines to show (default: 50)", ) logs_parser.add_argument( - "-f", "--follow", action="store_true", + "-f", + "--follow", + action="store_true", help="Follow the log in real time (like tail -f)", ) logs_parser.add_argument( - "--level", metavar="LEVEL", + "--level", + metavar="LEVEL", help="Minimum log level to show (DEBUG, INFO, WARNING, ERROR)", ) logs_parser.add_argument( - "--session", metavar="ID", + "--session", + metavar="ID", help="Filter lines containing this session ID substring", ) logs_parser.add_argument( - "--since", metavar="TIME", + "--since", + metavar="TIME", help="Show lines since TIME ago (e.g. 
1h, 30m, 2d)", ) logs_parser.add_argument( - "--component", metavar="NAME", + "--component", + metavar="NAME", help="Filter by component: gateway, agent, tools, cli, cron", ) logs_parser.set_defaults(func=cmd_logs) @@ -6036,6 +8381,7 @@ Examples: # --help, unrecognised flags, and every subcommand are forwarded # transparently instead of being intercepted by argparse on the host. from hermes_cli.config import get_container_exec_info + container_info = get_container_exec_info() if container_info: _exec_in_container(container_info, sys.argv[1:]) @@ -6044,42 +8390,88 @@ Examples: sys.exit(1) _processed_argv = _coalesce_session_name_args(sys.argv[1:]) - args = parser.parse_args(_processed_argv) + + # ── Defensive subparser routing (bpo-9338 workaround) ─────────── + # On some Python versions (notably <3.11), argparse fails to route + # subcommand tokens when the parent parser has nargs='?' optional + # arguments (--continue). The symptom: "unrecognized arguments: model" + # even though 'model' is a registered subcommand. + # + # Fix: when argv contains a token matching a known subcommand, set + # subparsers.required=True to force deterministic routing. If that + # fails (e.g. 'hermes -c model' where 'model' is consumed as the + # session name for --continue), fall back to the default behaviour. + import io as _io + + _known_cmds = ( + set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set() + ) + _has_cmd_token = any( + t in _known_cmds for t in _processed_argv if not t.startswith("-") + ) + + if _has_cmd_token: + subparsers.required = True + _saved_stderr = sys.stderr + try: + sys.stderr = _io.StringIO() + args = parser.parse_args(_processed_argv) + sys.stderr = _saved_stderr + except SystemExit as exc: + sys.stderr = _saved_stderr + # Help/version flags (exit code 0) already printed output — + # re-raise immediately to avoid a second parse_args printing + # the same help text again (#10230). 
+ if exc.code == 0: + raise + # Subcommand name was consumed as a flag value (e.g. -c model). + # Fall back to optional subparsers so argparse handles it normally. + subparsers.required = False + args = parser.parse_args(_processed_argv) + else: + subparsers.required = False + args = parser.parse_args(_processed_argv) # Handle --version flag if args.version: cmd_version(args) return - + # Handle top-level --resume / --continue as shortcut to chat if (args.resume or args.continue_last) and args.command is None: args.command = "chat" - args.query = None - args.model = None - args.provider = None - args.toolsets = None - args.verbose = False - if not hasattr(args, "worktree"): - args.worktree = False + for attr, default in [ + ("query", None), + ("model", None), + ("provider", None), + ("toolsets", None), + ("verbose", False), + ("worktree", False), + ]: + if not hasattr(args, attr): + setattr(args, attr, default) cmd_chat(args) return - + # Default to chat if no command specified if args.command is None: - args.query = None - args.model = None - args.provider = None - args.toolsets = None - args.verbose = False - args.resume = None - args.continue_last = None - if not hasattr(args, "worktree"): - args.worktree = False + for attr, default in [ + ("query", None), + ("model", None), + ("provider", None), + ("toolsets", None), + ("verbose", False), + ("resume", None), + ("continue_last", None), + ("worktree", False), + ]: + if not hasattr(args, attr): + setattr(args, attr, default) cmd_chat(args) return - + # Execute the command - if hasattr(args, 'func'): + if hasattr(args, "func"): args.func(args) else: parser.print_help() diff --git a/hermes_cli/mcp_config.py b/hermes_cli/mcp_config.py index b21234ce0a..ae845b069b 100644 --- a/hermes_cli/mcp_config.py +++ b/hermes_cli/mcp_config.py @@ -279,8 +279,8 @@ def cmd_mcp_add(args): _info(f"Starting OAuth flow for '{name}'...") oauth_ok = False try: - from tools.mcp_oauth import build_oauth_auth - oauth_auth = 
build_oauth_auth(name, url) + from tools.mcp_oauth_manager import get_manager + oauth_auth = get_manager().get_or_build_provider(name, url, None) if oauth_auth: server_config["auth"] = "oauth" _success("OAuth configured (tokens will be acquired on first connection)") @@ -428,10 +428,12 @@ def cmd_mcp_remove(args): _remove_mcp_server(name) _success(f"Removed '{name}' from config") - # Clean up OAuth tokens if they exist + # Clean up OAuth tokens if they exist — route through MCPOAuthManager so + # any provider instance cached in the current process (e.g. from an + # earlier `hermes mcp test` in the same session) is evicted too. try: - from tools.mcp_oauth import remove_oauth_tokens - remove_oauth_tokens(name) + from tools.mcp_oauth_manager import get_manager + get_manager().remove(name) _success("Cleaned up OAuth tokens") except Exception: pass @@ -577,6 +579,63 @@ def _interpolate_value(value: str) -> str: return re.sub(r"\$\{(\w+)\}", _replace, value) +# ─── hermes mcp login ──────────────────────────────────────────────────────── + +def cmd_mcp_login(args): + """Force re-authentication for an OAuth-based MCP server. + + Deletes cached tokens (both on disk and in the running process's + MCPOAuthManager cache) and triggers a fresh OAuth flow via the + existing probe path. + + Use this when: + - Tokens are stuck in a bad state (server revoked, refresh token + consumed by an external process, etc.) 
+ - You want to re-authenticate to change scopes or account + - A tool call returned ``needs_reauth: true`` + """ + name = args.name + servers = _get_mcp_servers() + + if name not in servers: + _error(f"Server '{name}' not found in config.") + if servers: + _info(f"Available servers: {', '.join(servers)}") + return + + server_config = servers[name] + url = server_config.get("url") + if not url: + _error(f"Server '{name}' has no URL — not an OAuth-capable server") + return + if server_config.get("auth") != "oauth": + _error(f"Server '{name}' is not configured for OAuth (auth={server_config.get('auth')})") + _info("Use `hermes mcp remove` + `hermes mcp add` to reconfigure auth.") + return + + # Wipe both disk and in-memory cache so the next probe forces a fresh + # OAuth flow. + try: + from tools.mcp_oauth_manager import get_manager + mgr = get_manager() + mgr.remove(name) + except Exception as exc: + _warning(f"Could not clear existing OAuth state: {exc}") + + print() + _info(f"Starting OAuth flow for '{name}'...") + + # Probe triggers the OAuth flow (browser redirect + callback capture). 
+ try: + tools = _probe_single_server(name, server_config) + if tools: + _success(f"Authenticated — {len(tools)} tool(s) available") + else: + _success("Authenticated (server reported no tools)") + except Exception as exc: + _error(f"Authentication failed: {exc}") + + # ─── hermes mcp configure ──────────────────────────────────────────────────── def cmd_mcp_configure(args): @@ -696,6 +755,7 @@ def mcp_command(args): "test": cmd_mcp_test, "configure": cmd_mcp_configure, "config": cmd_mcp_configure, + "login": cmd_mcp_login, } handler = handlers.get(action) @@ -713,4 +773,5 @@ def mcp_command(args): _info("hermes mcp list List servers") _info("hermes mcp test Test connection") _info("hermes mcp configure Toggle tools") + _info("hermes mcp login Re-authenticate OAuth") print() diff --git a/hermes_cli/memory_setup.py b/hermes_cli/memory_setup.py index e6a61316a7..88186b8ec6 100644 --- a/hermes_cli/memory_setup.py +++ b/hermes_cli/memory_setup.py @@ -58,9 +58,11 @@ def _prompt(label: str, default: str | None = None, secret: bool = False) -> str def _install_dependencies(provider_name: str) -> None: """Install pip dependencies declared in plugin.yaml.""" import subprocess - from pathlib import Path as _Path + from plugins.memory import find_provider_dir - plugin_dir = _Path(__file__).parent.parent / "plugins" / "memory" / provider_name + plugin_dir = find_provider_dir(provider_name) + if not plugin_dir: + return yaml_path = plugin_dir / "plugin.yaml" if not yaml_path.exists(): return diff --git a/hermes_cli/model_normalize.py b/hermes_cli/model_normalize.py index 40afe003bc..76dace065a 100644 --- a/hermes_cli/model_normalize.py +++ b/hermes_cli/model_normalize.py @@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({ "qwen-oauth", "xiaomi", "arcee", + "ollama-cloud", "custom", }) @@ -373,7 +374,26 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str: return bare return _dots_to_hyphens(bare) - # --- Copilot: strip 
matching provider prefix, keep dots --- + # --- Copilot / Copilot ACP: delegate to the Copilot-specific + # normalizer. It knows about the alias table (vendor-prefix + # stripping for Anthropic/OpenAI, dash-to-dot repair for Claude) + # and live-catalog lookups. Without this, vendor-prefixed or + # dash-notation Claude IDs survive to the Copilot API and hit + # HTTP 400 "model_not_supported". See issue #6879. + if provider in {"copilot", "copilot-acp"}: + try: + from hermes_cli.models import normalize_copilot_model_id + + normalized = normalize_copilot_model_id(name) + if normalized: + return normalized + except Exception: + # Fall through to the generic strip-vendor behaviour below + # if the Copilot-specific path is unavailable for any reason. + pass + + # --- Copilot / Copilot ACP / openai-codex fallback: + # strip matching provider prefix, keep dots --- if provider in _STRIP_VENDOR_ONLY_PROVIDERS: stripped = _strip_matching_provider_prefix(name, provider) if stripped == name and name.startswith("openai/"): diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 699bde23e9..f5dcbc49da 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -274,6 +274,11 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]: is_global = False explicit_provider = "" + # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash) + # A single Unicode dash before a flag keyword becomes "--" + import re as _re + raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args) + # Extract --global if "--global" in raw_args: is_global = True @@ -452,6 +457,7 @@ def switch_model( ModelSwitchResult with all information the caller needs. 
""" from hermes_cli.models import ( + copilot_model_api_mode, detect_provider_for_model, validate_requested_model, opencode_model_api_mode, @@ -686,12 +692,12 @@ def switch_model( api_key=api_key, base_url=base_url, ) - except Exception: + except Exception as e: validation = { - "accepted": True, - "persist": True, + "accepted": False, + "persist": False, "recognized": False, - "message": None, + "message": f"Could not validate `{new_model}`: {e}", } if not validation.get("accepted"): @@ -709,14 +715,34 @@ def switch_model( if validation.get("corrected_model"): new_model = validation["corrected_model"] + # --- Copilot api_mode override --- + if target_provider in {"copilot", "github-copilot"}: + api_mode = copilot_model_api_mode(new_model, api_key=api_key) + # --- OpenCode api_mode override --- - if target_provider in {"opencode-zen", "opencode-go", "opencode", "opencode-go"}: + if target_provider in {"opencode-zen", "opencode-go", "opencode"}: api_mode = opencode_model_api_mode(target_provider, new_model) # --- Determine api_mode if not already set --- if not api_mode: api_mode = determine_api_mode(target_provider, base_url) + # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the + # Anthropic SDK prepends its own /v1/messages to the base_url. Strip the + # trailing /v1 so the SDK constructs the correct path (e.g. + # https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages). + # Mirrors the same logic in hermes_cli.runtime_provider.resolve_runtime_provider; + # without it, /model switches into an anthropic_messages-routed OpenCode + # model (e.g. `/model minimax-m2.7` on opencode-go, `/model claude-sonnet-4-6` + # on opencode-zen) hit a double /v1 and returned OpenCode's website 404 page. 
+ if ( + api_mode == "anthropic_messages" + and target_provider in {"opencode-zen", "opencode-go"} + and isinstance(base_url, str) + and base_url + ): + base_url = re.sub(r"/v1/?$", "", base_url) + # --- Get capabilities (legacy) --- capabilities = get_model_capabilities(target_provider, new_model) @@ -786,7 +812,8 @@ def list_authenticated_providers( from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS results: List[dict] = [] - seen_slugs: set = set() + seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545) + seen_mdev_ids: set = set() # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn) data = fetch_models_dev() @@ -796,9 +823,18 @@ def list_authenticated_providers( # "nous" shares OpenRouter's curated list if not separately defined if "nous" not in curated: curated["nous"] = curated["openrouter"] + # Ollama Cloud uses dynamic discovery (no static curated list) + if "ollama-cloud" not in curated: + from hermes_cli.models import fetch_ollama_cloud_models + curated["ollama-cloud"] = fetch_ollama_cloud_models() # --- 1. Check Hermes-mapped providers --- for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items(): + # Skip aliases that map to the same models.dev provider (e.g. + # kimi-coding and kimi-coding-cn both → kimi-for-coding). + # The first one with valid credentials wins (#10526). + if mdev_id in seen_mdev_ids: + continue pdata = data.get(mdev_id) if not isinstance(pdata, dict): continue @@ -837,7 +873,8 @@ def list_authenticated_providers( "total_models": total, "source": "built-in", }) - seen_slugs.add(slug) + seen_slugs.add(slug.lower()) + seen_mdev_ids.add(mdev_id) # --- 2. 
Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) --- from hermes_cli.providers import HERMES_OVERLAYS @@ -849,12 +886,12 @@ def list_authenticated_providers( _mdev_to_hermes = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()} for pid, overlay in HERMES_OVERLAYS.items(): - if pid in seen_slugs: + if pid.lower() in seen_slugs: continue # Resolve Hermes slug — e.g. "github-copilot" → "copilot" hermes_slug = _mdev_to_hermes.get(pid, pid) - if hermes_slug in seen_slugs: + if hermes_slug.lower() in seen_slugs: continue # Check if credentials exist @@ -935,8 +972,8 @@ def list_authenticated_providers( "total_models": total, "source": "hermes", }) - seen_slugs.add(pid) - seen_slugs.add(hermes_slug) + seen_slugs.add(pid.lower()) + seen_slugs.add(hermes_slug.lower()) # --- 2b. Cross-check canonical provider list --- # Catches providers that are in CANONICAL_PROVIDERS but weren't found @@ -948,7 +985,7 @@ def list_authenticated_providers( _canon_provs = [] for _cp in _canon_provs: - if _cp.slug in seen_slugs: + if _cp.slug.lower() in seen_slugs: continue # Check credentials via PROVIDER_REGISTRY (auth.py) @@ -995,24 +1032,52 @@ def list_authenticated_providers( "total_models": _cp_total, "source": "canonical", }) - seen_slugs.add(_cp.slug) + seen_slugs.add(_cp.slug.lower()) # --- 3. User-defined endpoints from config --- + # Track (name, base_url) of what section 3 emits so section 4 can skip + # any overlapping ``custom_providers:`` entries. Callers typically pass + # both (gateway/CLI invoke ``get_compatible_custom_providers()`` which + # merges ``providers:`` into the list) — without this, the same endpoint + # produces two picker rows: one bare-slug ("openrouter") from section 3 + # and one "custom:openrouter" from section 4, both labelled identically. 
+ _section3_emitted_pairs: set = set() if user_providers and isinstance(user_providers, dict): for ep_name, ep_cfg in user_providers.items(): if not isinstance(ep_cfg, dict): continue + # Skip if this slug was already emitted (e.g. canonical provider + # with the same name) or will be picked up by section 4. + if ep_name.lower() in seen_slugs: + continue display_name = ep_cfg.get("name", "") or ep_name - api_url = ep_cfg.get("api", "") or ep_cfg.get("url", "") or "" - default_model = ep_cfg.get("default_model", "") + # ``base_url`` is Hermes's canonical write key (matches + # custom_providers and _save_custom_provider); ``api`` / ``url`` + # remain as fallbacks for hand-edited / legacy configs. + api_url = ( + ep_cfg.get("base_url", "") + or ep_cfg.get("api", "") + or ep_cfg.get("url", "") + or "" + ) + # ``default_model`` is the legacy key; ``model`` matches what + # custom_providers entries use, so accept either. + default_model = ep_cfg.get("default_model", "") or ep_cfg.get("model", "") # Build models list from both default_model and full models array models_list = [] if default_model: models_list.append(default_model) - # Also include the full models list from config + # Also include the full models list from config. + # Hermes writes ``models:`` as a dict keyed by model id + # (see hermes_cli/main.py::_save_custom_provider); older + # configs or hand-edited files may still use a list. 
cfg_models = ep_cfg.get("models", []) - if isinstance(cfg_models, list): + if isinstance(cfg_models, dict): + for m in cfg_models: + if m and m not in models_list: + models_list.append(m) + elif isinstance(cfg_models, list): for m in cfg_models: if m and m not in models_list: models_list.append(m) @@ -1029,6 +1094,13 @@ def list_authenticated_providers( "source": "user-config", "api_url": api_url, }) + seen_slugs.add(ep_name.lower()) + _pair = ( + str(display_name).strip().lower(), + str(api_url).strip().rstrip("/").lower(), + ) + if _pair[0] and _pair[1]: + _section3_emitted_pairs.add(_pair) # --- 4. Saved custom providers from config --- # Each ``custom_providers`` entry represents one model under a named @@ -1063,12 +1135,40 @@ def list_authenticated_providers( "api_url": api_url, "models": [], } + # The singular ``model:`` field only holds the currently + # active model. Hermes's own writer (main.py::_save_custom_provider) + # stores every configured model as a dict under ``models:``; + # downstream readers (agent/models_dev.py, gateway/run.py, + # run_agent.py, hermes_cli/config.py) already consume that dict. + # The /model picker previously ignored it, so multi-model + # custom providers appeared to have only the active model. default_model = (entry.get("model") or "").strip() if default_model and default_model not in groups[slug]["models"]: groups[slug]["models"].append(default_model) + cfg_models = entry.get("models", {}) + if isinstance(cfg_models, dict): + for m in cfg_models: + if m and m not in groups[slug]["models"]: + groups[slug]["models"].append(m) + elif isinstance(cfg_models, list): + for m in cfg_models: + if m and m not in groups[slug]["models"]: + groups[slug]["models"].append(m) + for slug, grp in groups.items(): - if slug in seen_slugs: + if slug.lower() in seen_slugs: + continue + # Skip if section 3 already emitted this endpoint under its + # ``providers:`` dict key — matches on (display_name, base_url), + # the tuple section 4 groups by. 
Prevents two picker rows + # labelled identically when callers pass both ``user_providers`` + # and a compatibility-merged ``custom_providers`` list. + _pair_key = ( + str(grp["name"]).strip().lower(), + str(grp["api_url"]).strip().rstrip("/").lower(), + ) + if _pair_key[0] and _pair_key[1] and _pair_key in _section3_emitted_pairs: continue results.append({ "slug": slug, @@ -1080,11 +1180,9 @@ def list_authenticated_providers( "source": "user-config", "api_url": grp["api_url"], }) - seen_slugs.add(slug) + seen_slugs.add(slug.lower()) # Sort: current provider first, then by model count descending results.sort(key=lambda r: (not r["is_current"], -r["total_models"])) return results - - diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 852601229e..6413c35fdf 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -11,9 +11,17 @@ import json import os import urllib.request import urllib.error +import time from difflib import get_close_matches +from pathlib import Path from typing import Any, NamedTuple, Optional +from hermes_cli import __version__ as _HERMES_VERSION + +# Identify ourselves so endpoints fronted by Cloudflare's Browser Integrity +# Check (error 1010) don't reject the default ``Python-urllib/*`` signature. +_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}" + COPILOT_BASE_URL = "https://api.githubcopilot.com" COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models" COPILOT_EDITOR_VERSION = "vscode/1.104.1" @@ -24,7 +32,9 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"] # Fallback OpenRouter snapshot used when the live catalog is unavailable. 
# (model_id, display description shown in menus) OPENROUTER_MODELS: list[tuple[str, str]] = [ - ("anthropic/claude-opus-4.6", "recommended"), + ("moonshotai/kimi-k2.5", "recommended"), + ("anthropic/claude-opus-4.7", ""), + ("anthropic/claude-opus-4.6", ""), ("anthropic/claude-sonnet-4.6", ""), ("qwen/qwen3.6-plus", ""), ("anthropic/claude-sonnet-4.5", ""), @@ -44,8 +54,8 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("minimax/minimax-m2.7", ""), ("minimax/minimax-m2.5", ""), ("z-ai/glm-5.1", ""), + ("z-ai/glm-5v-turbo", ""), ("z-ai/glm-5-turbo", ""), - ("moonshotai/kimi-k2.5", ""), ("x-ai/grok-4.20", ""), ("nvidia/nemotron-3-super-120b-a12b", ""), ("nvidia/nemotron-3-super-120b-a12b:free", "free"), @@ -71,7 +81,9 @@ def _codex_curated_models() -> list[str]: _PROVIDER_MODELS: dict[str, list[str]] = { "nous": [ + "moonshotai/kimi-k2.5", "xiaomi/mimo-v2-pro", + "anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-sonnet-4.5", @@ -89,8 +101,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "minimax/minimax-m2.7", "minimax/minimax-m2.5", "z-ai/glm-5.1", + "z-ai/glm-5v-turbo", "z-ai/glm-5-turbo", - "moonshotai/kimi-k2.5", "x-ai/grok-4.20-beta", "nvidia/nemotron-3-super-120b-a12b", "nvidia/nemotron-3-super-120b-a12b:free", @@ -122,39 +134,45 @@ _PROVIDER_MODELS: dict[str, list[str]] = { ], "gemini": [ "gemini-3.1-pro-preview", + "gemini-3-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", - "gemini-2.5-pro", - "gemini-2.5-flash", - "gemini-2.5-flash-lite", - # Gemma open models (also served via AI Studio) - "gemma-4-31b-it", - "gemma-4-26b-it", + ], + "google-gemini-cli": [ + "gemini-3.1-pro-preview", + "gemini-3-pro-preview", + "gemini-3-flash-preview", ], "zai": [ "glm-5.1", "glm-5", + "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5", "glm-4.5-flash", ], "xai": [ - "grok-4.20-0309-reasoning", - "grok-4.20-0309-non-reasoning", - "grok-4.20-multi-agent-0309", + "grok-4.20-reasoning", 
"grok-4-1-fast-reasoning", - "grok-4-1-fast-non-reasoning", - "grok-4-fast-reasoning", - "grok-4-fast-non-reasoning", - "grok-4-0709", - "grok-code-fast-1", - "grok-3", - "grok-3-mini", + ], + "nvidia": [ + # NVIDIA flagship reasoning models + "nvidia/nemotron-3-super-120b-a12b", + "nvidia/nemotron-3-nano-30b-a3b", + "nvidia/llama-3.3-nemotron-super-49b-v1.5", + # Third-party agentic models hosted on build.nvidia.com + # (map to OpenRouter defaults — users get familiar picks on NIM) + "qwen/qwen3.5-397b-a17b", + "deepseek-ai/deepseek-v3.2", + "moonshotai/kimi-k2.5", + "minimaxai/minimax-m2.5", + "z-ai/glm5", + "openai/gpt-oss-120b", ], "kimi-coding": [ - "kimi-for-coding", "kimi-k2.5", + "kimi-for-coding", "kimi-k2-thinking", "kimi-k2-thinking-turbo", "kimi-k2-turbo-preview", @@ -185,6 +203,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "MiniMax-M2", ], "anthropic": [ + "claude-opus-4-7", "claude-opus-4-6", "claude-sonnet-4-6", "claude-opus-4-5-20251101", @@ -208,10 +227,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "trinity-mini", ], "opencode-zen": [ + "kimi-k2.5", "gpt-5.4-pro", "gpt-5.4", "gpt-5.3-codex", - "gpt-5.3-codex-spark", "gpt-5.2", "gpt-5.2-codex", "gpt-5.1", @@ -239,15 +258,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "glm-5", "glm-4.7", "glm-4.6", - "kimi-k2.5", "kimi-k2-thinking", "kimi-k2", "qwen3-coder", "big-pickle", ], "opencode-go": [ - "glm-5", "kimi-k2.5", + "glm-5.1", + "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", @@ -280,26 +299,42 @@ _PROVIDER_MODELS: dict[str, list[str]] = { # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat) # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat). "alibaba": [ + "kimi-k2.5", "qwen3.5-plus", "qwen3-coder-plus", "qwen3-coder-next", # Third-party models available on coding-intl "glm-5", "glm-4.7", - "kimi-k2.5", "MiniMax-M2.5", ], # Curated HF model list — only agentic models that map to OpenRouter defaults. 
"huggingface": [ + "moonshotai/Kimi-K2.5", "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3.5-35B-A3B", "deepseek-ai/DeepSeek-V3.2", - "moonshotai/Kimi-K2.5", "MiniMaxAI/MiniMax-M2.5", "zai-org/GLM-5", "XiaomiMiMo/MiMo-V2-Flash", "moonshotai/Kimi-K2-Thinking", ], + # AWS Bedrock — static fallback list used when dynamic discovery is + # unavailable (no boto3, no credentials, or API error). The agent + # prefers live discovery via ListFoundationModels + ListInferenceProfiles. + # Use inference profile IDs (us.*) since most models require them. + "bedrock": [ + "us.anthropic.claude-sonnet-4-6", + "us.anthropic.claude-opus-4-6-v1", + "us.anthropic.claude-haiku-4-5-20251001-v1:0", + "us.anthropic.claude-sonnet-4-5-20250929-v1:0", + "us.amazon.nova-pro-v1:0", + "us.amazon.nova-lite-v1:0", + "us.amazon.nova-micro-v1:0", + "deepseek.v3.2", + "us.meta.llama4-maverick-17b-instruct-v1:0", + "us.meta.llama4-scout-17b-instruct-v1:0", + ], } # --------------------------------------------------------------------------- @@ -515,30 +550,35 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"), + ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"), ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"), ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"), ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"), - ProviderEntry("gemini", "Google AI Studio", "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"), + ProviderEntry("gemini", "Google AI Studio", "Google 
AI Studio (Gemini models — native Gemini API)"), + ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"), ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"), ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"), ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"), - ProviderEntry("kimi-coding", "Kimi / Moonshot", "Kimi / Moonshot (Moonshot AI direct API)"), + ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"), ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"), ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"), ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"), ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"), + ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"), ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"), ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"), ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"), ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"), ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"), + ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"), ] # Derived dicts — used throughout the codebase _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS} _PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider + _PROVIDER_ALIASES = { "glm": "zai", "z-ai": "zai", @@ -579,14 +619,26 @@ _PROVIDER_ALIASES = { "qwen": "alibaba", "alibaba-cloud": 
"alibaba", "qwen-portal": "qwen-oauth", + "gemini-cli": "google-gemini-cli", + "gemini-oauth": "google-gemini-cli", "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", "mimo": "xiaomi", "xiaomi-mimo": "xiaomi", + "aws": "bedrock", + "aws-bedrock": "bedrock", + "amazon-bedrock": "bedrock", + "amazon": "bedrock", "grok": "xai", "x-ai": "xai", "x.ai": "xai", + "nim": "nvidia", + "nvidia-nim": "nvidia", + "build-nvidia": "nvidia", + "nemotron": "nvidia", + "ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud + "ollama_cloud": "ollama-cloud", } @@ -1023,7 +1075,7 @@ def detect_provider_for_model( return (resolved_provider, default_models[0]) # Aggregators list other providers' models — never auto-switch TO them - _AGGREGATORS = {"nous", "openrouter"} + _AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"} # If the model belongs to the current provider's catalog, don't suggest switching current_models = _PROVIDER_MODELS.get(current_provider, []) @@ -1040,7 +1092,8 @@ def detect_provider_for_model( break if direct_match: - # Check if we have credentials for this provider + # Check if we have credentials for this provider — env vars, + # credential pool, or auth store entries. has_creds = False try: from hermes_cli.auth import PROVIDER_REGISTRY @@ -1053,16 +1106,28 @@ def detect_provider_for_model( break except Exception: pass + # Also check credential pool and auth store — covers OAuth, + # Claude Code tokens, and other non-env-var credentials (#10300). 
+ if not has_creds: + try: + from agent.credential_pool import load_pool + pool = load_pool(direct_match) + if pool.has_credentials(): + has_creds = True + except Exception: + pass + if not has_creds: + try: + from hermes_cli.auth import _load_auth_store + store = _load_auth_store() + if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}): + has_creds = True + except Exception: + pass - if has_creds: - return (direct_match, name) - - # No direct creds — try to find this model on OpenRouter instead - or_slug = _find_openrouter_slug(name) - if or_slug: - return ("openrouter", or_slug) - # Still return the direct provider — credential resolution will - # give a clear error rather than silently using the wrong provider + # Always return the direct provider match. If credentials are + # missing, the client init will give a clear error rather than + # silently routing through the wrong provider (#10300). return (direct_match, name) # --- Step 2: check OpenRouter catalog --- @@ -1252,6 +1317,10 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) live = _fetch_ai_gateway_models() if live: return live + if normalized == "ollama-cloud": + live = fetch_ollama_cloud_models(force_refresh=force_refresh) + if live: + return live if normalized == "custom": base_url = _get_custom_base_url() if base_url: @@ -1438,6 +1507,19 @@ _COPILOT_MODEL_ALIASES = { "anthropic/claude-sonnet-4.6": "claude-sonnet-4.6", "anthropic/claude-sonnet-4.5": "claude-sonnet-4.5", "anthropic/claude-haiku-4.5": "claude-haiku-4.5", + # Dash-notation fallbacks: Hermes' default Claude IDs elsewhere use + # hyphens (anthropic native format), but Copilot's API only accepts + # dot-notation. Accept both so users who configure copilot + a + # default hyphenated Claude model don't hit HTTP 400 + # "model_not_supported". See issue #6879. 
+ "claude-opus-4-6": "claude-opus-4.6", + "claude-sonnet-4-6": "claude-sonnet-4.6", + "claude-sonnet-4-5": "claude-sonnet-4.5", + "claude-haiku-4-5": "claude-haiku-4.5", + "anthropic/claude-opus-4-6": "claude-opus-4.6", + "anthropic/claude-sonnet-4-6": "claude-sonnet-4.6", + "anthropic/claude-sonnet-4-5": "claude-sonnet-4.5", + "anthropic/claude-haiku-4-5": "claude-haiku-4.5", } @@ -1536,6 +1618,11 @@ def copilot_model_api_mode( primary signal. Falls back to the catalog's ``supported_endpoints`` only for models not covered by the pattern check. """ + # Fetch the catalog once so normalize + endpoint check share it + # (avoids two redundant network calls for non-GPT-5 models). + if catalog is None and api_key: + catalog = fetch_github_model_catalog(api_key=api_key) + normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key) if not normalized: return "chat_completions" @@ -1545,9 +1632,6 @@ def copilot_model_api_mode( return "codex_responses" # Secondary: check catalog for non-GPT-5 models (Claude via /v1/messages, etc.) 
- if catalog is None and api_key: - catalog = fetch_github_model_catalog(api_key=api_key) - if catalog: catalog_entry = next((item for item in catalog if item.get("id") == normalized), None) if isinstance(catalog_entry, dict): @@ -1690,7 +1774,7 @@ def probe_api_models( candidates.append((alternate_base, True)) tried: list[str] = [] - headers: dict[str, str] = {} + headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT} if api_key: headers["Authorization"] = f"Bearer {api_key}" if normalized.startswith(COPILOT_BASE_URL): @@ -1762,6 +1846,125 @@ def fetch_api_models( return probe_api_models(api_key, base_url, timeout=timeout).get("models") +# --------------------------------------------------------------------------- +# Ollama Cloud — merged model discovery with disk cache +# --------------------------------------------------------------------------- + + + +_OLLAMA_CLOUD_CACHE_TTL = 3600 # 1 hour + + +def _ollama_cloud_cache_path() -> Path: + """Return the path for the Ollama Cloud model cache.""" + from hermes_constants import get_hermes_home + return get_hermes_home() / "ollama_cloud_models_cache.json" + + +def _load_ollama_cloud_cache(*, ignore_ttl: bool = False) -> Optional[dict]: + """Load cached Ollama Cloud models from disk. + + Args: + ignore_ttl: If True, return data even if the TTL has expired (stale fallback). 
+ """ + try: + cache_path = _ollama_cloud_cache_path() + if not cache_path.exists(): + return None + with open(cache_path, encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, dict): + return None + models = data.get("models") + if not (isinstance(models, list) and models): + return None + if not ignore_ttl: + cached_at = data.get("cached_at", 0) + if (time.time() - cached_at) > _OLLAMA_CLOUD_CACHE_TTL: + return None # stale + return data + except Exception: + pass + return None + + +def _save_ollama_cloud_cache(models: list[str]) -> None: + """Persist the merged Ollama Cloud model list to disk.""" + try: + from utils import atomic_json_write + cache_path = _ollama_cloud_cache_path() + cache_path.parent.mkdir(parents=True, exist_ok=True) + atomic_json_write(cache_path, {"models": models, "cached_at": time.time()}, indent=None) + except Exception: + pass + + +def fetch_ollama_cloud_models( + api_key: Optional[str] = None, + base_url: Optional[str] = None, + *, + force_refresh: bool = False, +) -> list[str]: + """Fetch Ollama Cloud models by merging live API + models.dev, with disk cache. + + Resolution order: + 1. Disk cache (if fresh, < 1 hour, and not force_refresh) + 2. Live ``/v1/models`` endpoint (primary — freshest source) + 3. models.dev registry (secondary — fills gaps for unlisted models) + 4. Merge: live models first, then models.dev additions (deduped) + + Returns a list of model IDs (never None — empty list on total failure). + """ + # 1. Check disk cache + if not force_refresh: + cached = _load_ollama_cloud_cache() + if cached is not None: + return cached["models"] + + # 2. Live API probe + if not api_key: + api_key = os.getenv("OLLAMA_API_KEY", "") + if not base_url: + base_url = os.getenv("OLLAMA_BASE_URL", "") or "https://ollama.com/v1" + + live_models: list[str] = [] + if api_key: + result = fetch_api_models(api_key, base_url, timeout=8.0) + if result: + live_models = result + + # 3. 
models.dev registry + mdev_models: list[str] = [] + try: + from agent.models_dev import list_agentic_models + mdev_models = list_agentic_models("ollama-cloud") + except Exception: + pass + + # 4. Merge: live first, then models.dev additions (deduped, order-preserving) + if live_models or mdev_models: + seen: set[str] = set() + merged: list[str] = [] + for m in live_models: + if m and m not in seen: + seen.add(m) + merged.append(m) + for m in mdev_models: + if m and m not in seen: + seen.add(m) + merged.append(m) + if merged: + _save_ollama_cloud_cache(merged) + return merged + + # Total failure — return stale cache if available (ignore TTL) + stale = _load_ollama_cloud_cache(ignore_ttl=True) + if stale is not None: + return stale["models"] + + return [] + + def validate_requested_model( model_name: str, provider: Optional[str], @@ -1848,8 +2051,8 @@ def validate_requested_model( ) return { - "accepted": True, - "persist": True, + "accepted": False, + "persist": False, "recognized": False, "message": message, } @@ -1862,8 +2065,8 @@ def validate_requested_model( message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`" return { - "accepted": True, - "persist": True, + "accepted": False, + "persist": False, "recognized": False, "message": message, } @@ -1896,14 +2099,58 @@ def validate_requested_model( suggestion_text = "" if suggestions: suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) + return { + "accepted": False, + "persist": False, + "recognized": False, + "message": ( + f"Model `{requested}` was not found in the OpenAI Codex model listing." + f"{suggestion_text}" + ), + } + + # MiniMax providers don't expose a /models endpoint — validate against + # the static catalog instead, similar to openai-codex. 
+ if normalized in ("minimax", "minimax-cn"): + try: + catalog_models = provider_model_ids(normalized) + except Exception: + catalog_models = [] + if catalog_models: + # Case-insensitive lookup (catalog uses mixed case like MiniMax-M2.7) + catalog_lower = {m.lower(): m for m in catalog_models} + if requested_for_lookup.lower() in catalog_lower: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + # Auto-correct close matches (case-insensitive) + catalog_lower_list = list(catalog_lower.keys()) + auto = get_close_matches(requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9) + if auto: + corrected = catalog_lower[auto[0]] + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": corrected, + "message": f"Auto-corrected `{requested}` → `{corrected}`", + } + suggestions = get_close_matches(requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join(f"`{catalog_lower[s]}`" for s in suggestions) return { "accepted": True, "persist": True, "recognized": False, "message": ( - f"Note: `{requested}` was not found in the OpenAI Codex model listing. " - f"It may still work if your account has access to it." + f"Note: `{requested}` was not found in the MiniMax catalog." f"{suggestion_text}" + "\n MiniMax does not expose a /models endpoint, so Hermes cannot verify the model name." + "\n The model may still work if it exists on the server." ), } @@ -1941,23 +2188,58 @@ def validate_requested_model( if suggestions: suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) + return { + "accepted": False, + "persist": False, + "recognized": False, + "message": ( + f"Model `{requested}` was not found in this provider's model listing." + f"{suggestion_text}" + ), + } + + # api_models is None — couldn't reach API. 
Accept and persist, + # but warn so typos don't silently break things. + + # Bedrock: use our own discovery instead of HTTP /models endpoint. + # Bedrock's bedrock-runtime URL doesn't support /models — it uses the + # AWS SDK control plane (ListFoundationModels + ListInferenceProfiles). + if normalized == "bedrock": + try: + from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region + region = resolve_bedrock_region() + discovered = discover_bedrock_models(region) + discovered_ids = {m["id"] for m in discovered} + if requested in discovered_ids: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + # Not in discovered list — still accept (user may have custom + # inference profiles or cross-account access), but warn. + suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) return { "accepted": True, "persist": True, "recognized": False, "message": ( - f"Note: `{requested}` was not found in this provider's model listing. " - f"It may still work if your plan supports it." + f"Note: `{requested}` was not found in Bedrock model discovery for {region}. " + f"It may still work with custom inference profiles or cross-account access." f"{suggestion_text}" ), } + except Exception: + pass # Fall through to generic warning - # api_models is None — couldn't reach API. Accept and persist, - # but warn so typos don't silently break things. provider_label = _PROVIDER_LABELS.get(normalized, normalized) return { - "accepted": True, - "persist": True, + "accepted": False, + "persist": False, "recognized": False, "message": ( f"Could not reach the {provider_label} API to validate `{requested}`. 
" diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index f1e4366c1b..691126a4c6 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -143,6 +143,7 @@ def _tts_label(current_provider: str) -> str: "openai": "OpenAI TTS", "elevenlabs": "ElevenLabs", "edge": "Edge TTS", + "xai": "xAI TTS", "mistral": "Mistral Voxtral TTS", "neutts": "NeuTTS", } @@ -257,6 +258,15 @@ def get_nous_subscription_features( terminal_cfg.get("modal_mode") ) + # use_gateway flags — when True, the user explicitly opted into the + # Tool Gateway via `hermes model`, so direct credentials should NOT + # prevent gateway routing. + web_use_gateway = bool(web_cfg.get("use_gateway")) + tts_use_gateway = bool(tts_cfg.get("use_gateway")) + browser_use_gateway = bool(browser_cfg.get("use_gateway")) + image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {} + image_use_gateway = bool(image_gen_cfg.get("use_gateway")) + direct_exa = bool(get_env_value("EXA_API_KEY")) direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL")) direct_parallel = bool(get_env_value("PARALLEL_API_KEY")) @@ -269,6 +279,21 @@ def get_nous_subscription_features( direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY")) direct_modal = has_direct_modal_credentials() + # When use_gateway is set, suppress direct credentials for managed detection + if web_use_gateway: + direct_firecrawl = False + direct_exa = False + direct_parallel = False + direct_tavily = False + if image_use_gateway: + direct_fal = False + if tts_use_gateway: + direct_openai_tts = False + direct_elevenlabs = False + if browser_use_gateway: + direct_browser_use = False + direct_browserbase = False + managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl") managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue") 
managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio") @@ -439,37 +464,7 @@ def get_nous_subscription_features( ) -def get_nous_subscription_explainer_lines() -> list[str]: - if not managed_nous_tools_enabled(): - return [] - return [ - "Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.", - "Those managed tools bill to your Nous subscription. Modal execution is optional and can bill to your subscription too.", - "Change these later with: hermes setup tools, hermes setup terminal, or hermes status.", - ] - - -def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]: - """Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`.""" - if not managed_nous_tools_enabled(): - return set() - - features = get_nous_subscription_features(config) - if not features.provider_is_nous: - return set() - - tts_cfg = config.get("tts") - if not isinstance(tts_cfg, dict): - tts_cfg = {} - config["tts"] = tts_cfg - - current_tts = str(tts_cfg.get("provider") or "edge").strip().lower() - if current_tts not in {"", "edge"}: - return set() - - tts_cfg["provider"] = "openai" - return {"tts"} def apply_nous_managed_defaults( @@ -529,3 +524,255 @@ def apply_nous_managed_defaults( changed.add("image_gen") return changed + + +# --------------------------------------------------------------------------- +# Tool Gateway offer — single Y/n prompt after model selection +# --------------------------------------------------------------------------- + +_GATEWAY_TOOL_LABELS = { + "web": "Web search & extract (Firecrawl)", + "image_gen": "Image generation (FAL)", + "tts": "Text-to-speech (OpenAI TTS)", + "browser": "Browser automation (Browser Use)", +} + + +def _get_gateway_direct_credentials() -> Dict[str, bool]: + """Return a dict of tool_key -> has_direct_credentials.""" + return { + "web": bool( + get_env_value("FIRECRAWL_API_KEY") + or 
get_env_value("FIRECRAWL_API_URL") + or get_env_value("PARALLEL_API_KEY") + or get_env_value("TAVILY_API_KEY") + or get_env_value("EXA_API_KEY") + ), + "image_gen": bool(get_env_value("FAL_KEY")), + "tts": bool( + resolve_openai_audio_api_key() + or get_env_value("ELEVENLABS_API_KEY") + ), + "browser": bool( + get_env_value("BROWSER_USE_API_KEY") + or (get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID")) + ), + } + + +_GATEWAY_DIRECT_LABELS = { + "web": "Firecrawl/Exa/Parallel/Tavily key", + "image_gen": "FAL key", + "tts": "OpenAI/ElevenLabs key", + "browser": "Browser Use/Browserbase key", +} + +_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "browser") + + +def get_gateway_eligible_tools( + config: Optional[Dict[str, object]] = None, +) -> tuple[list[str], list[str], list[str]]: + """Return (unconfigured, has_direct, already_managed) tool key lists. + + - unconfigured: tools with no direct credentials (easy switch) + - has_direct: tools where the user has their own API keys + - already_managed: tools already routed through the gateway + + All lists are empty when the user is not a paid Nous subscriber or + is not using Nous as their provider. + """ + if not managed_nous_tools_enabled(): + return [], [], [] + + if config is None: + from hermes_cli.config import load_config + config = load_config() or {} + + # Quick provider check without the heavy get_nous_subscription_features call + model_cfg = config.get("model") + if not isinstance(model_cfg, dict) or str(model_cfg.get("provider") or "").strip().lower() != "nous": + return [], [], [] + + direct = _get_gateway_direct_credentials() + + # Check which tools the user has explicitly opted into the gateway for. + # This is distinct from managed_by_nous which fires implicitly when + # no direct keys exist — we only skip the prompt for tools where + # use_gateway was explicitly set. 
+ opted_in = { + "web": bool((config.get("web") if isinstance(config.get("web"), dict) else {}).get("use_gateway")), + "image_gen": bool((config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}).get("use_gateway")), + "tts": bool((config.get("tts") if isinstance(config.get("tts"), dict) else {}).get("use_gateway")), + "browser": bool((config.get("browser") if isinstance(config.get("browser"), dict) else {}).get("use_gateway")), + } + + unconfigured: list[str] = [] + has_direct: list[str] = [] + already_managed: list[str] = [] + for key in _ALL_GATEWAY_KEYS: + if opted_in.get(key): + already_managed.append(key) + elif direct.get(key): + has_direct.append(key) + else: + unconfigured.append(key) + return unconfigured, has_direct, already_managed + + +def apply_gateway_defaults( + config: Dict[str, object], + tool_keys: list[str], +) -> set[str]: + """Apply Tool Gateway config for the given tool keys. + + Sets ``use_gateway: true`` in each tool's config section so the + runtime prefers the gateway even when direct API keys are present. + + Returns the set of tools that were actually changed. 
+ """ + changed: set[str] = set() + + web_cfg = config.get("web") + if not isinstance(web_cfg, dict): + web_cfg = {} + config["web"] = web_cfg + + tts_cfg = config.get("tts") + if not isinstance(tts_cfg, dict): + tts_cfg = {} + config["tts"] = tts_cfg + + browser_cfg = config.get("browser") + if not isinstance(browser_cfg, dict): + browser_cfg = {} + config["browser"] = browser_cfg + + if "web" in tool_keys: + web_cfg["backend"] = "firecrawl" + web_cfg["use_gateway"] = True + changed.add("web") + + if "tts" in tool_keys: + tts_cfg["provider"] = "openai" + tts_cfg["use_gateway"] = True + changed.add("tts") + + if "browser" in tool_keys: + browser_cfg["cloud_provider"] = "browser-use" + browser_cfg["use_gateway"] = True + changed.add("browser") + + if "image_gen" in tool_keys: + image_cfg = config.get("image_gen") + if not isinstance(image_cfg, dict): + image_cfg = {} + config["image_gen"] = image_cfg + image_cfg["use_gateway"] = True + changed.add("image_gen") + + return changed + + +def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]: + """If eligible tools exist, prompt the user to enable the Tool Gateway. + + Uses prompt_choice() with a description parameter so the curses TUI + shows the tool context alongside the choices. + + Returns the set of tools that were enabled, or empty set if the user + declined or no tools were eligible. 
+ """ + unconfigured, has_direct, already_managed = get_gateway_eligible_tools(config) + if not unconfigured and not has_direct: + return set() + + try: + from hermes_cli.setup import prompt_choice + except Exception: + return set() + + # Build description lines showing full status of all gateway tools + desc_parts: list[str] = [ + "", + " The Tool Gateway gives you access to web search, image generation,", + " text-to-speech, and browser automation through your Nous subscription.", + " No need to sign up for separate API keys — just pick the tools you want.", + "", + ] + if already_managed: + for k in already_managed: + desc_parts.append(f" ✓ {_GATEWAY_TOOL_LABELS[k]} — using Tool Gateway") + if unconfigured: + for k in unconfigured: + desc_parts.append(f" ○ {_GATEWAY_TOOL_LABELS[k]} — not configured") + if has_direct: + for k in has_direct: + desc_parts.append(f" ○ {_GATEWAY_TOOL_LABELS[k]} — using {_GATEWAY_DIRECT_LABELS[k]}") + + # Build short choice labels — detail is in the description above + choices: list[str] = [] + choice_keys: list[str] = [] # maps choice index -> action + + if unconfigured and has_direct: + choices.append("Enable for all tools (existing keys kept, not used)") + choice_keys.append("all") + + choices.append("Enable only for tools without existing keys") + choice_keys.append("unconfigured") + + choices.append("Skip") + choice_keys.append("skip") + + elif unconfigured: + choices.append("Enable Tool Gateway") + choice_keys.append("unconfigured") + + choices.append("Skip") + choice_keys.append("skip") + + else: + choices.append("Enable Tool Gateway (existing keys kept, not used)") + choice_keys.append("all") + + choices.append("Skip") + choice_keys.append("skip") + + description = "\n".join(desc_parts) if desc_parts else None + # Default to "Enable" when user has no direct keys (new user), + # default to "Skip" when they have existing keys to preserve. 
+ default_idx = 0 if not has_direct else len(choices) - 1 + + try: + idx = prompt_choice( + "Your Nous subscription includes the Tool Gateway.", + choices, + default_idx, + description=description, + ) + except (KeyboardInterrupt, EOFError, OSError, SystemExit): + return set() + + action = choice_keys[idx] + if action == "skip": + return set() + + if action == "all": + # Apply to switchable tools + ensure already-managed tools also + # have use_gateway persisted in config for consistency. + to_apply = list(_ALL_GATEWAY_KEYS) + else: + to_apply = unconfigured + + changed = apply_gateway_defaults(config, to_apply) + if changed: + from hermes_cli.config import save_config + save_config(config) + # Only report the tools that actually switched (not already-managed ones) + newly_switched = changed - set(already_managed) + for key in sorted(newly_switched): + label = _GATEWAY_TOOL_LABELS.get(key, key) + print(f" ✓ {label}: enabled via Nous subscription") + if already_managed and not newly_switched: + print(" (all tools already using Tool Gateway)") + return changed diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 9d78ca47f8..62a0928854 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -2,14 +2,20 @@ Hermes Plugin System ==================== -Discovers, loads, and manages plugins from three sources: +Discovers, loads, and manages plugins from four sources: -1. **User plugins** – ``~/.hermes/plugins//`` -2. **Project plugins** – ``./.hermes/plugins//`` (opt-in via +1. **Bundled plugins** – ``/plugins//`` (shipped with hermes-agent; + ``memory/`` and ``context_engine/`` subdirs are excluded — they have their + own discovery paths) +2. **User plugins** – ``~/.hermes/plugins//`` +3. **Project plugins** – ``./.hermes/plugins//`` (opt-in via ``HERMES_ENABLE_PROJECT_PLUGINS``) -3. **Pip plugins** – packages that expose the ``hermes_agent.plugins`` +4. **Pip plugins** – packages that expose the ``hermes_agent.plugins`` entry-point group. 
+Later sources override earlier ones on name collision, so a user or project +plugin with the same name as a bundled plugin replaces it. + Each directory plugin must contain a ``plugin.yaml`` manifest **and** an ``__init__.py`` with a ``register(ctx)`` function. @@ -54,6 +60,8 @@ logger = logging.getLogger(__name__) VALID_HOOKS: Set[str] = { "pre_tool_call", "post_tool_call", + "transform_terminal_output", + "transform_tool_result", "pre_llm_call", "post_llm_call", "pre_api_request", @@ -75,7 +83,12 @@ def _env_enabled(name: str) -> bool: def _get_disabled_plugins() -> set: - """Read the disabled plugins list from config.yaml.""" + """Read the disabled plugins list from config.yaml. + + Kept for backward compat and explicit deny-list semantics. A plugin + name in this set will never load, even if it appears in + ``plugins.enabled``. + """ try: from hermes_cli.config import load_config config = load_config() @@ -85,6 +98,36 @@ def _get_disabled_plugins() -> set: return set() +def _get_enabled_plugins() -> Optional[set]: + """Read the enabled-plugins allow-list from config.yaml. + + Plugins are opt-in by default — only plugins whose name appears in + this set are loaded. Returns: + + * ``None`` — the key is missing or malformed. Callers should treat + this as "nothing enabled yet" (the opt-in default); the first + ``migrate_config`` run populates the key with a grandfathered set + of currently-installed user plugins so existing setups don't + break on upgrade. + * ``set()`` — an empty list was explicitly set; nothing loads. + * ``set(...)`` — the concrete allow-list. 
+ """ + try: + from hermes_cli.config import load_config + config = load_config() + plugins_cfg = config.get("plugins") + if not isinstance(plugins_cfg, dict): + return None + if "enabled" not in plugins_cfg: + return None + enabled = plugins_cfg.get("enabled") + if not isinstance(enabled, list): + return None + return set(enabled) + except Exception: + return None + + # --------------------------------------------------------------------------- # Data classes # --------------------------------------------------------------------------- @@ -112,6 +155,7 @@ class LoadedPlugin: module: Optional[types.ModuleType] = None tools_registered: List[str] = field(default_factory=list) hooks_registered: List[str] = field(default_factory=list) + commands_registered: List[str] = field(default_factory=list) enabled: bool = False error: Optional[str] = None @@ -211,6 +255,84 @@ class PluginContext: } logger.debug("Plugin %s registered CLI command: %s", self.manifest.name, name) + # -- slash command registration ------------------------------------------- + + def register_command( + self, + name: str, + handler: Callable, + description: str = "", + ) -> None: + """Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions. + + The handler signature is ``fn(raw_args: str) -> str | None``. + It may also be an async callable — the gateway dispatch handles both. + + Unlike ``register_cli_command()`` (which creates ``hermes `` + terminal commands), this registers in-session slash commands that users + invoke during a conversation. + + Names conflicting with built-in commands are rejected with a warning. 
+ """ + clean = name.lower().strip().lstrip("/").replace(" ", "-") + if not clean: + logger.warning( + "Plugin '%s' tried to register a command with an empty name.", + self.manifest.name, + ) + return + + # Reject if it conflicts with a built-in command + try: + from hermes_cli.commands import resolve_command + if resolve_command(clean) is not None: + logger.warning( + "Plugin '%s' tried to register command '/%s' which conflicts " + "with a built-in command. Skipping.", + self.manifest.name, clean, + ) + return + except Exception: + pass # If commands module isn't available, skip the check + + self._manager._plugin_commands[clean] = { + "handler": handler, + "description": description or "Plugin command", + "plugin": self.manifest.name, + } + logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean) + + # -- tool dispatch ------------------------------------------------------- + + def dispatch_tool(self, tool_name: str, args: dict, **kwargs) -> str: + """Dispatch a tool call through the registry, with parent agent context. + + This is the public interface for plugin slash commands that need to call + tools like ``delegate_task`` without reaching into framework internals. + The parent agent (if available) is resolved automatically — plugins never + need to access the agent directly. + + Args: + tool_name: Registry name of the tool (e.g. ``"delegate_task"``). + args: Tool arguments dict (same as what the model would pass). + **kwargs: Extra keyword args forwarded to the registry dispatch. + + Returns: + JSON string from the tool handler (same format as model tool calls). + """ + from tools.registry import registry + + # Wire up parent agent context when available (CLI mode). + # In gateway mode _cli_ref is None — tools degrade gracefully + # (workspace hints fall back to TERMINAL_CWD, no spinner). 
+ if "parent_agent" not in kwargs: + cli = self._manager._cli_ref + agent = getattr(cli, "agent", None) if cli else None + if agent is not None: + kwargs["parent_agent"] = agent + + return registry.dispatch(tool_name, args, **kwargs) + # -- context engine registration ----------------------------------------- def register_context_engine(self, engine) -> None: @@ -323,6 +445,7 @@ class PluginManager: self._plugin_tool_names: Set[str] = set() self._cli_commands: Dict[str, dict] = {} self._context_engine = None # Set by a plugin via register_context_engine() + self._plugin_commands: Dict[str, dict] = {} # Slash commands registered by plugins self._discovered: bool = False self._cli_ref = None # Set by CLI after plugin discovery # Plugin skill registry: qualified name → metadata dict. @@ -340,27 +463,66 @@ class PluginManager: manifests: List[PluginManifest] = [] - # 1. User plugins (~/.hermes/plugins/) + # 1. Bundled plugins (/plugins//) + # Repo-shipped generic plugins live next to hermes_cli/. Memory and + # context_engine subdirs are handled by their own discovery paths, so + # skip those names here. Bundled plugins are discovered (so they + # show up in `hermes plugins`) but only loaded when added to + # `plugins.enabled` in config.yaml — opt-in like any other plugin. + repo_plugins = Path(__file__).resolve().parent.parent / "plugins" + manifests.extend( + self._scan_directory( + repo_plugins, + source="bundled", + skip_names={"memory", "context_engine"}, + ) + ) + + # 2. User plugins (~/.hermes/plugins/) user_dir = get_hermes_home() / "plugins" manifests.extend(self._scan_directory(user_dir, source="user")) - # 2. Project plugins (./.hermes/plugins/) + # 3. Project plugins (./.hermes/plugins/) if _env_enabled("HERMES_ENABLE_PROJECT_PLUGINS"): project_dir = Path.cwd() / ".hermes" / "plugins" manifests.extend(self._scan_directory(project_dir, source="project")) - # 3. Pip / entry-point plugins + # 4. 
Pip / entry-point plugins manifests.extend(self._scan_entry_points()) - # Load each manifest (skip user-disabled plugins) + # Load each manifest (skip user-disabled plugins). + # Later sources override earlier ones on name collision — user plugins + # take precedence over bundled, project plugins take precedence over + # user. Dedup here so we only load the final winner. disabled = _get_disabled_plugins() + enabled = _get_enabled_plugins() # None = opt-in default (nothing enabled) + winners: Dict[str, PluginManifest] = {} for manifest in manifests: + winners[manifest.name] = manifest + for manifest in winners.values(): + # Explicit disable always wins. if manifest.name in disabled: loaded = LoadedPlugin(manifest=manifest, enabled=False) loaded.error = "disabled via config" self._plugins[manifest.name] = loaded logger.debug("Skipping disabled plugin '%s'", manifest.name) continue + # Opt-in gate: plugins must be in the enabled allow-list. + # If the allow-list is missing (None), treat as "nothing enabled" + # — users have to explicitly enable plugins to load them. + # Memory and context_engine providers are excluded from this gate + # since they have their own single-select config (memory.provider + # / context.engine), not the enabled list. 
+            if enabled is None or manifest.name not in enabled:
+                loaded = LoadedPlugin(manifest=manifest, enabled=False)
+                loaded.error = "not enabled in config (run `hermes plugins enable {}` to activate)".format(
+                    manifest.name
+                )
+                self._plugins[manifest.name] = loaded
+                logger.debug(
+                    "Skipping '%s' (not in plugins.enabled)", manifest.name
+                )
+                continue
             self._load_plugin(manifest)

         if manifests:
@@ -374,8 +536,18 @@ class PluginManager:
     # Directory scanning
     # -----------------------------------------------------------------------

-    def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]:
-        """Read ``plugin.yaml`` manifests from subdirectories of *path*."""
+    def _scan_directory(
+        self,
+        path: Path,
+        source: str,
+        skip_names: Optional[Set[str]] = None,
+    ) -> List[PluginManifest]:
+        """Read ``plugin.yaml`` manifests from subdirectories of *path*.
+
+        *skip_names* is an optional set of names to skip (used
+        for the bundled scan to exclude ``memory`` / ``context_engine``
+        subdirs that have their own discovery path)
+ """ manifests: List[PluginManifest] = [] if not path.is_dir(): return manifests @@ -383,6 +555,8 @@ class PluginManager: for child in sorted(path.iterdir()): if not child.is_dir(): continue + if skip_names and child.name in skip_names: + continue manifest_file = child / "plugin.yaml" if not manifest_file.exists(): manifest_file = child / "plugin.yml" @@ -450,7 +624,7 @@ class PluginManager: loaded = LoadedPlugin(manifest=manifest) try: - if manifest.source in ("user", "project"): + if manifest.source in ("user", "project", "bundled"): module = self._load_directory_module(manifest) else: module = self._load_entrypoint_module(manifest) @@ -485,6 +659,10 @@ class PluginManager: for h in p.hooks_registered } ) + loaded.commands_registered = [ + c for c in self._plugin_commands + if self._plugin_commands[c].get("plugin") == manifest.name + ] loaded.enabled = True except Exception as exc: @@ -598,6 +776,7 @@ class PluginManager: "enabled": loaded.enabled, "tools": len(loaded.tools_registered), "hooks": len(loaded.hooks_registered), + "commands": len(loaded.commands_registered), "error": loaded.error, } ) @@ -694,9 +873,31 @@ def get_pre_tool_call_block_message( return None +def _ensure_plugins_discovered() -> PluginManager: + """Return the global manager after running idempotent plugin discovery.""" + manager = get_plugin_manager() + manager.discover_and_load() + return manager + + def get_plugin_context_engine(): """Return the plugin-registered context engine, or None.""" - return get_plugin_manager()._context_engine + return _ensure_plugins_discovered()._context_engine + + +def get_plugin_command_handler(name: str) -> Optional[Callable]: + """Return the handler for a plugin-registered slash command, or ``None``.""" + entry = _ensure_plugins_discovered()._plugin_commands.get(name) + return entry["handler"] if entry else None + + +def get_plugin_commands() -> Dict[str, dict]: + """Return the full plugin commands dict (name → {handler, description, plugin}). 
+ + Triggers idempotent plugin discovery so callers can use plugin commands + before any explicit discover_plugins() call. + """ + return _ensure_plugins_discovered()._plugin_commands def get_plugin_toolsets() -> List[tuple]: diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py index c92d8b0dc6..230e134207 100644 --- a/hermes_cli/plugins_cmd.py +++ b/hermes_cli/plugins_cmd.py @@ -15,6 +15,7 @@ import shutil import subprocess import sys from pathlib import Path +from typing import Optional from hermes_constants import get_hermes_home @@ -281,8 +282,16 @@ def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path: # --------------------------------------------------------------------------- -def cmd_install(identifier: str, force: bool = False) -> None: - """Install a plugin from a Git URL or owner/repo shorthand.""" +def cmd_install( + identifier: str, + force: bool = False, + enable: Optional[bool] = None, +) -> None: + """Install a plugin from a Git URL or owner/repo shorthand. + + After install, prompt "Enable now? [y/N]" unless *enable* is provided + (True = auto-enable without prompting, False = install disabled). + """ import tempfile from rich.console import Console @@ -391,6 +400,40 @@ def cmd_install(identifier: str, force: bool = False) -> None: _display_after_install(target, identifier) + # Determine the canonical plugin name for enable-list bookkeeping. + installed_name = installed_manifest.get("name") or target.name + + # Decide whether to enable: explicit flag > interactive prompt > default off + should_enable = enable + if should_enable is None: + # Interactive prompt unless stdin isn't a TTY (scripted install). + if sys.stdin.isatty() and sys.stdout.isatty(): + try: + answer = input( + f" Enable '{installed_name}' now? 
[y/N]: " + ).strip().lower() + should_enable = answer in ("y", "yes") + except (EOFError, KeyboardInterrupt): + should_enable = False + else: + should_enable = False + + if should_enable: + enabled = _get_enabled_set() + disabled = _get_disabled_set() + enabled.add(installed_name) + disabled.discard(installed_name) + _save_enabled_set(enabled) + _save_disabled_set(disabled) + console.print( + f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled." + ) + else: + console.print( + f"[dim]Plugin installed but not enabled. " + f"Run `hermes plugins enable {installed_name}` to activate.[/dim]" + ) + console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]") console.print("[dim] hermes gateway restart[/dim]") console.print() @@ -468,7 +511,11 @@ def cmd_remove(name: str) -> None: def _get_disabled_set() -> set: - """Read the disabled plugins set from config.yaml.""" + """Read the disabled plugins set from config.yaml. + + An explicit deny-list. A plugin name here never loads, even if also + listed in ``plugins.enabled``. + """ try: from hermes_cli.config import load_config config = load_config() @@ -488,103 +535,196 @@ def _save_disabled_set(disabled: set) -> None: save_config(config) +def _get_enabled_set() -> set: + """Read the enabled plugins allow-list from config.yaml. + + Plugins are opt-in: only names here are loaded. Returns ``set()`` if + the key is missing (same behaviour as "nothing enabled yet"). 
+ """ + try: + from hermes_cli.config import load_config + config = load_config() + plugins_cfg = config.get("plugins", {}) + if not isinstance(plugins_cfg, dict): + return set() + enabled = plugins_cfg.get("enabled", []) + return set(enabled) if isinstance(enabled, list) else set() + except Exception: + return set() + + +def _save_enabled_set(enabled: set) -> None: + """Write the enabled plugins list to config.yaml.""" + from hermes_cli.config import load_config, save_config + config = load_config() + if "plugins" not in config: + config["plugins"] = {} + config["plugins"]["enabled"] = sorted(enabled) + save_config(config) + + def cmd_enable(name: str) -> None: - """Enable a previously disabled plugin.""" + """Add a plugin to the enabled allow-list (and remove it from disabled).""" from rich.console import Console console = Console() - plugins_dir = _plugins_dir() - - # Verify the plugin exists - target = plugins_dir / name - if not target.is_dir(): - console.print(f"[red]Plugin '{name}' is not installed.[/red]") + # Discover the plugin — check installed (user) AND bundled. + if not _plugin_exists(name): + console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]") sys.exit(1) + enabled = _get_enabled_set() disabled = _get_disabled_set() - if name not in disabled: + + if name in enabled and name not in disabled: console.print(f"[dim]Plugin '{name}' is already enabled.[/dim]") return + enabled.add(name) disabled.discard(name) + _save_enabled_set(enabled) _save_disabled_set(disabled) - console.print(f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. Takes effect on next session.") + console.print( + f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. " + "Takes effect on next session." 
+ ) def cmd_disable(name: str) -> None: - """Disable a plugin without removing it.""" + """Remove a plugin from the enabled allow-list (and add to disabled).""" from rich.console import Console console = Console() - plugins_dir = _plugins_dir() - - # Verify the plugin exists - target = plugins_dir / name - if not target.is_dir(): - console.print(f"[red]Plugin '{name}' is not installed.[/red]") + if not _plugin_exists(name): + console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]") sys.exit(1) + enabled = _get_enabled_set() disabled = _get_disabled_set() - if name in disabled: + + if name not in enabled and name in disabled: console.print(f"[dim]Plugin '{name}' is already disabled.[/dim]") return + enabled.discard(name) disabled.add(name) + _save_enabled_set(enabled) _save_disabled_set(disabled) - console.print(f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.") + console.print( + f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. " + "Takes effect on next session." 
+    )


-def cmd_list() -> None:
-    """List installed plugins."""
-    from rich.console import Console
-    from rich.table import Table
+def _plugin_exists(name: str) -> bool:
+    """Return True if a plugin with *name* is installed (user) or bundled."""
+    # Installed: directory name or manifest name match in user plugins dir
+    user_dir = _plugins_dir()
+    if user_dir.is_dir():
+        if (user_dir / name).is_dir():
+            return True
+        for child in user_dir.iterdir():
+            if not child.is_dir():
+                continue
+            manifest = _read_manifest(child)
+            if manifest.get("name") == name:
+                return True
+    # Bundled: /plugins//
+    from pathlib import Path as _P
+    import hermes_cli
+    repo_plugins = _P(hermes_cli.__file__).resolve().parent.parent / "plugins"
+    if repo_plugins.is_dir():
+        candidate = repo_plugins / name
+        if candidate.is_dir() and (
+            (candidate / "plugin.yaml").exists()
+            or (candidate / "plugin.yml").exists()
+        ):
+            return True
+    return False
+
+def _discover_all_plugins() -> list:
+    """Return a list of (name, version, description, source, dir_path) for
+    every plugin this helper can see — bundled + user.
+
+    Matches the ordering/dedup of ``PluginManager.discover_and_load`` for
+    these two sources: bundled first, then user; user overrides bundled on
+    name collision.
+ """ try: import yaml except ImportError: yaml = None - console = Console() - plugins_dir = _plugins_dir() + seen: dict = {} # name -> (name, version, description, source, path) - dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir()) - if not dirs: + # Bundled (/plugins//), excluding memory/ and context_engine/ + import hermes_cli + repo_plugins = Path(hermes_cli.__file__).resolve().parent.parent / "plugins" + for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")): + if not base.is_dir(): + continue + for d in sorted(base.iterdir()): + if not d.is_dir(): + continue + if source == "bundled" and d.name in ("memory", "context_engine"): + continue + manifest_file = d / "plugin.yaml" + if not manifest_file.exists(): + manifest_file = d / "plugin.yml" + if not manifest_file.exists(): + continue + name = d.name + version = "" + description = "" + if yaml: + try: + with open(manifest_file) as f: + manifest = yaml.safe_load(f) or {} + name = manifest.get("name", d.name) + version = manifest.get("version", "") + description = manifest.get("description", "") + except Exception: + pass + # User plugins override bundled on name collision. 
+ if name in seen and source == "bundled": + continue + src_label = source + if source == "user" and (d / ".git").exists(): + src_label = "git" + seen[name] = (name, version, description, src_label, d) + return list(seen.values()) + + +def cmd_list() -> None: + """List all plugins (bundled + user) with enabled/disabled state.""" + from rich.console import Console + from rich.table import Table + + console = Console() + entries = _discover_all_plugins() + if not entries: console.print("[dim]No plugins installed.[/dim]") console.print("[dim]Install with:[/dim] hermes plugins install owner/repo") return + enabled = _get_enabled_set() disabled = _get_disabled_set() - table = Table(title="Installed Plugins", show_lines=False) + table = Table(title="Plugins", show_lines=False) table.add_column("Name", style="bold") table.add_column("Status") table.add_column("Version", style="dim") table.add_column("Description") table.add_column("Source", style="dim") - for d in dirs: - manifest_file = d / "plugin.yaml" - name = d.name - version = "" - description = "" - source = "local" - - if manifest_file.exists() and yaml: - try: - with open(manifest_file) as f: - manifest = yaml.safe_load(f) or {} - name = manifest.get("name", d.name) - version = manifest.get("version", "") - description = manifest.get("description", "") - except Exception: - pass - - # Check if it's a git repo (installed via hermes plugins install) - if (d / ".git").exists(): - source = "git" - - is_disabled = name in disabled or d.name in disabled - status = "[red]disabled[/red]" if is_disabled else "[green]enabled[/green]" + for name, version, description, source, _dir in entries: + if name in disabled: + status = "[red]disabled[/red]" + elif name in enabled: + status = "[green]enabled[/green]" + else: + status = "[yellow]not enabled[/yellow]" table.add_row(name, status, str(version), description, source) console.print() @@ -592,6 +732,7 @@ def cmd_list() -> None: console.print() console.print("[dim]Interactive 
toggle:[/dim] hermes plugins") console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable ") + console.print("[dim]Plugins are opt-in by default — only 'enabled' plugins load.[/dim]") # --------------------------------------------------------------------------- @@ -742,41 +883,25 @@ def cmd_toggle() -> None: """Interactive composite UI — general plugins + provider plugin categories.""" from rich.console import Console - try: - import yaml - except ImportError: - yaml = None - console = Console() - plugins_dir = _plugins_dir() - # -- General plugins discovery -- - dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir()) - disabled = _get_disabled_set() + # -- General plugins discovery (bundled + user) -- + entries = _discover_all_plugins() + enabled_set = _get_enabled_set() + disabled_set = _get_disabled_set() plugin_names = [] plugin_labels = [] plugin_selected = set() - for i, d in enumerate(dirs): - manifest_file = d / "plugin.yaml" - name = d.name - description = "" - - if manifest_file.exists() and yaml: - try: - with open(manifest_file) as f: - manifest = yaml.safe_load(f) or {} - name = manifest.get("name", d.name) - description = manifest.get("description", "") - except Exception: - pass - - plugin_names.append(name) + for i, (name, _version, description, source, _d) in enumerate(entries): label = f"{name} \u2014 {description}" if description else name + if source == "bundled": + label = f"{label} [bundled]" + plugin_names.append(name) plugin_labels.append(label) - - if name not in disabled and d.name not in disabled: + # Selected (enabled) when in enabled-set AND not in disabled-set + if name in enabled_set and name not in disabled_set: plugin_selected.add(i) # -- Provider categories -- @@ -804,10 +929,10 @@ def cmd_toggle() -> None: try: import curses _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected, - disabled, categories, console) + disabled_set, categories, console) except ImportError: 
_run_composite_fallback(plugin_names, plugin_labels, plugin_selected, - disabled, categories, console) + disabled_set, categories, console) def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected, @@ -1020,18 +1145,29 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected, curses.wrapper(_draw) flush_stdin() - # Persist general plugin changes - new_disabled = set() + # Persist general plugin changes. The new allow-list is the set of + # plugin names that were checked; anything not checked is explicitly + # disabled (written to disabled-list) so it remains off even if the + # plugin code does something clever like auto-enable in the future. + new_enabled: set = set() + new_disabled: set = set(disabled) # preserve existing disabled state for unseen plugins for i, name in enumerate(plugin_names): - if i not in chosen: + if i in chosen: + new_enabled.add(name) + new_disabled.discard(name) + else: new_disabled.add(name) - if new_disabled != disabled: + prev_enabled = _get_enabled_set() + enabled_changed = new_enabled != prev_enabled + disabled_changed = new_disabled != disabled + + if enabled_changed or disabled_changed: + _save_enabled_set(new_enabled) _save_disabled_set(new_disabled) - enabled_count = len(plugin_names) - len(new_disabled) console.print( - f"\n[green]\u2713[/green] General plugins: {enabled_count} enabled, " - f"{len(new_disabled)} disabled." + f"\n[green]\u2713[/green] General plugins: {len(new_enabled)} enabled, " + f"{len(plugin_names) - len(new_enabled)} disabled." 
) elif n_plugins > 0: console.print("\n[dim]General plugins unchanged.[/dim]") @@ -1078,11 +1214,17 @@ def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected, return print() - new_disabled = set() + new_enabled: set = set() + new_disabled: set = set(disabled) for i, name in enumerate(plugin_names): - if i not in chosen: + if i in chosen: + new_enabled.add(name) + new_disabled.discard(name) + else: new_disabled.add(name) - if new_disabled != disabled: + prev_enabled = _get_enabled_set() + if new_enabled != prev_enabled or new_disabled != disabled: + _save_enabled_set(new_enabled) _save_disabled_set(new_disabled) # Provider categories @@ -1108,7 +1250,17 @@ def plugins_command(args) -> None: action = getattr(args, "plugins_action", None) if action == "install": - cmd_install(args.identifier, force=getattr(args, "force", False)) + # Map argparse tri-state: --enable=True, --no-enable=False, neither=None (prompt) + enable_arg = None + if getattr(args, "enable", False): + enable_arg = True + elif getattr(args, "no_enable", False): + enable_arg = False + cmd_install( + args.identifier, + force=getattr(args, "force", False), + enable=enable_arg, + ) elif action == "update": cmd_update(args.name) elif action in ("remove", "rm", "uninstall"): diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py index 1e9fcae005..779728adc0 100644 --- a/hermes_cli/profiles.py +++ b/hermes_cli/profiles.py @@ -300,19 +300,10 @@ def _read_config_model(profile_dir: Path) -> tuple: def _check_gateway_running(profile_dir: Path) -> bool: """Check if a gateway is running for a given profile directory.""" - pid_file = profile_dir / "gateway.pid" - if not pid_file.exists(): - return False try: - raw = pid_file.read_text().strip() - if not raw: - return False - data = json.loads(raw) if raw.startswith("{") else {"pid": int(raw)} - pid = int(data["pid"]) - os.kill(pid, 0) # existence check - return True - except (json.JSONDecodeError, KeyError, ValueError, TypeError, - 
ProcessLookupError, PermissionError, OSError): + from gateway.status import get_running_pid + return get_running_pid(profile_dir / "gateway.pid", cleanup_stale=False) is not None + except Exception: return False diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 6fb940d31f..c701db4d50 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -64,6 +64,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { base_url_override="https://portal.qwen.ai/v1", base_url_env_var="HERMES_QWEN_BASE_URL", ), + "google-gemini-cli": HermesOverlay( + transport="openai_chat", + auth_type="oauth_external", + base_url_override="cloudcode-pa://google", + ), "copilot-acp": HermesOverlay( transport="codex_responses", auth_type="external_process", @@ -128,10 +133,15 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { base_url_env_var="HF_BASE_URL", ), "xai": HermesOverlay( - transport="openai_chat", + transport="codex_responses", base_url_override="https://api.x.ai/v1", base_url_env_var="XAI_BASE_URL", ), + "nvidia": HermesOverlay( + transport="openai_chat", + base_url_override="https://integrate.api.nvidia.com/v1", + base_url_env_var="NVIDIA_BASE_URL", + ), "xiaomi": HermesOverlay( transport="openai_chat", base_url_env_var="XIAOMI_BASE_URL", @@ -141,6 +151,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { base_url_override="https://api.arcee.ai/api/v1", base_url_env_var="ARCEE_BASE_URL", ), + "ollama-cloud": HermesOverlay( + transport="openai_chat", + base_url_env_var="OLLAMA_BASE_URL", + ), } @@ -180,6 +194,13 @@ ALIASES: Dict[str, str] = { # xai "x-ai": "xai", "x.ai": "xai", + "grok": "xai", + + # nvidia + "nim": "nvidia", + "nvidia-nim": "nvidia", + "build-nvidia": "nvidia", + "nemotron": "nvidia", # kimi-for-coding (models.dev ID) "kimi": "kimi-for-coding", @@ -227,6 +248,11 @@ ALIASES: Dict[str, str] = { "qwen": "alibaba", "alibaba-cloud": "alibaba", + # google-gemini-cli (OAuth + Code Assist) + "gemini-cli": "google-gemini-cli", + "gemini-oauth": 
"google-gemini-cli", + + # huggingface "hf": "huggingface", "hugging-face": "huggingface", @@ -236,6 +262,12 @@ ALIASES: Dict[str, str] = { "mimo": "xiaomi", "xiaomi-mimo": "xiaomi", + # bedrock + "aws": "bedrock", + "aws-bedrock": "bedrock", + "amazon-bedrock": "bedrock", + "amazon": "bedrock", + # arcee "arcee-ai": "arcee", "arceeai": "arcee", @@ -244,7 +276,7 @@ ALIASES: Dict[str, str] = { "lmstudio": "lmstudio", "lm-studio": "lmstudio", "lm_studio": "lmstudio", - "ollama": "ollama-cloud", + "ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud "vllm": "local", "llamacpp": "local", "llama.cpp": "local", @@ -262,6 +294,8 @@ _LABEL_OVERRIDES: Dict[str, str] = { "copilot-acp": "GitHub Copilot ACP", "xiaomi": "Xiaomi MiMo", "local": "Local endpoint", + "bedrock": "AWS Bedrock", + "ollama-cloud": "Ollama Cloud", } @@ -271,6 +305,7 @@ TRANSPORT_TO_API_MODE: Dict[str, str] = { "openai_chat": "chat_completions", "anthropic_messages": "anthropic_messages", "codex_responses": "codex_responses", + "bedrock_converse": "bedrock_converse", } @@ -287,12 +322,16 @@ def normalize_provider(name: str) -> str: def get_provider(name: str) -> Optional[ProviderDef]: - """Look up a provider by id or alias, merging all data sources. + """Look up a built-in provider by id or alias. Resolution order: 1. Hermes overlays (for providers not in models.dev: nous, openai-codex, etc.) 2. models.dev catalog + Hermes overlay - 3. User-defined providers from config (TODO: Phase 4) + + User-defined providers from config.yaml (``providers:`` / ``custom_providers:``) + are resolved by :func:`resolve_provider_full`, which layers ``resolve_user_provider`` + and ``resolve_custom_provider`` on top of this function. Callers that need + user-config support should use ``resolve_provider_full`` instead. Returns a fully-resolved ProviderDef or None. 
""" @@ -388,6 +427,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: if pdef is not None: return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions") + # Direct provider checks for providers not in HERMES_OVERLAYS + if provider == "bedrock": + return "bedrock_converse" + # URL-based heuristics for custom / unknown providers if base_url: url_lower = base_url.rstrip("/").lower() @@ -395,6 +438,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: return "anthropic_messages" if "api.openai.com" in url_lower: return "codex_responses" + if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower: + return "bedrock_converse" return "chat_completions" diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index b2dec61cdb..392d7769dc 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -22,6 +22,7 @@ from hermes_cli.auth import ( resolve_nous_runtime_credentials, resolve_codex_runtime_credentials, resolve_qwen_runtime_credentials, + resolve_gemini_oauth_runtime_credentials, resolve_api_key_provider_credentials, resolve_external_process_provider_credentials, has_usable_secret, @@ -37,12 +38,21 @@ def _normalize_custom_provider_name(value: str) -> str: def _detect_api_mode_for_url(base_url: str) -> Optional[str]: """Auto-detect api_mode from the resolved base URL. - Direct api.openai.com endpoints need the Responses API for GPT-5.x - tool calls with reasoning (chat/completions returns 400). + - Direct api.openai.com endpoints need the Responses API for GPT-5.x + tool calls with reasoning (chat/completions returns 400). + - Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM, + LiteLLM proxies, etc.) conventionally expose the native Anthropic + protocol under a ``/anthropic`` suffix — treat those as + ``anthropic_messages`` transport instead of the default + ``chat_completions``. 
""" normalized = (base_url or "").strip().lower().rstrip("/") + if "api.x.ai" in normalized: + return "codex_responses" if "api.openai.com" in normalized and "openrouter" not in normalized: return "codex_responses" + if normalized.endswith("/anthropic"): + return "anthropic_messages" return None @@ -124,7 +134,7 @@ def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str: return "chat_completions" -_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages"} +_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"} def _parse_api_mode(raw: Any) -> Optional[str]: @@ -154,6 +164,9 @@ def _resolve_runtime_from_pool_entry( elif provider == "qwen-oauth": api_mode = "chat_completions" base_url = base_url or DEFAULT_QWEN_BASE_URL + elif provider == "google-gemini-cli": + api_mode = "chat_completions" + base_url = base_url or "cloudcode-pa://google" elif provider == "anthropic": api_mode = "anthropic_messages" cfg_provider = str(model_cfg.get("provider") or "").strip().lower() @@ -163,10 +176,13 @@ def _resolve_runtime_from_pool_entry( base_url = cfg_base_url or base_url or "https://api.anthropic.com" elif provider == "openrouter": base_url = base_url or OPENROUTER_BASE_URL + elif provider == "xai": + api_mode = "codex_responses" elif provider == "nous": api_mode = "chat_completions" elif provider == "copilot": api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", "")) + base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url else: configured_provider = str(model_cfg.get("provider") or "").strip().lower() # Honour model.base_url from config.yaml when the configured provider @@ -185,8 +201,12 @@ def _resolve_runtime_from_pool_entry( elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) - elif 
base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, + # api.openai.com → codex_responses, api.x.ai → codex_responses). + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the # Anthropic SDK prepends its own /v1/messages to the base_url. Strip the @@ -627,12 +647,17 @@ def _resolve_explicit_runtime( api_mode = "chat_completions" if provider == "copilot": api_mode = _copilot_runtime_api_mode(model_cfg, api_key) + elif provider == "xai": + api_mode = "codex_responses" else: configured_mode = _parse_api_mode(model_cfg.get("api_mode")) if configured_mode: api_mode = configured_mode - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints (/anthropic suffix). + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected return { "provider": provider, @@ -797,6 +822,26 @@ def resolve_runtime_provider( logger.info("Qwen OAuth credentials failed; " "falling through to next provider.") + if provider == "google-gemini-cli": + try: + creds = resolve_gemini_oauth_runtime_credentials() + return { + "provider": "google-gemini-cli", + "api_mode": "chat_completions", + "base_url": creds.get("base_url", ""), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "google-oauth"), + "expires_at_ms": creds.get("expires_at_ms"), + "email": creds.get("email", ""), + "project_id": creds.get("project_id", ""), + "requested_provider": requested_provider, + } + except AuthError: + if requested_provider != "auto": + raise + logger.info("Google Gemini OAuth credentials failed; " + "falling through to next provider.") + if provider == "copilot-acp": creds = resolve_external_process_provider_credentials(provider) return { @@ -836,6 +881,77 @@ def resolve_runtime_provider( 
"requested_provider": requested_provider, } + # AWS Bedrock (native Converse API via boto3) + if provider == "bedrock": + from agent.bedrock_adapter import ( + has_aws_credentials, + resolve_aws_auth_env_var, + resolve_bedrock_region, + is_anthropic_bedrock_model, + ) + # When the user explicitly selected bedrock (not auto-detected), + # trust boto3's credential chain — it handles IMDS, ECS task roles, + # Lambda execution roles, SSO, and other implicit sources that our + # env-var check can't detect. + is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon") + if not is_explicit and not has_aws_credentials(): + raise AuthError( + "No AWS credentials found for Bedrock. Configure one of:\n" + " - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n" + " - AWS_PROFILE (for SSO / named profiles)\n" + " - IAM instance role (EC2, ECS, Lambda)\n" + "Or run 'aws configure' to set up credentials.", + code="no_aws_credentials", + ) + # Read bedrock-specific config from config.yaml + from hermes_cli.config import load_config as _load_bedrock_config + _bedrock_cfg = _load_bedrock_config().get("bedrock", {}) + # Region priority: config.yaml bedrock.region → env var → us-east-1 + region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region() + auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain" + # Build guardrail config if configured + _gr = _bedrock_cfg.get("guardrail", {}) + guardrail_config = None + if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"): + guardrail_config = { + "guardrailIdentifier": _gr["guardrail_identifier"], + "guardrailVersion": _gr["guardrail_version"], + } + if _gr.get("stream_processing_mode"): + guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"] + if _gr.get("trace"): + guardrail_config["trace"] = _gr["trace"] + # Dual-path routing: Claude models use AnthropicBedrock SDK for full + # feature parity (prompt caching, thinking budgets, adaptive 
thinking). + # Non-Claude models use the Converse API for multi-model support. + _current_model = str(model_cfg.get("default") or "").strip() + if is_anthropic_bedrock_model(_current_model): + # Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path + runtime = { + "provider": "bedrock", + "api_mode": "anthropic_messages", + "base_url": f"https://bedrock-runtime.{region}.amazonaws.com", + "api_key": "aws-sdk", + "source": auth_source, + "region": region, + "bedrock_anthropic": True, # Signal to use AnthropicBedrock client + "requested_provider": requested_provider, + } + else: + # Non-Claude (Nova, DeepSeek, Llama, etc.) → Converse API + runtime = { + "provider": "bedrock", + "api_mode": "bedrock_converse", + "base_url": f"https://bedrock-runtime.{region}.amazonaws.com", + "api_key": "aws-sdk", + "source": auth_source, + "region": region, + "requested_provider": requested_provider, + } + if guardrail_config: + runtime["guardrail_config"] = guardrail_config + return runtime + # API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN) pconfig = PROVIDER_REGISTRY.get(provider) if pconfig and pconfig.auth_type == "api_key": @@ -852,6 +968,8 @@ def resolve_runtime_provider( api_mode = "chat_completions" if provider == "copilot": api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", "")) + elif provider == "xai": + api_mode = "codex_responses" else: configured_provider = str(model_cfg.get("provider") or "").strip().lower() # Only honor persisted api_mode when it belongs to the same provider family. @@ -861,10 +979,13 @@ def resolve_runtime_provider( elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) - # Auto-detect Anthropic-compatible endpoints by URL convention - # (e.g. 
https://api.minimax.io/anthropic, https://dashscope.../anthropic) - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints by URL convention + # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic) + # plus api.openai.com → codex_responses and api.x.ai → codex_responses. + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected # Strip trailing /v1 for OpenCode Anthropic models (see comment above). if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"): base_url = re.sub(r"/v1/?$", "", base_url) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 9044871dc3..6ce9f6dfab 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -20,10 +20,7 @@ import copy from pathlib import Path from typing import Optional, Dict, Any -from hermes_cli.nous_subscription import ( - apply_nous_provider_defaults, - get_nous_subscription_features, -) +from hermes_cli.nous_subscription import get_nous_subscription_features from tools.tool_backend_helpers import managed_nous_tools_enabled from hermes_constants import get_optional_skills_dir @@ -92,9 +89,8 @@ _DEFAULT_PROVIDER_MODELS = { "grok-code-fast-1", ], "gemini": [ - "gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", - "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", - "gemma-4-31b-it", "gemma-4-26b-it", + "gemini-3.1-pro-preview", "gemini-3-pro-preview", + "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", ], "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], @@ -105,7 +101,7 @@ _DEFAULT_PROVIDER_MODELS = { "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", 
"google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], - "opencode-go": ["glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"], + "opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"], "huggingface": [ "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507", "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528", @@ -213,20 +209,20 @@ def prompt(question: str, default: str = None, password: bool = False) -> str: sys.exit(1) -def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int: +def _curses_prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int: """Single-select menu using curses. Delegates to curses_radiolist.""" from hermes_cli.curses_ui import curses_radiolist - return curses_radiolist(question, choices, selected=default, cancel_returns=-1) + return curses_radiolist(question, choices, selected=default, cancel_returns=-1, description=description) -def prompt_choice(question: str, choices: list, default: int = 0) -> int: +def prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int: """Prompt for a choice from a list with arrow key navigation. Escape keeps the current default (skips the question). Ctrl+C exits the wizard. 
""" - idx = _curses_prompt_choice(question, choices, default) + idx = _curses_prompt_choice(question, choices, default, description=description) if idx >= 0: if idx == default: print_info(" Skipped (keeping current)") @@ -433,6 +429,8 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (MiniMax)", True, None)) elif tts_provider == "mistral" and get_env_value("MISTRAL_API_KEY"): tool_status.append(("Text-to-Speech (Mistral Voxtral)", True, None)) + elif tts_provider == "gemini" and (get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY")): + tool_status.append(("Text-to-Speech (Google Gemini)", True, None)) elif tts_provider == "neutts": try: import importlib.util @@ -835,14 +833,7 @@ def setup_model_provider(config: dict, *, quick: bool = False): print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings") - if selected_provider == "nous" and nous_subscription_selected: - changed_defaults = apply_nous_provider_defaults(config) - current_tts = str(config.get("tts", {}).get("provider") or "edge") - if "tts" in changed_defaults: - print_success("TTS provider set to: OpenAI TTS via your Nous subscription") - else: - print_info(f"Keeping your existing TTS provider: {current_tts}") - + # Tool Gateway prompt is already shown by _model_flow_nous() above. 
save_config(config) if not quick and selected_provider != "nous": @@ -920,8 +911,10 @@ def _setup_tts_provider(config: dict): "edge": "Edge TTS", "elevenlabs": "ElevenLabs", "openai": "OpenAI TTS", + "xai": "xAI TTS", "minimax": "MiniMax TTS", "mistral": "Mistral Voxtral TTS", + "gemini": "Google Gemini TTS", "neutts": "NeuTTS", } current_label = provider_labels.get(current_provider, current_provider) @@ -941,12 +934,14 @@ def _setup_tts_provider(config: dict): "Edge TTS (free, cloud-based, no setup needed)", "ElevenLabs (premium quality, needs API key)", "OpenAI TTS (good quality, needs API key)", + "xAI TTS (Grok voices, needs API key)", "MiniMax TTS (high quality with voice cloning, needs API key)", "Mistral Voxtral TTS (multilingual, native Opus, needs API key)", + "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)", "NeuTTS (local on-device, free, ~300MB model download)", ] ) - providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"]) + providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"]) choices.append(f"Keep current ({current_label})") keep_current_idx = len(choices) - 1 idx = prompt_choice("Select TTS provider:", choices, keep_current_idx) @@ -1012,6 +1007,23 @@ def _setup_tts_provider(config: dict): print_warning("No API key provided. Falling back to Edge TTS.") selected = "edge" + elif selected == "xai": + existing = get_env_value("XAI_API_KEY") + if not existing: + print() + api_key = prompt("xAI API key for TTS", password=True) + if api_key: + save_env_value("XAI_API_KEY", api_key) + print_success("xAI TTS API key saved") + else: + from hermes_constants import display_hermes_home as _dhh + print_warning( + "No xAI API key provided for TTS. Configure XAI_API_KEY via " + f"hermes setup model or {_dhh()}/.env to use xAI TTS. " + "Falling back to Edge TTS." 
+ ) + selected = "edge" + elif selected == "minimax": existing = get_env_value("MINIMAX_API_KEY") if not existing: @@ -1036,6 +1048,19 @@ def _setup_tts_provider(config: dict): print_warning("No API key provided. Falling back to Edge TTS.") selected = "edge" + elif selected == "gemini": + existing = get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY") + if not existing: + print() + print_info("Get a free API key at https://aistudio.google.com/app/apikey") + api_key = prompt("Gemini API key for TTS", password=True) + if api_key: + save_env_value("GEMINI_API_KEY", api_key) + print_success("Gemini TTS API key saved") + else: + print_warning("No API key provided. Falling back to Edge TTS.") + selected = "edge" + # Save the selection if "tts" not in config: config["tts"] = {} @@ -1435,7 +1460,9 @@ def setup_agent_settings(config: dict): ) print_info("Maximum tool-calling iterations per conversation.") print_info("Higher = more complex tasks, but costs more tokens.") - print_info("Default is 90, which works for most tasks. Use 150+ for open exploration.") + print_info( + f"Press Enter to keep {current_max}. Use 90 for most tasks or 150+ for open exploration." + ) max_iter_str = prompt("Max iterations", current_max) try: @@ -1611,9 +1638,19 @@ def _setup_telegram(): return print_info("Create a bot via @BotFather on Telegram") - token = prompt("Telegram bot token", password=True) - if not token: - return + import re + + while True: + token = prompt("Telegram bot token", password=True) + if not token: + return + if not re.match(r"^\d+:[A-Za-z0-9_-]{30,}$", token): + print_error( + "Invalid token format. 
Expected: : " + "(e.g., 123456789:ABCdefGHI-jklMNOpqrSTUvwxYZ)" + ) + continue + break save_env_value("TELEGRAM_BOT_TOKEN", token) print_success("Telegram token saved") @@ -1969,52 +2006,6 @@ def _setup_wecom_callback(): _gw_setup() -def _setup_qqbot(): - """Configure QQ Bot gateway.""" - print_header("QQ Bot") - existing = get_env_value("QQ_APP_ID") - if existing: - print_info("QQ Bot: already configured") - if not prompt_yes_no("Reconfigure QQ Bot?", False): - return - - print_info("Connects Hermes to QQ via the Official QQ Bot API (v2).") - print_info(" Requires a QQ Bot application at q.qq.com") - print_info(" Reference: https://bot.q.qq.com/wiki/develop/api-v2/") - print() - - app_id = prompt("QQ Bot App ID") - if not app_id: - print_warning("App ID is required — skipping QQ Bot setup") - return - save_env_value("QQ_APP_ID", app_id.strip()) - - client_secret = prompt("QQ Bot App Secret", password=True) - if not client_secret: - print_warning("App Secret is required — skipping QQ Bot setup") - return - save_env_value("QQ_CLIENT_SECRET", client_secret) - print_success("QQ Bot credentials saved") - - print() - print_info("🔒 Security: Restrict who can DM your bot") - print_info(" Use QQ user OpenIDs (found in event payloads)") - print() - allowed_users = prompt("Allowed user OpenIDs (comma-separated, leave empty for open access)") - if allowed_users: - save_env_value("QQ_ALLOWED_USERS", allowed_users.replace(" ", "")) - print_success("QQ Bot allowlist configured") - else: - print_info("⚠️ No allowlist set — anyone can DM the bot!") - - print() - print_info("📬 Home Channel: OpenID for cron job delivery and notifications.") - home_channel = prompt("Home channel OpenID (leave empty to set later)") - if home_channel: - save_env_value("QQ_HOME_CHANNEL", home_channel) - - print() - print_success("QQ Bot configured!") def _setup_bluebubbles(): @@ -2083,12 +2074,9 @@ def _setup_bluebubbles(): def _setup_qqbot(): - """Configure QQ Bot (Official API v2) via standard 
platform setup.""" - from hermes_cli.gateway import _PLATFORMS - qq_platform = next((p for p in _PLATFORMS if p["key"] == "qqbot"), None) - if qq_platform: - from hermes_cli.gateway import _setup_standard_platform - _setup_standard_platform(qq_platform) + """Configure QQ Bot (Official API v2) via gateway setup.""" + from hermes_cli.gateway import _setup_qqbot as _gateway_setup_qqbot + _gateway_setup_qqbot() def _setup_webhooks(): @@ -2228,7 +2216,9 @@ def setup_gateway(config: dict): missing_home.append("Slack") if get_env_value("BLUEBUBBLES_SERVER_URL") and not get_env_value("BLUEBUBBLES_HOME_CHANNEL"): missing_home.append("BlueBubbles") - if get_env_value("QQ_APP_ID") and not get_env_value("QQ_HOME_CHANNEL"): + if get_env_value("QQ_APP_ID") and not ( + get_env_value("QQBOT_HOME_CHANNEL") or get_env_value("QQ_HOME_CHANNEL") + ): missing_home.append("QQBot") if missing_home: @@ -2253,8 +2243,10 @@ def setup_gateway(config: dict): _is_service_running, supports_systemd_services, has_conflicting_systemd_units, + has_legacy_hermes_units, install_linux_gateway_from_setup, print_systemd_scope_conflict_warning, + print_legacy_unit_warning, systemd_start, systemd_restart, launchd_install, @@ -2272,6 +2264,10 @@ def setup_gateway(config: dict): print_systemd_scope_conflict_warning() print() + if supports_systemd and has_legacy_hermes_units(): + print_legacy_unit_warning() + print() + if service_running: if prompt_yes_no(" Restart the gateway to pick up changes?", True): try: diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py index ed922805b7..bf92fafe10 100644 --- a/hermes_cli/skills_hub.py +++ b/hermes_cli/skills_hub.py @@ -515,6 +515,90 @@ def do_inspect(identifier: str, console: Optional[Console] = None) -> None: c.print() +def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> dict: + """Paginated hub browse for programmatic callers (e.g. TUI gateway). + + Returns ``{"items": [...], "page": int, "total_pages": int, "total": int}``. 
+ """ + from tools.skills_hub import GitHubAuth, create_source_router + + page_size = max(1, min(page_size, 100)) + _TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1} + _PER_SOURCE_LIMIT = {"official": 100, "skills-sh": 100, "well-known": 25, "github": 100, "clawhub": 50, + "claude-marketplace": 50, "lobehub": 50} + auth = GitHubAuth() + sources = create_source_router(auth) + all_results: list = [] + for src in sources: + sid = src.source_id() + if source != "all" and sid != source and sid != "official": + continue + try: + limit = _PER_SOURCE_LIMIT.get(sid, 50) + all_results.extend(src.search("", limit=limit)) + except Exception: + continue + if not all_results: + return {"items": [], "page": 1, "total_pages": 1, "total": 0} + seen: dict = {} + for r in all_results: + rank = _TRUST_RANK.get(r.trust_level, 0) + if r.name not in seen or rank > _TRUST_RANK.get(seen[r.name].trust_level, 0): + seen[r.name] = r + deduped = list(seen.values()) + deduped.sort(key=lambda r: (-_TRUST_RANK.get(r.trust_level, 0), r.source != "official", r.name.lower())) + total = len(deduped) + total_pages = max(1, (total + page_size - 1) // page_size) + page = max(1, min(page, total_pages)) + start = (page - 1) * page_size + page_items = deduped[start : min(start + page_size, total)] + return { + "items": [{"name": r.name, "description": r.description, "source": r.source, + "trust": r.trust_level} for r in page_items], + "page": page, + "total_pages": total_pages, + "total": total, + } + + +def inspect_skill(identifier: str) -> Optional[dict]: + """Skill metadata (+ SKILL.md preview) for programmatic callers.""" + from tools.skills_hub import GitHubAuth, create_source_router + + class _Q: + def print(self, *a, **k): + pass + + c = _Q() + auth = GitHubAuth() + sources = create_source_router(auth) + ident = identifier + if "/" not in ident: + ident = _resolve_short_name(ident, sources, c) + if not ident: + return None + meta, bundle, _ = _resolve_source_meta_and_bundle(ident, sources) 
+ if not meta: + return None + out: dict = { + "name": meta.name, + "description": meta.description, + "source": meta.source, + "identifier": meta.identifier, + "tags": list(meta.tags) if meta.tags else [], + } + if bundle and "SKILL.md" in bundle.files: + content = bundle.files["SKILL.md"] + if isinstance(content, bytes): + content = content.decode("utf-8", errors="replace") + lines = content.split("\n") + preview = "\n".join(lines[:50]) + if len(lines) > 50: + preview += f"\n\n... ({len(lines) - 50} more lines)" + out["skill_md_preview"] = preview + return out + + def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None: """List installed skills, distinguishing hub, builtin, and local skills.""" from tools.skills_hub import HubLockFile, ensure_hub_dirs @@ -684,6 +768,51 @@ def do_uninstall(name: str, console: Optional[Console] = None, c.print(f"[bold red]Error:[/] {msg}\n") +def do_reset(name: str, restore: bool = False, + console: Optional[Console] = None, + skip_confirm: bool = False, + invalidate_cache: bool = True) -> None: + """Reset a bundled skill's manifest tracking (+ optionally restore from bundled).""" + from tools.skills_sync import reset_bundled_skill + + c = console or _console + + if not skip_confirm and restore: + c.print(f"\n[bold]Restore '{name}' from bundled source?[/]") + c.print("[dim]This will DELETE your current copy and re-copy the bundled version.[/]") + try: + answer = input("Confirm [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + answer = "n" + if answer not in ("y", "yes"): + c.print("[dim]Cancelled.[/]\n") + return + + result = reset_bundled_skill(name, restore=restore) + + if not result["ok"]: + c.print(f"[bold red]Error:[/] {result['message']}\n") + return + + c.print(f"[bold green]{result['message']}[/]") + synced = result.get("synced") or {} + if synced.get("copied"): + c.print(f"[dim]Copied: {', '.join(synced['copied'])}[/]") + if synced.get("updated"): + c.print(f"[dim]Updated: {', 
'.join(synced['updated'])}[/]") + c.print() + + if invalidate_cache: + try: + from agent.prompt_builder import clear_skills_system_prompt_cache + clear_skills_system_prompt_cache(clear_snapshot=True) + except Exception: + pass + else: + c.print("[dim]Change will take effect in your next session.[/]") + c.print("[dim]Use /reset to start a new session now, or --now to apply immediately (invalidates prompt cache).[/]\n") + + def do_tap(action: str, repo: str = "", console: Optional[Console] = None) -> None: """Manage taps (custom GitHub repo sources).""" from tools.skills_hub import TapsManager @@ -1007,6 +1136,9 @@ def skills_command(args) -> None: do_audit(name=getattr(args, "name", None)) elif action == "uninstall": do_uninstall(args.name) + elif action == "reset": + do_reset(args.name, restore=getattr(args, "restore", False), + skip_confirm=getattr(args, "yes", False)) elif action == "publish": do_publish( args.skill_path, @@ -1029,7 +1161,7 @@ def skills_command(args) -> None: return do_tap(tap_action, repo=repo) else: - _console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|publish|snapshot|tap]\n") + _console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|reset|publish|snapshot|tap]\n") _console.print("Run 'hermes skills --help' for details.\n") @@ -1175,6 +1307,19 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None: do_uninstall(args[0], console=c, skip_confirm=skip_confirm, invalidate_cache=invalidate_cache) + elif action == "reset": + if not args: + c.print("[bold red]Usage:[/] /skills reset [--restore] [--now]\n") + c.print("[dim]Clears the bundled-skills manifest entry so future updates stop marking it as user-modified.[/]") + c.print("[dim]Pass --restore to also replace the current copy with the bundled version.[/]\n") + return + name = args[0] + restore = "--restore" in args + invalidate_cache = "--now" in args + # Slash commands can't 
prompt — --restore in slash mode is implicit consent. + do_reset(name, restore=restore, console=c, skip_confirm=True, + invalidate_cache=invalidate_cache) + elif action == "publish": if not args: c.print("[bold red]Usage:[/] /skills publish [--to github] [--repo owner/repo]\n") @@ -1231,6 +1376,7 @@ def _print_skills_help(console: Console) -> None: " [cyan]update[/] [name] Update hub skills with upstream changes\n" " [cyan]audit[/] [name] Re-scan hub skills for security\n" " [cyan]uninstall[/] Remove a hub-installed skill\n" + " [cyan]reset[/] [--restore] Reset bundled-skill tracking (fix 'user-modified' flag)\n" " [cyan]publish[/] --repo Publish a skill to GitHub via PR\n" " [cyan]snapshot[/] export|import Export/import skill configurations\n" " [cyan]tap[/] list|add|remove Manage skill sources\n", diff --git a/hermes_cli/skin_engine.py b/hermes_cli/skin_engine.py index b992ada06f..4222a966ef 100644 --- a/hermes_cli/skin_engine.py +++ b/hermes_cli/skin_engine.py @@ -23,7 +23,7 @@ All fields are optional. Missing values inherit from the ``default`` skin. banner_dim: "#B8860B" # Dim/muted text (separators, labels) banner_text: "#FFF8DC" # Body text (tool names, skill names) ui_accent: "#FFBF00" # General UI accent - ui_label: "#4dd0e1" # UI labels + ui_label: "#DAA520" # UI labels (warm gold; teal clashed w/ default banner gold) ui_ok: "#4caf50" # Success indicators ui_error: "#ef5350" # Error indicators ui_warn: "#ffa726" # Warning indicators @@ -163,7 +163,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = { "banner_dim": "#B8860B", "banner_text": "#FFF8DC", "ui_accent": "#FFBF00", - "ui_label": "#4dd0e1", + "ui_label": "#DAA520", "ui_ok": "#4caf50", "ui_error": "#ef5350", "ui_warn": "#ffa726", @@ -708,7 +708,9 @@ def init_skin_from_config(config: dict) -> None: Call this once during CLI init with the loaded config dict. 
""" - display = config.get("display", {}) + display = config.get("display") or {} + if not isinstance(display, dict): + display = {} skin_name = display.get("skin", "default") if isinstance(skin_name, str) and skin_name.strip(): set_active_skin(skin_name.strip()) diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 5ec93f24de..540afc3037 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -212,7 +212,7 @@ def show_status(args): if managed_nous_tools_enabled(): features = get_nous_subscription_features(config) print() - print(color("◆ Nous Subscription Features", Colors.CYAN, Colors.BOLD)) + print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD)) if not features.nous_auth_present: print(" Nous Portal ✗ not logged in") else: @@ -230,6 +230,18 @@ def show_status(args): else: state = "not configured" print(f" {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}") + elif nous_logged_in: + # Logged into Nous but on the free tier — show upgrade nudge + print() + print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD)) + print(" Your free-tier Nous account does not include Tool Gateway access.") + print(" Upgrade your subscription to unlock managed web, image, TTS, and browser tools.") + try: + portal_url = nous_status.get("portal_base_url", "").rstrip("/") + if portal_url: + print(f" Upgrade: {portal_url}") + except Exception: + pass # ========================================================================= # API-Key Providers @@ -305,7 +317,7 @@ def show_status(args): "WeCom Callback": ("WECOM_CALLBACK_CORP_ID", None), "Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"), "BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"), - "QQBot": ("QQ_APP_ID", "QQ_HOME_CHANNEL"), + "QQBot": ("QQ_APP_ID", "QQBOT_HOME_CHANNEL"), } for name, (token_var, home_var) in platforms.items(): @@ -315,6 +327,9 @@ def show_status(args): home_channel = "" if home_var: home_channel = 
os.getenv(home_var, "") + # Back-compat: QQBot home channel was renamed from QQ_HOME_CHANNEL to QQBOT_HOME_CHANNEL + if not home_channel and home_var == "QQBOT_HOME_CHANNEL": + home_channel = os.getenv("QQ_HOME_CHANNEL", "") status = "configured" if has_token else "not configured" if home_channel: @@ -327,73 +342,36 @@ def show_status(args): # ========================================================================= print() print(color("◆ Gateway Service", Colors.CYAN, Colors.BOLD)) - - if _is_termux(): - try: - from hermes_cli.gateway import find_gateway_pids - gateway_pids = find_gateway_pids() - except Exception: - gateway_pids = [] - is_running = bool(gateway_pids) + + try: + from hermes_cli.gateway import get_gateway_runtime_snapshot, _format_gateway_pids + + snapshot = get_gateway_runtime_snapshot() + is_running = snapshot.running print(f" Status: {check_mark(is_running)} {'running' if is_running else 'stopped'}") - print(" Manager: Termux / manual process") - if gateway_pids: - rendered = ", ".join(str(pid) for pid in gateway_pids[:3]) - if len(gateway_pids) > 3: - rendered += ", ..." 
- print(f" PID(s): {rendered}") - else: + print(f" Manager: {snapshot.manager}") + if snapshot.gateway_pids: + print(f" PID(s): {_format_gateway_pids(snapshot.gateway_pids)}") + if snapshot.has_process_service_mismatch: + print(" Service: installed but not managing the current running gateway") + elif _is_termux() and not snapshot.gateway_pids: print(" Start with: hermes gateway") print(" Note: Android may stop background jobs when Termux is suspended") - - elif sys.platform.startswith('linux'): - from hermes_constants import is_container - if is_container(): - # Docker/Podman: no systemd — check for running gateway processes - try: - from hermes_cli.gateway import find_gateway_pids - gateway_pids = find_gateway_pids() - is_active = len(gateway_pids) > 0 - except Exception: - is_active = False - print(f" Status: {check_mark(is_active)} {'running' if is_active else 'stopped'}") - print(" Manager: docker (foreground)") + elif snapshot.service_installed and not snapshot.service_running: + print(" Service: installed but stopped") + except Exception: + if _is_termux(): + print(f" Status: {color('unknown', Colors.DIM)}") + print(" Manager: Termux / manual process") + elif sys.platform.startswith('linux'): + print(f" Status: {color('unknown', Colors.DIM)}") + print(" Manager: systemd/manual") + elif sys.platform == 'darwin': + print(f" Status: {color('unknown', Colors.DIM)}") + print(" Manager: launchd") else: - try: - from hermes_cli.gateway import get_service_name - _gw_svc = get_service_name() - except Exception: - _gw_svc = "hermes-gateway" - try: - result = subprocess.run( - ["systemctl", "--user", "is-active", _gw_svc], - capture_output=True, - text=True, - timeout=5 - ) - is_active = result.stdout.strip() == "active" - except (FileNotFoundError, subprocess.TimeoutExpired): - is_active = False - print(f" Status: {check_mark(is_active)} {'running' if is_active else 'stopped'}") - print(" Manager: systemd (user)") - - elif sys.platform == 'darwin': - from 
hermes_cli.gateway import get_launchd_label - try: - result = subprocess.run( - ["launchctl", "list", get_launchd_label()], - capture_output=True, - text=True, - timeout=5 - ) - is_loaded = result.returncode == 0 - except subprocess.TimeoutExpired: - is_loaded = False - print(f" Status: {check_mark(is_loaded)} {'loaded' if is_loaded else 'not loaded'}") - print(" Manager: launchd") - else: - print(f" Status: {color('N/A', Colors.DIM)}") - print(" Manager: (not supported on this platform)") + print(f" Status: {color('N/A', Colors.DIM)}") + print(" Manager: (not supported on this platform)") # ========================================================================= # Cron Jobs diff --git a/hermes_cli/timeouts.py b/hermes_cli/timeouts.py new file mode 100644 index 0000000000..59db4012be --- /dev/null +++ b/hermes_cli/timeouts.py @@ -0,0 +1,82 @@ +from __future__ import annotations + + +def _coerce_timeout(raw: object) -> float | None: + try: + timeout = float(raw) + except (TypeError, ValueError): + return None + if timeout <= 0: + return None + return timeout + + +def get_provider_request_timeout( + provider_id: str, model: str | None = None +) -> float | None: + """Return a configured provider request timeout in seconds, if any.""" + if not provider_id: + return None + + try: + from hermes_cli.config import load_config + except ImportError: + return None + + config = load_config() + providers = config.get("providers", {}) if isinstance(config, dict) else {} + provider_config = ( + providers.get(provider_id, {}) if isinstance(providers, dict) else {} + ) + if not isinstance(provider_config, dict): + return None + + model_config = _get_model_config(provider_config, model) + if model_config is not None: + timeout = _coerce_timeout(model_config.get("timeout_seconds")) + if timeout is not None: + return timeout + + return _coerce_timeout(provider_config.get("request_timeout_seconds")) + + +def get_provider_stale_timeout( + provider_id: str, model: str | None = None +) 
-> float | None: + """Return a configured non-stream stale timeout in seconds, if any.""" + if not provider_id: + return None + + try: + from hermes_cli.config import load_config + except ImportError: + return None + + config = load_config() + providers = config.get("providers", {}) if isinstance(config, dict) else {} + provider_config = ( + providers.get(provider_id, {}) if isinstance(providers, dict) else {} + ) + if not isinstance(provider_config, dict): + return None + + model_config = _get_model_config(provider_config, model) + if model_config is not None: + timeout = _coerce_timeout(model_config.get("stale_timeout_seconds")) + if timeout is not None: + return timeout + + return _coerce_timeout(provider_config.get("stale_timeout_seconds")) + + +def _get_model_config( + provider_config: dict[str, object], model: str | None +) -> dict[str, object] | None: + if not model: + return None + + models = provider_config.get("models", {}) + model_config = models.get(model, {}) if isinstance(models, dict) else {} + if isinstance(model_config, dict): + return model_config + return None diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index aa6cb9729f..71bace524a 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -245,7 +245,7 @@ TIPS = [ "Three plugin types: general (tools/hooks), memory providers, and context engines.", "hermes plugins install owner/repo installs plugins directly from GitHub.", "8 external memory providers available: Honcho, OpenViking, Mem0, Hindsight, and more.", - "Plugin hooks include pre_tool_call, post_tool_call, pre_llm_call, and post_llm_call.", + "Plugin hooks include pre/post_tool_call, pre/post_llm_call, and transform_terminal_output for output canonicalization.", # --- Miscellaneous --- "Prompt caching (Anthropic) reduces costs by reusing cached system prompt prefixes.", @@ -323,7 +323,6 @@ TIPS = [ "GPT-5 and Codex use 'developer' role instead of 'system' in the message format.", "Per-task auxiliary overrides: 
auxiliary.vision.provider, auxiliary.compression.model, etc. in config.yaml.", "The auxiliary client treats 'main' as a provider alias — resolves to your actual primary provider + model.", - "Smart routing can auto-route simple queries to a cheaper model — set smart_model_routing.enabled: true.", "hermes claw migrate --dry-run previews OpenClaw migration without writing anything.", "File paths pasted with quotes or escaped spaces are handled automatically — no manual cleanup needed.", "Slash commands never trigger the large-paste collapse — /command with big arguments works correctly.", @@ -346,4 +345,3 @@ def get_random_tip(exclude_recent: int = 0) -> str: return random.choice(TIPS) - diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index abe1ff2450..8e4bde883f 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -63,6 +63,7 @@ CONFIGURABLE_TOOLSETS = [ ("clarify", "❓ Clarifying Questions", "clarify"), ("delegation", "👥 Task Delegation", "delegate_task"), ("cronjob", "⏰ Cron Jobs", "create/list/update/pause/resume/run, with optional attached skills"), + ("messaging", "📨 Cross-Platform Messaging", "send_message"), ("rl", "🧪 RL Training", "Tinker-Atropos training tools"), ("homeassistant", "🏠 Home Assistant", "smart home device control"), ] @@ -121,6 +122,7 @@ TOOL_CATEGORIES = { "providers": [ { "name": "Nous Subscription", + "badge": "subscription", "tag": "Managed OpenAI TTS billed to your subscription", "env_vars": [], "tts_provider": "openai", @@ -130,21 +132,32 @@ TOOL_CATEGORIES = { }, { "name": "Microsoft Edge TTS", - "tag": "Free - no API key needed", + "badge": "★ recommended · free", + "tag": "Good quality, no API key needed", "env_vars": [], "tts_provider": "edge", }, { "name": "OpenAI TTS", - "tag": "Premium - high quality voices", + "badge": "paid", + "tag": "High quality voices", "env_vars": [ {"key": "VOICE_TOOLS_OPENAI_KEY", "prompt": "OpenAI API key", "url": "https://platform.openai.com/api-keys"}, ], 
"tts_provider": "openai", }, + { + "name": "xAI TTS", + "tag": "Grok voices - requires xAI API key", + "env_vars": [ + {"key": "XAI_API_KEY", "prompt": "xAI API key", "url": "https://console.x.ai/"}, + ], + "tts_provider": "xai", + }, { "name": "ElevenLabs", - "tag": "Premium - most natural voices", + "badge": "paid", + "tag": "Most natural voices", "env_vars": [ {"key": "ELEVENLABS_API_KEY", "prompt": "ElevenLabs API key", "url": "https://elevenlabs.io/app/settings/api-keys"}, ], @@ -152,12 +165,22 @@ TOOL_CATEGORIES = { }, { "name": "Mistral (Voxtral TTS)", - "tag": "Multilingual, native Opus, needs MISTRAL_API_KEY", + "badge": "paid", + "tag": "Multilingual, native Opus", "env_vars": [ {"key": "MISTRAL_API_KEY", "prompt": "Mistral API key", "url": "https://console.mistral.ai/"}, ], "tts_provider": "mistral", }, + { + "name": "Google Gemini TTS", + "badge": "preview", + "tag": "30 prebuilt voices, controllable via prompts", + "env_vars": [ + {"key": "GEMINI_API_KEY", "prompt": "Gemini API key", "url": "https://aistudio.google.com/app/apikey"}, + ], + "tts_provider": "gemini", + }, ], }, "web": { @@ -168,6 +191,7 @@ TOOL_CATEGORIES = { "providers": [ { "name": "Nous Subscription", + "badge": "subscription", "tag": "Managed Firecrawl billed to your subscription", "web_backend": "firecrawl", "env_vars": [], @@ -177,7 +201,8 @@ TOOL_CATEGORIES = { }, { "name": "Firecrawl Cloud", - "tag": "Hosted service - search, extract, and crawl", + "badge": "★ recommended", + "tag": "Full-featured search, extract, and crawl", "web_backend": "firecrawl", "env_vars": [ {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"}, @@ -185,7 +210,8 @@ TOOL_CATEGORIES = { }, { "name": "Exa", - "tag": "AI-native search and contents", + "badge": "paid", + "tag": "Neural search with semantic understanding", "web_backend": "exa", "env_vars": [ {"key": "EXA_API_KEY", "prompt": "Exa API key", "url": "https://exa.ai"}, @@ -193,7 +219,8 @@ TOOL_CATEGORIES = { }, 
{ "name": "Parallel", - "tag": "AI-native search and extract", + "badge": "paid", + "tag": "AI-powered search and extract", "web_backend": "parallel", "env_vars": [ {"key": "PARALLEL_API_KEY", "prompt": "Parallel API key", "url": "https://parallel.ai"}, @@ -201,7 +228,8 @@ TOOL_CATEGORIES = { }, { "name": "Tavily", - "tag": "AI-native search, extract, and crawl", + "badge": "free tier", + "tag": "Search, extract, and crawl — 1000 free searches/mo", "web_backend": "tavily", "env_vars": [ {"key": "TAVILY_API_KEY", "prompt": "Tavily API key", "url": "https://app.tavily.com/home"}, @@ -209,7 +237,8 @@ TOOL_CATEGORIES = { }, { "name": "Firecrawl Self-Hosted", - "tag": "Free - run your own instance", + "badge": "free · self-hosted", + "tag": "Run your own Firecrawl instance (Docker)", "web_backend": "firecrawl", "env_vars": [ {"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"}, @@ -223,18 +252,22 @@ TOOL_CATEGORIES = { "providers": [ { "name": "Nous Subscription", + "badge": "subscription", "tag": "Managed FAL image generation billed to your subscription", "env_vars": [], "requires_nous_auth": True, "managed_nous_feature": "image_gen", "override_env_vars": ["FAL_KEY"], + "imagegen_backend": "fal", }, { "name": "FAL.ai", - "tag": "FLUX 2 Pro with auto-upscaling", + "badge": "paid", + "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.", "env_vars": [ {"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"}, ], + "imagegen_backend": "fal", }, ], }, @@ -244,6 +277,7 @@ TOOL_CATEGORIES = { "providers": [ { "name": "Nous Subscription (Browser Use cloud)", + "badge": "subscription", "tag": "Managed Browser Use billed to your subscription", "env_vars": [], "browser_provider": "browser-use", @@ -254,14 +288,16 @@ TOOL_CATEGORIES = { }, { "name": "Local Browser", - "tag": "Free headless Chromium (no API key needed)", + "badge": "★ recommended · free", + "tag": "Headless Chromium, no 
API key needed", "env_vars": [], "browser_provider": "local", "post_setup": "agent_browser", }, { "name": "Browserbase", - "tag": "Cloud browser with stealth & proxies", + "badge": "paid", + "tag": "Cloud browser with stealth and proxies", "env_vars": [ {"key": "BROWSERBASE_API_KEY", "prompt": "Browserbase API key", "url": "https://browserbase.com"}, {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"}, @@ -271,6 +307,7 @@ TOOL_CATEGORIES = { }, { "name": "Browser Use", + "badge": "paid", "tag": "Cloud browser with remote execution", "env_vars": [ {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"}, @@ -280,6 +317,7 @@ TOOL_CATEGORIES = { }, { "name": "Firecrawl", + "badge": "paid", "tag": "Cloud browser with remote execution", "env_vars": [ {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"}, @@ -289,7 +327,8 @@ TOOL_CATEGORIES = { }, { "name": "Camofox", - "tag": "Local anti-detection browser (Firefox/Camoufox)", + "badge": "free · local", + "tag": "Anti-detection browser (Firefox/Camoufox)", "env_vars": [ {"key": "CAMOFOX_URL", "prompt": "Camofox server URL", "default": "http://localhost:9377", "url": "https://github.com/jo-inc/camofox-browser"}, @@ -473,7 +512,7 @@ def _get_platform_tools( """Resolve which individual toolset names are enabled for a platform.""" from toolsets import resolve_toolset - platform_toolsets = config.get("platform_toolsets", {}) + platform_toolsets = config.get("platform_toolsets") or {} toolset_names = platform_toolsets.get(platform) if toolset_names is None or not isinstance(toolset_names, list): @@ -838,7 +877,8 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict): # Plain text labels only (no ANSI codes in menu items) provider_choices = [] for p in providers: - tag = f" ({p['tag']})" if p.get("tag") else "" + badge = f" [{p['badge']}]" if p.get("badge") else "" + tag = f" — {p['tag']}" if p.get("tag") else "" 
configured = "" env_vars = p.get("env_vars", []) if not env_vars or all(get_env_value(v["key"]) for v in env_vars): @@ -848,7 +888,7 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict): configured = "" else: configured = " [configured]" - provider_choices.append(f"{p['name']}{tag}{configured}") + provider_choices.append(f"{p['name']}{badge}{tag}{configured}") # Add skip option provider_choices.append("Skip — keep defaults / configure later") @@ -912,6 +952,106 @@ def _detect_active_provider_index(providers: list, config: dict) -> int: return 0 +# ─── Image Generation Model Pickers ─────────────────────────────────────────── +# +# IMAGEGEN_BACKENDS is a per-backend catalog. Each entry exposes: +# - config_key: top-level config.yaml key for this backend's settings +# - model_catalog_fn: returns an OrderedDict-like {model_id: metadata} +# - default_model: fallback when nothing is configured +# +# This prepares for future imagegen backends (Replicate, Stability, etc.): +# each new backend registers its own entry; the FAL provider entry in +# TOOL_CATEGORIES tags itself with `imagegen_backend: "fal"` to select the +# right catalog at picker time. 
+ + +def _fal_model_catalog(): + """Lazy-load the FAL model catalog from the tool module.""" + from tools.image_generation_tool import FAL_MODELS, DEFAULT_MODEL + return FAL_MODELS, DEFAULT_MODEL + + +IMAGEGEN_BACKENDS = { + "fal": { + "display": "FAL.ai", + "config_key": "image_gen", + "catalog_fn": _fal_model_catalog, + }, +} + + +def _format_imagegen_model_row(model_id: str, meta: dict, widths: dict) -> str: + """Format a single picker row with column-aligned speed / strengths / price.""" + return ( + f"{model_id:<{widths['model']}} " + f"{meta.get('speed', ''):<{widths['speed']}} " + f"{meta.get('strengths', ''):<{widths['strengths']}} " + f"{meta.get('price', '')}" + ) + + +def _configure_imagegen_model(backend_name: str, config: dict) -> None: + """Prompt the user to pick a model for the given imagegen backend. + + Writes selection to ``config[backend_config_key]["model"]``. Safe to + call even when stdin is not a TTY — curses_radiolist falls back to + keeping the current selection. + """ + backend = IMAGEGEN_BACKENDS.get(backend_name) + if not backend: + return + + catalog, default_model = backend["catalog_fn"]() + if not catalog: + return + + cfg_key = backend["config_key"] + cur_cfg = config.setdefault(cfg_key, {}) + if not isinstance(cur_cfg, dict): + cur_cfg = {} + config[cfg_key] = cur_cfg + current_model = cur_cfg.get("model") or default_model + if current_model not in catalog: + current_model = default_model + + model_ids = list(catalog.keys()) + # Put current model at the top so the cursor lands on it by default. 
+ ordered = [current_model] + [m for m in model_ids if m != current_model] + + # Column widths + widths = { + "model": max(len(m) for m in model_ids), + "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6), + "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0), + } + + print() + header = ( + f" {'Model':<{widths['model']}} " + f"{'Speed':<{widths['speed']}} " + f"{'Strengths':<{widths['strengths']}} " + f"Price" + ) + print(color(header, Colors.CYAN)) + + rows = [] + for mid in ordered: + row = _format_imagegen_model_row(mid, catalog[mid], widths) + if mid == current_model: + row += " ← currently in use" + rows.append(row) + + idx = _prompt_choice( + f" Choose {backend['display']} model:", + rows, + default=0, + ) + + chosen = ordered[idx] + cur_cfg["model"] = chosen + _print_success(f" Model set to: {chosen}") + + def _configure_provider(provider: dict, config: dict): """Configure a single provider - prompt for API keys and set config.""" env_vars = provider.get("env_vars", []) @@ -925,34 +1065,53 @@ def _configure_provider(provider: dict, config: dict): # Set TTS provider in config if applicable if provider.get("tts_provider"): - config.setdefault("tts", {})["provider"] = provider["tts_provider"] + tts_cfg = config.setdefault("tts", {}) + tts_cfg["provider"] = provider["tts_provider"] + tts_cfg["use_gateway"] = bool(managed_feature) # Set browser cloud provider in config if applicable if "browser_provider" in provider: bp = provider["browser_provider"] + browser_cfg = config.setdefault("browser", {}) if bp == "local": - config.setdefault("browser", {})["cloud_provider"] = "local" + browser_cfg["cloud_provider"] = "local" _print_success(" Browser set to local mode") elif bp: - config.setdefault("browser", {})["cloud_provider"] = bp + browser_cfg["cloud_provider"] = bp _print_success(f" Browser cloud provider set to: {bp}") + browser_cfg["use_gateway"] = bool(managed_feature) # Set web search backend in 
config if applicable if provider.get("web_backend"): - config.setdefault("web", {})["backend"] = provider["web_backend"] + web_cfg = config.setdefault("web", {}) + web_cfg["backend"] = provider["web_backend"] + web_cfg["use_gateway"] = bool(managed_feature) _print_success(f" Web backend set to: {provider['web_backend']}") + # For tools without a specific config key (e.g. image_gen), still + # track use_gateway so the runtime knows the user's intent. + if managed_feature and managed_feature not in ("web", "tts", "browser"): + config.setdefault(managed_feature, {})["use_gateway"] = True + elif not managed_feature: + # User picked a non-gateway provider — find which category this + # belongs to and clear use_gateway if it was previously set. + for cat_key, cat in TOOL_CATEGORIES.items(): + if provider in cat.get("providers", []): + section = config.get(cat_key) + if isinstance(section, dict) and section.get("use_gateway"): + section["use_gateway"] = False + break + if not env_vars: if provider.get("post_setup"): _run_post_setup(provider["post_setup"]) _print_success(f" {provider['name']} - no configuration needed!") if managed_feature: _print_info(" Requests for this tool will be billed to your Nous subscription.") - override_envs = provider.get("override_env_vars", []) - if any(get_env_value(env_var) for env_var in override_envs): - _print_warning( - " Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env." - ) + # Imagegen backends prompt for model selection after backend pick. + backend = provider.get("imagegen_backend") + if backend: + _configure_imagegen_model(backend, config) return # Prompt for each required env var @@ -987,6 +1146,10 @@ def _configure_provider(provider: dict, config: dict): if all_configured: _print_success(f" {provider['name']} configured!") + # Imagegen backends prompt for model selection after env vars are in. 
+ backend = provider.get("imagegen_backend") + if backend: + _configure_imagegen_model(backend, config) def _configure_simple_requirements(ts_key: str): @@ -1104,7 +1267,8 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict): provider_choices = [] for p in providers: - tag = f" ({p['tag']})" if p.get("tag") else "" + badge = f" [{p['badge']}]" if p.get("badge") else "" + tag = f" — {p['tag']}" if p.get("tag") else "" configured = "" env_vars = p.get("env_vars", []) if not env_vars or all(get_env_value(v["key"]) for v in env_vars): @@ -1114,7 +1278,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict): configured = "" else: configured = " [configured]" - provider_choices.append(f"{p['name']}{tag}{configured}") + provider_choices.append(f"{p['name']}{badge}{tag}{configured}") default_idx = _detect_active_provider_index(providers, config) @@ -1157,11 +1321,10 @@ def _reconfigure_provider(provider: dict, config: dict): _print_success(f" {provider['name']} - no configuration needed!") if managed_feature: _print_info(" Requests for this tool will be billed to your Nous subscription.") - override_envs = provider.get("override_env_vars", []) - if any(get_env_value(env_var) for env_var in override_envs): - _print_warning( - " Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env." - ) + # Imagegen backends prompt for model selection on reconfig too. + backend = provider.get("imagegen_backend") + if backend: + _configure_imagegen_model(backend, config) return for var in env_vars: @@ -1179,6 +1342,11 @@ def _reconfigure_provider(provider: dict, config: dict): else: _print_info(" Kept current") + # Imagegen backends prompt for model selection on reconfig too. 
+ backend = provider.get("imagegen_backend") + if backend: + _configure_imagegen_model(backend, config) + def _reconfigure_simple_requirements(ts_key: str): """Reconfigure simple env var requirements.""" diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py index 8d8e3393b3..67cea41820 100644 --- a/hermes_cli/uninstall.py +++ b/hermes_cli/uninstall.py @@ -118,59 +118,166 @@ def remove_wrapper_script(): def uninstall_gateway_service(): - """Stop and uninstall the gateway service if running.""" + """Stop and uninstall the gateway service (systemd, launchd) and kill any + standalone gateway processes. + + Delegates to the gateway module which handles: + - Linux: user + system systemd services (with proper DBUS env setup) + - macOS: launchd plists + - All platforms: standalone ``hermes gateway run`` processes + - Termux/Android: skips systemd (no systemd on Android), still kills standalone processes + """ import platform - - if platform.system() != "Linux": - return False + stopped_something = False - prefix = os.getenv("PREFIX", "") - if os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix: - return False - + # 1. 
Kill any standalone gateway processes (all platforms, including Termux) try: - from hermes_cli.gateway import get_service_name - svc_name = get_service_name() - except Exception: - svc_name = "hermes-gateway" - - service_file = Path.home() / ".config" / "systemd" / "user" / f"{svc_name}.service" - - if not service_file.exists(): - return False - - try: - # Stop the service - subprocess.run( - ["systemctl", "--user", "stop", svc_name], - capture_output=True, - check=False - ) - - # Disable the service - subprocess.run( - ["systemctl", "--user", "disable", svc_name], - capture_output=True, - check=False - ) - - # Remove service file - service_file.unlink() - - # Reload systemd - subprocess.run( - ["systemctl", "--user", "daemon-reload"], - capture_output=True, - check=False - ) - - return True - + from hermes_cli.gateway import kill_gateway_processes, find_gateway_pids + pids = find_gateway_pids() + if pids: + killed = kill_gateway_processes() + if killed: + log_success(f"Killed {killed} running gateway process(es)") + stopped_something = True except Exception as e: - log_warn(f"Could not fully remove gateway service: {e}") + log_warn(f"Could not check for gateway processes: {e}") + + system = platform.system() + + # Termux/Android has no systemd and no launchd — nothing left to do. + prefix = os.getenv("PREFIX", "") + is_termux = bool(os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix) + if is_termux: + return stopped_something + + # 2. 
Linux: uninstall systemd services (both user and system scopes) + if system == "Linux": + try: + from hermes_cli.gateway import ( + get_systemd_unit_path, + get_service_name, + _systemctl_cmd, + ) + svc_name = get_service_name() + + for is_system in (False, True): + unit_path = get_systemd_unit_path(system=is_system) + if not unit_path.exists(): + continue + + scope = "system" if is_system else "user" + try: + if is_system and os.geteuid() != 0: + log_warn(f"System gateway service exists at {unit_path} " + f"but needs sudo to remove") + continue + + cmd = _systemctl_cmd(is_system) + subprocess.run(cmd + ["stop", svc_name], + capture_output=True, check=False) + subprocess.run(cmd + ["disable", svc_name], + capture_output=True, check=False) + unit_path.unlink() + subprocess.run(cmd + ["daemon-reload"], + capture_output=True, check=False) + log_success(f"Removed {scope} gateway service ({unit_path})") + stopped_something = True + except Exception as e: + log_warn(f"Could not remove {scope} gateway service: {e}") + except Exception as e: + log_warn(f"Could not check systemd gateway services: {e}") + + # 3. 
macOS: uninstall launchd plist + elif system == "Darwin": + try: + from hermes_cli.gateway import get_launchd_plist_path + plist_path = get_launchd_plist_path() + if plist_path.exists(): + subprocess.run(["launchctl", "unload", str(plist_path)], + capture_output=True, check=False) + plist_path.unlink() + log_success(f"Removed macOS gateway service ({plist_path})") + stopped_something = True + except Exception as e: + log_warn(f"Could not remove launchd gateway service: {e}") + + return stopped_something + + +def _is_default_hermes_home(hermes_home: Path) -> bool: + """Return True when ``hermes_home`` points at the default (non-profile) root.""" + try: + from hermes_constants import get_default_hermes_root + return hermes_home.resolve() == get_default_hermes_root().resolve() + except Exception: return False +def _discover_named_profiles(): + """Return a list of ``ProfileInfo`` for every non-default profile, or ``[]`` + if profile support is unavailable or nothing is installed beyond the + default root.""" + try: + from hermes_cli.profiles import list_profiles + except Exception: + return [] + try: + return [p for p in list_profiles() if not getattr(p, "is_default", False)] + except Exception as e: + log_warn(f"Could not enumerate profiles: {e}") + return [] + + +def _uninstall_profile(profile) -> None: + """Fully uninstall a single named profile: stop its gateway service, + remove its alias wrapper, and wipe its HERMES_HOME directory. + + We shell out to ``hermes -p gateway stop|uninstall`` because + service names, unit paths, and plist paths are all derived from the + current HERMES_HOME and can't be easily switched in-process. + """ + import sys as _sys + name = profile.name + profile_home = profile.path + + log_info(f"Uninstalling profile '{name}'...") + + # 1. Stop and remove this profile's gateway service. + # Use `python -m hermes_cli.main` so we don't depend on a `hermes` + # wrapper that may be half-removed mid-uninstall. 
+ hermes_invocation = [_sys.executable, "-m", "hermes_cli.main", "--profile", name] + for subcmd in ("stop", "uninstall"): + try: + subprocess.run( + hermes_invocation + ["gateway", subcmd], + capture_output=True, + text=True, + timeout=60, + check=False, + ) + except subprocess.TimeoutExpired: + log_warn(f" Gateway {subcmd} timed out for '{name}'") + except Exception as e: + log_warn(f" Could not run gateway {subcmd} for '{name}': {e}") + + # 2. Remove the wrapper alias script at ~/.local/bin/ (if any). + alias_path = getattr(profile, "alias_path", None) + if alias_path and alias_path.exists(): + try: + alias_path.unlink() + log_success(f" Removed alias {alias_path}") + except Exception as e: + log_warn(f" Could not remove alias {alias_path}: {e}") + + # 3. Wipe the profile's HERMES_HOME directory. + try: + if profile_home.exists(): + shutil.rmtree(profile_home) + log_success(f" Removed {profile_home}") + except Exception as e: + log_warn(f" Could not remove {profile_home}: {e}") + + def run_uninstall(args): """ Run the uninstall process. @@ -181,7 +288,13 @@ def run_uninstall(args): """ project_root = get_project_root() hermes_home = get_hermes_home() - + + # Detect named profiles when uninstalling from the default root — + # offer to clean them up too instead of leaving zombie HERMES_HOMEs + # and systemd units behind. 
+ is_default_profile = _is_default_hermes_home(hermes_home) + named_profiles = _discover_named_profiles() if is_default_profile else [] + print() print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA, Colors.BOLD)) print(color("│ ⚕ Hermes Agent Uninstaller │", Colors.MAGENTA, Colors.BOLD)) @@ -195,6 +308,13 @@ def run_uninstall(args): print(f" Secrets: {hermes_home / '.env'}") print(f" Data: {hermes_home / 'cron/'}, {hermes_home / 'sessions/'}, {hermes_home / 'logs/'}") print() + + if named_profiles: + print(color("Other profiles detected:", Colors.CYAN, Colors.BOLD)) + for p in named_profiles: + running = " (gateway running)" if getattr(p, "gateway_running", False) else "" + print(f" • {p.name}{running}: {p.path}") + print() # Ask for confirmation print(color("Uninstall Options:", Colors.YELLOW, Colors.BOLD)) @@ -221,12 +341,40 @@ def run_uninstall(args): return full_uninstall = (choice == "2") - + + # When doing a full uninstall from the default profile, also offer to + # remove any named profiles — stopping their gateway services, unlinking + # their alias wrappers, and wiping their HERMES_HOME dirs. Otherwise + # those leave zombie services and data behind. + remove_profiles = False + if full_uninstall and named_profiles: + print() + print(color("Other profiles will NOT be removed by default.", Colors.YELLOW)) + print(f"Found {len(named_profiles)} named profile(s): " + + ", ".join(p.name for p in named_profiles)) + print() + try: + resp = input(color( + f"Also stop and remove these {len(named_profiles)} profile(s)? 
[y/N]: ", + Colors.BOLD + )).strip().lower() + except (KeyboardInterrupt, EOFError): + print() + print("Cancelled.") + return + remove_profiles = resp in ("y", "yes") + # Final confirmation print() if full_uninstall: print(color("⚠️ WARNING: This will permanently delete ALL Hermes data!", Colors.RED, Colors.BOLD)) print(color(" Including: configs, API keys, sessions, scheduled jobs, logs", Colors.RED)) + if remove_profiles: + print(color( + f" Plus {len(named_profiles)} profile(s): " + + ", ".join(p.name for p in named_profiles), + Colors.RED + )) else: print("This will remove the Hermes code but keep your configuration and data.") @@ -247,12 +395,10 @@ def run_uninstall(args): print(color("Uninstalling...", Colors.CYAN, Colors.BOLD)) print() - # 1. Stop and uninstall gateway service - log_info("Checking for gateway service...") - if uninstall_gateway_service(): - log_success("Gateway service stopped and removed") - else: - log_info("No gateway service found") + # 1. Stop and uninstall gateway service + kill standalone processes + log_info("Checking for running gateway...") + if not uninstall_gateway_service(): + log_info("No gateway service or processes found") # 2. Remove PATH entries from shell configs log_info("Removing PATH entries from shell configs...") @@ -291,8 +437,17 @@ def run_uninstall(args): log_warn(f"Could not fully remove {project_root}: {e}") log_info("You may need to manually remove it") - # 5. Optionally remove ~/.hermes/ data directory + # 5. Optionally remove ~/.hermes/ data directory (and named profiles) if full_uninstall: + # 5a. Stop and remove each named profile's gateway service and + # alias wrapper. The profile HERMES_HOME dirs live under + # ``/profiles//`` and will be swept away by the + # rmtree below, but services + alias scripts live OUTSIDE the + # default root and have to be cleaned up explicitly. 
+ if remove_profiles and named_profiles: + for prof in named_profiles: + _uninstall_profile(prof) + log_info("Removing configuration and data...") try: if hermes_home.exists(): diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index f18afbf866..93169f416a 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -11,6 +11,7 @@ Usage: import asyncio import hmac +import importlib.util import json import logging import os @@ -55,10 +56,10 @@ try: except ImportError: raise SystemExit( "Web UI requires fastapi and uvicorn.\n" - "Run 'hermes web' to auto-install, or: pip install hermes-agent[web]" + f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'" ) -WEB_DIST = Path(__file__).parent / "web_dist" +WEB_DIST = Path(os.environ["HERMES_WEB_DIST"]) if "HERMES_WEB_DIST" in os.environ else Path(__file__).parent / "web_dist" _log = logging.getLogger(__name__) app = FastAPI(title="Hermes Agent", version=__version__) @@ -96,6 +97,9 @@ _PUBLIC_API_PATHS: frozenset = frozenset({ "/api/config/defaults", "/api/config/schema", "/api/model/info", + "/api/dashboard/themes", + "/api/dashboard/plugins", + "/api/dashboard/plugins/rescan", }) @@ -114,7 +118,7 @@ def _require_token(request: Request) -> None: async def auth_middleware(request: Request, call_next): """Require the session token on all /api/ routes except the public list.""" path = request.url.path - if path.startswith("/api/") and path not in _PUBLIC_API_PATHS: + if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"): auth = request.headers.get("authorization", "") expected = f"Bearer {_SESSION_TOKEN}" if not hmac.compare_digest(auth.encode(), expected.encode()): @@ -166,6 +170,11 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = { "description": "CLI visual theme", "options": ["default", "ares", "mono", "slate"], }, + "dashboard.theme": { + "type": "select", + "description": "Web dashboard visual theme", + "options": 
["default", "midnight", "ember", "mono", "cyberpunk", "rose"], + }, "display.resume_display": { "type": "select", "description": "How resumed sessions display history", @@ -223,7 +232,8 @@ _CATEGORY_MERGE: Dict[str, str] = { "checkpoints": "agent", "approvals": "security", "human_delay": "display", - "smart_model_routing": "agent", + "dashboard": "display", + "code_execution": "agent", } # Display order for tabs — unlisted categories sort alphabetically after these. @@ -457,6 +467,7 @@ async def get_status(): "latest_config_version": latest_ver, "gateway_running": gateway_running, "gateway_pid": gateway_pid, + "gateway_health_url": _GATEWAY_HEALTH_URL, "gateway_state": gateway_state, "gateway_platforms": gateway_platforms, "gateway_exit_reason": gateway_exit_reason, @@ -1433,38 +1444,8 @@ def _nous_poller(session_id: str) -> None: auth_state, min_key_ttl_seconds=300, timeout_seconds=15.0, force_refresh=False, force_mint=True, ) - # Save into credential pool same as auth_commands.py does - from agent.credential_pool import ( - PooledCredential, - load_pool, - AUTH_TYPE_OAUTH, - SOURCE_MANUAL, - ) - pool = load_pool("nous") - entry = PooledCredential.from_dict("nous", { - **full_state, - "label": "dashboard device_code", - "auth_type": AUTH_TYPE_OAUTH, - "source": f"{SOURCE_MANUAL}:dashboard_device_code", - "base_url": full_state.get("inference_base_url"), - }) - pool.add_entry(entry) - # Also persist to auth store so get_nous_auth_status() sees it - # (matches what _login_nous in auth.py does for the CLI flow). 
- try: - from hermes_cli.auth import ( - _load_auth_store, _save_provider_state, _save_auth_store, - _auth_store_lock, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - _save_provider_state(auth_store, "nous", full_state) - _save_auth_store(auth_store) - except Exception as store_exc: - _log.warning( - "oauth/device: credential pool saved but auth store write failed " - "(session=%s): %s", session_id, store_exc, - ) + from hermes_cli.auth import persist_nous_credentials + persist_nous_credentials(full_state) with _oauth_sessions_lock: sess["status"] = "approved" _log.info("oauth/device: nous login completed (session=%s)", session_id) @@ -2086,6 +2067,237 @@ def mount_spa(application: FastAPI): return _serve_index() +# --------------------------------------------------------------------------- +# Dashboard theme endpoints +# --------------------------------------------------------------------------- + +# Built-in dashboard themes — label + description only. The actual color +# definitions live in the frontend (web/src/themes/presets.ts). 
+_BUILTIN_DASHBOARD_THEMES = [ + {"name": "default", "label": "Hermes Teal", "description": "Classic dark teal — the canonical Hermes look"}, + {"name": "midnight", "label": "Midnight", "description": "Deep blue-violet with cool accents"}, + {"name": "ember", "label": "Ember", "description": "Warm crimson and bronze — forge vibes"}, + {"name": "mono", "label": "Mono", "description": "Clean grayscale — minimal and focused"}, + {"name": "cyberpunk", "label": "Cyberpunk", "description": "Neon green on black — matrix terminal"}, + {"name": "rose", "label": "Rosé", "description": "Soft pink and warm ivory — easy on the eyes"}, +] + + +def _discover_user_themes() -> list: + """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes.""" + themes_dir = get_hermes_home() / "dashboard-themes" + if not themes_dir.is_dir(): + return [] + result = [] + for f in sorted(themes_dir.glob("*.yaml")): + try: + data = yaml.safe_load(f.read_text(encoding="utf-8")) + if isinstance(data, dict) and data.get("name"): + result.append({ + "name": data["name"], + "label": data.get("label", data["name"]), + "description": data.get("description", ""), + }) + except Exception: + continue + return result + + +@app.get("/api/dashboard/themes") +async def get_dashboard_themes(): + """Return available themes and the currently active one.""" + config = load_config() + active = config.get("dashboard", {}).get("theme", "default") + user_themes = _discover_user_themes() + # Merge built-in + user, user themes override built-in by name. 
+ seen = set() + themes = [] + for t in _BUILTIN_DASHBOARD_THEMES: + seen.add(t["name"]) + themes.append(t) + for t in user_themes: + if t["name"] not in seen: + themes.append(t) + seen.add(t["name"]) + return {"themes": themes, "active": active} + + +class ThemeSetBody(BaseModel): + name: str + + +@app.put("/api/dashboard/theme") +async def set_dashboard_theme(body: ThemeSetBody): + """Set the active dashboard theme (persists to config.yaml).""" + config = load_config() + if "dashboard" not in config: + config["dashboard"] = {} + config["dashboard"]["theme"] = body.name + save_config(config) + return {"ok": True, "theme": body.name} + + +# --------------------------------------------------------------------------- +# Dashboard plugin system +# --------------------------------------------------------------------------- + +def _discover_dashboard_plugins() -> list: + """Scan plugins/*/dashboard/manifest.json for dashboard extensions. + + Checks three plugin sources (same as hermes_cli.plugins): + 1. User plugins: ~/.hermes/plugins//dashboard/manifest.json + 2. Bundled plugins: /plugins//dashboard/manifest.json (memory/, etc.) + 3. 
Project plugins: ./.hermes/plugins/ (only if HERMES_ENABLE_PROJECT_PLUGINS) + """ + plugins = [] + seen_names: set = set() + + search_dirs = [ + (get_hermes_home() / "plugins", "user"), + (PROJECT_ROOT / "plugins" / "memory", "bundled"), + (PROJECT_ROOT / "plugins", "bundled"), + ] + if os.environ.get("HERMES_ENABLE_PROJECT_PLUGINS"): + search_dirs.append((Path.cwd() / ".hermes" / "plugins", "project")) + + for plugins_root, source in search_dirs: + if not plugins_root.is_dir(): + continue + for child in sorted(plugins_root.iterdir()): + if not child.is_dir(): + continue + manifest_file = child / "dashboard" / "manifest.json" + if not manifest_file.exists(): + continue + try: + data = json.loads(manifest_file.read_text(encoding="utf-8")) + name = data.get("name", child.name) + if name in seen_names: + continue + seen_names.add(name) + plugins.append({ + "name": name, + "label": data.get("label", name), + "description": data.get("description", ""), + "icon": data.get("icon", "Puzzle"), + "version": data.get("version", "0.0.0"), + "tab": data.get("tab", {"path": f"/{name}", "position": "end"}), + "entry": data.get("entry", "dist/index.js"), + "css": data.get("css"), + "has_api": bool(data.get("api")), + "source": source, + "_dir": str(child / "dashboard"), + "_api_file": data.get("api"), + }) + except Exception as exc: + _log.warning("Bad dashboard plugin manifest %s: %s", manifest_file, exc) + continue + return plugins + + +# Cache discovered plugins per-process (refresh on explicit re-scan). 
+_dashboard_plugins_cache: Optional[list] = None + + +def _get_dashboard_plugins(force_rescan: bool = False) -> list: + global _dashboard_plugins_cache + if _dashboard_plugins_cache is None or force_rescan: + _dashboard_plugins_cache = _discover_dashboard_plugins() + return _dashboard_plugins_cache + + +@app.get("/api/dashboard/plugins") +async def get_dashboard_plugins(): + """Return discovered dashboard plugins.""" + plugins = _get_dashboard_plugins() + # Strip internal fields before sending to frontend. + return [ + {k: v for k, v in p.items() if not k.startswith("_")} + for p in plugins + ] + + +@app.get("/api/dashboard/plugins/rescan") +async def rescan_dashboard_plugins(): + """Force re-scan of dashboard plugins.""" + plugins = _get_dashboard_plugins(force_rescan=True) + return {"ok": True, "count": len(plugins)} + + +@app.get("/dashboard-plugins/{plugin_name}/{file_path:path}") +async def serve_plugin_asset(plugin_name: str, file_path: str): + """Serve static assets from a dashboard plugin directory. + + Only serves files from the plugin's ``dashboard/`` subdirectory. + Path traversal is blocked by checking ``resolve().is_relative_to()``. 
+ """ + plugins = _get_dashboard_plugins() + plugin = next((p for p in plugins if p["name"] == plugin_name), None) + if not plugin: + raise HTTPException(status_code=404, detail="Plugin not found") + + base = Path(plugin["_dir"]) + target = (base / file_path).resolve() + + if not target.is_relative_to(base.resolve()): + raise HTTPException(status_code=403, detail="Path traversal blocked") + if not target.exists() or not target.is_file(): + raise HTTPException(status_code=404, detail="File not found") + + # Guess content type + suffix = target.suffix.lower() + content_types = { + ".js": "application/javascript", + ".mjs": "application/javascript", + ".css": "text/css", + ".json": "application/json", + ".html": "text/html", + ".svg": "image/svg+xml", + ".png": "image/png", + ".jpg": "image/jpeg", + ".woff2": "font/woff2", + ".woff": "font/woff", + } + media_type = content_types.get(suffix, "application/octet-stream") + return FileResponse(target, media_type=media_type) + + +def _mount_plugin_api_routes(): + """Import and mount backend API routes from plugins that declare them. + + Each plugin's ``api`` field points to a Python file that must expose + a ``router`` (FastAPI APIRouter). Routes are mounted under + ``/api/plugins//``. 
+ """ + for plugin in _get_dashboard_plugins(): + api_file_name = plugin.get("_api_file") + if not api_file_name: + continue + api_path = Path(plugin["_dir"]) / api_file_name + if not api_path.exists(): + _log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name) + continue + try: + spec = importlib.util.spec_from_file_location( + f"hermes_dashboard_plugin_{plugin['name']}", api_path, + ) + if spec is None or spec.loader is None: + continue + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + router = getattr(mod, "router", None) + if router is None: + _log.warning("Plugin %s api file has no 'router' attribute", plugin["name"]) + continue + app.include_router(router, prefix=f"/api/plugins/{plugin['name']}") + _log.info("Mounted plugin API routes: /api/plugins/%s/", plugin["name"]) + except Exception as exc: + _log.warning("Failed to load plugin %s API routes: %s", plugin["name"], exc) + + +# Mount plugin API routes before the SPA catch-all. +_mount_plugin_api_routes() + mount_spa(app) diff --git a/hermes_cli/webhook.py b/hermes_cli/webhook.py index 8ff135e29e..378f11b4a7 100644 --- a/hermes_cli/webhook.py +++ b/hermes_cli/webhook.py @@ -155,6 +155,15 @@ def _cmd_subscribe(args): "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), } + if getattr(args, "deliver_only", False): + if route["deliver"] == "log": + print( + "Error: --deliver-only requires --deliver to be a real target " + "(telegram, discord, slack, github_comment, etc.) — not 'log'." + ) + return + route["deliver_only"] = True + if args.deliver_chat_id: route["deliver_extra"] = {"chat_id": args.deliver_chat_id} @@ -172,9 +181,12 @@ def _cmd_subscribe(args): else: print(" Events: (all)") print(f" Deliver: {route['deliver']}") + if route.get("deliver_only"): + print(" Mode: direct delivery (no agent, zero LLM cost)") if route.get("prompt"): prompt_preview = route["prompt"][:80] + ("..." 
if len(route["prompt"]) > 80 else "") - print(f" Prompt: {prompt_preview}") + label = "Message" if route.get("deliver_only") else "Prompt" + print(f" {label}: {prompt_preview}") print(f"\n Configure your service to POST to the URL above.") print(f" Use the secret for HMAC-SHA256 signature validation.") print(f" The gateway must be running to receive events (hermes gateway run).\n") @@ -192,6 +204,8 @@ def _cmd_list(args): for name, route in subs.items(): events = ", ".join(route.get("events", [])) or "(all)" deliver = route.get("deliver", "log") + if route.get("deliver_only"): + deliver = f"{deliver} (direct — no agent)" desc = route.get("description", "") print(f" ◆ {name}") if desc: diff --git a/hermes_constants.py b/hermes_constants.py index 3bc56d4f78..35dbf86ab2 100644 --- a/hermes_constants.py +++ b/hermes_constants.py @@ -14,7 +14,8 @@ def get_hermes_home() -> Path: Reads HERMES_HOME env var, falls back to ~/.hermes. This is the single source of truth — all other copies should import this. """ - return Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) + val = os.environ.get("HERMES_HOME", "").strip() + return Path(val) if val else Path.home() / ".hermes" def get_default_hermes_root() -> Path: diff --git a/hermes_logging.py b/hermes_logging.py index dbef213287..0ebc450a22 100644 --- a/hermes_logging.py +++ b/hermes_logging.py @@ -358,6 +358,7 @@ def _add_rotating_handler( path.parent.mkdir(parents=True, exist_ok=True) handler = _ManagedRotatingFileHandler( str(path), maxBytes=max_bytes, backupCount=backup_count, + encoding="utf-8", ) handler.setLevel(level) handler.setFormatter(formatter) diff --git a/hermes_state.py b/hermes_state.py index 5e563666e8..2d8a0fd4af 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -383,10 +383,19 @@ class SessionDB: return session_id def end_session(self, session_id: str, end_reason: str) -> None: - """Mark a session as ended.""" + """Mark a session as ended. + + No-ops when the session is already ended. 
The first end_reason wins: + compression-split sessions must keep their ``end_reason = 'compression'`` + record even if a later stale ``end_session()`` call (e.g. from a + desynced CLI session_id after ``/resume`` or ``/branch``) targets them + with a different reason. Use ``reopen_session()`` first if you + intentionally need to re-end a closed session with a new reason. + """ def _do(conn): conn.execute( - "UPDATE sessions SET ended_at = ?, end_reason = ? WHERE id = ?", + "UPDATE sessions SET ended_at = ?, end_reason = ? " + "WHERE id = ? AND ended_at IS NULL", (time.time(), end_reason, session_id), ) self._execute_write(_do) @@ -714,6 +723,42 @@ class SessionDB: return f"{base} #{max_num + 1}" + def get_compression_tip(self, session_id: str) -> Optional[str]: + """Walk the compression-continuation chain forward and return the tip. + + A compression continuation is a child session where: + 1. The parent's ``end_reason = 'compression'`` + 2. The child was created AFTER the parent was ended (started_at >= ended_at) + + The second condition distinguishes compression continuations from + delegate subagents or branch children, which can also have a + ``parent_session_id`` but were created while the parent was still live. + + Returns the session_id of the latest continuation in the chain, or the + input ``session_id`` if it isn't part of a compression chain (or if the + input itself doesn't exist). + """ + current = session_id + # Bound the walk defensively — compression chains this deep are + # pathological and shouldn't happen in practice. 100 = plenty. + for _ in range(100): + with self._lock: + cursor = self._conn.execute( + "SELECT id FROM sessions " + "WHERE parent_session_id = ? " + " AND started_at >= (" + " SELECT ended_at FROM sessions " + " WHERE id = ? 
AND end_reason = 'compression'" + " ) " + "ORDER BY started_at DESC LIMIT 1", + (current, current), + ) + row = cursor.fetchone() + if row is None: + return current + current = row["id"] + return current + def list_sessions_rich( self, source: str = None, @@ -721,6 +766,7 @@ class SessionDB: limit: int = 20, offset: int = 0, include_children: bool = False, + project_compression_tips: bool = True, ) -> List[Dict[str, Any]]: """List sessions with preview (first user message) and last active timestamp. @@ -732,6 +778,14 @@ class SessionDB: By default, child sessions (subagent runs, compression continuations) are excluded. Pass ``include_children=True`` to include them. + + With ``project_compression_tips=True`` (default), sessions that are + roots of compression chains are projected forward to their latest + continuation — one logical conversation = one list entry, showing the + live continuation's id/message_count/title/last_active. This prevents + compressed continuations from being invisible to users while keeping + delegate subagents and branches hidden. Pass ``False`` to return the + raw root rows (useful for admin/debug UIs). """ where_clauses = [] params = [] @@ -782,8 +836,77 @@ class SessionDB: s["preview"] = "" sessions.append(s) + # Project compression roots forward to their tips. Each row whose + # end_reason is 'compression' has a continuation child; replace the + # surfaced fields (id, message_count, title, last_active, ended_at, + # end_reason, preview) with the tip's values so the list entry acts + # as the live conversation. Keep the root's started_at to preserve + # chronological ordering by original conversation start. 
+ if project_compression_tips and not include_children: + projected = [] + for s in sessions: + if s.get("end_reason") != "compression": + projected.append(s) + continue + tip_id = self.get_compression_tip(s["id"]) + if tip_id == s["id"]: + projected.append(s) + continue + tip_row = self._get_session_rich_row(tip_id) + if not tip_row: + projected.append(s) + continue + # Preserve the root's started_at for stable sort order, but + # surface the tip's identity and activity data. + merged = dict(s) + for key in ( + "id", "ended_at", "end_reason", "message_count", + "tool_call_count", "title", "last_active", "preview", + "model", "system_prompt", + ): + if key in tip_row: + merged[key] = tip_row[key] + merged["_lineage_root_id"] = s["id"] + projected.append(merged) + sessions = projected + return sessions + def _get_session_rich_row(self, session_id: str) -> Optional[Dict[str, Any]]: + """Fetch a single session with the same enriched columns as + ``list_sessions_rich`` (preview + last_active). Returns None if the + session doesn't exist. + """ + query = """ + SELECT s.*, + COALESCE( + (SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63) + FROM messages m + WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL + ORDER BY m.timestamp, m.id LIMIT 1), + '' + ) AS _preview_raw, + COALESCE( + (SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id), + s.started_at + ) AS last_active + FROM sessions s + WHERE s.id = ? + """ + with self._lock: + cursor = self._conn.execute(query, (session_id,)) + row = cursor.fetchone() + if not row: + return None + s = dict(row) + raw = s.pop("_preview_raw", "").strip() + if raw: + text = raw[:60] + s["preview"] = text + ("..." 
if len(raw) > 60 else "") + else: + s["preview"] = "" + return s + # ========================================================================= # Message storage # ========================================================================= @@ -987,6 +1110,22 @@ class SessionDB: return sanitized.strip() + + @staticmethod + def _contains_cjk(text: str) -> bool: + """Check if text contains CJK (Chinese, Japanese, Korean) characters.""" + for ch in text: + cp = ord(ch) + if (0x4E00 <= cp <= 0x9FFF or # CJK Unified Ideographs + 0x3400 <= cp <= 0x4DBF or # CJK Extension A + 0x20000 <= cp <= 0x2A6DF or # CJK Extension B + 0x3000 <= cp <= 0x303F or # CJK Symbols + 0x3040 <= cp <= 0x309F or # Hiragana + 0x30A0 <= cp <= 0x30FF or # Katakana + 0xAC00 <= cp <= 0xD7AF): # Hangul Syllables + return True + return False + def search_messages( self, query: str, @@ -1062,8 +1201,47 @@ class SessionDB: cursor = self._conn.execute(sql, params) except sqlite3.OperationalError: # FTS5 query syntax error despite sanitization — return empty - return [] - matches = [dict(row) for row in cursor.fetchall()] + # unless query contains CJK (fall back to LIKE below) + if not self._contains_cjk(query): + return [] + matches = [] + else: + matches = [dict(row) for row in cursor.fetchall()] + + # LIKE fallback for CJK queries: FTS5 default tokenizer splits CJK + # characters individually, causing multi-character queries to fail. + if not matches and self._contains_cjk(query): + raw_query = query.strip('"').strip() + like_where = ["m.content LIKE ?"] + like_params: list = [f"%{raw_query}%"] + if source_filter is not None: + like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})") + like_params.extend(source_filter) + if exclude_sources is not None: + like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})") + like_params.extend(exclude_sources) + if role_filter: + like_where.append(f"m.role IN ({','.join('?' 
for _ in role_filter)})") + like_params.extend(role_filter) + like_sql = f""" + SELECT m.id, m.session_id, m.role, + substr(m.content, + max(1, instr(m.content, ?) - 40), + 120) AS snippet, + m.content, m.timestamp, m.tool_name, + s.source, s.model, s.started_at AS session_started + FROM messages m + JOIN sessions s ON s.id = m.session_id + WHERE {' AND '.join(like_where)} + ORDER BY m.timestamp DESC + LIMIT ? OFFSET ? + """ + like_params.extend([limit, offset]) + # instr() parameter goes first in the bound list + like_params = [raw_query] + like_params + with self._lock: + like_cursor = self._conn.execute(like_sql, like_params) + matches = [dict(row) for row in like_cursor.fetchall()] # Add surrounding context (1 message before + after each match). # Done outside the lock so we don't hold it across N sequential queries. @@ -1071,10 +1249,37 @@ class SessionDB: try: with self._lock: ctx_cursor = self._conn.execute( - """SELECT role, content FROM messages - WHERE session_id = ? AND id >= ? - 1 AND id <= ? + 1 - ORDER BY id""", - (match["session_id"], match["id"], match["id"]), + """WITH target AS ( + SELECT session_id, timestamp, id + FROM messages + WHERE id = ? + ) + SELECT role, content + FROM ( + SELECT m.id, m.timestamp, m.role, m.content + FROM messages m + JOIN target t ON t.session_id = m.session_id + WHERE (m.timestamp < t.timestamp) + OR (m.timestamp = t.timestamp AND m.id < t.id) + ORDER BY m.timestamp DESC, m.id DESC + LIMIT 1 + ) + UNION ALL + SELECT role, content + FROM messages + WHERE id = ? 
+ UNION ALL + SELECT role, content + FROM ( + SELECT m.id, m.timestamp, m.role, m.content + FROM messages m + JOIN target t ON t.session_id = m.session_id + WHERE (m.timestamp > t.timestamp) + OR (m.timestamp = t.timestamp AND m.id > t.id) + ORDER BY m.timestamp ASC, m.id ASC + LIMIT 1 + )""", + (match["id"], match["id"]), ) context_msgs = [ {"role": r["role"], "content": (r["content"] or "")[:200]} diff --git a/landingpage/apple-touch-icon.png b/landingpage/apple-touch-icon.png deleted file mode 100644 index c5da175f8e..0000000000 Binary files a/landingpage/apple-touch-icon.png and /dev/null differ diff --git a/landingpage/favicon-16x16.png b/landingpage/favicon-16x16.png deleted file mode 100644 index 5bc67ef224..0000000000 Binary files a/landingpage/favicon-16x16.png and /dev/null differ diff --git a/landingpage/favicon-32x32.png b/landingpage/favicon-32x32.png deleted file mode 100644 index 8db2977a5b..0000000000 Binary files a/landingpage/favicon-32x32.png and /dev/null differ diff --git a/landingpage/favicon.ico b/landingpage/favicon.ico deleted file mode 100644 index 8586c395f6..0000000000 Binary files a/landingpage/favicon.ico and /dev/null differ diff --git a/landingpage/hermes-agent-banner.png b/landingpage/hermes-agent-banner.png deleted file mode 100644 index 2c4a160ceb..0000000000 Binary files a/landingpage/hermes-agent-banner.png and /dev/null differ diff --git a/landingpage/icon-192.png b/landingpage/icon-192.png deleted file mode 100644 index 126a395793..0000000000 Binary files a/landingpage/icon-192.png and /dev/null differ diff --git a/landingpage/icon-512.png b/landingpage/icon-512.png deleted file mode 100644 index c5b4c63a57..0000000000 Binary files a/landingpage/icon-512.png and /dev/null differ diff --git a/landingpage/index.html b/landingpage/index.html deleted file mode 100644 index e24ed11c48..0000000000 --- a/landingpage/index.html +++ /dev/null @@ -1,665 +0,0 @@ - - - - - - Hermes Agent — An Agent That Grows With You - - - - - - - - - - 
- - - - - - - - - - - - - -
-
- - - -
-
-
- - Open Source • MIT License -
- - - - -

- An agent that
- grows with you. -

- -

- It's not a coding copilot tethered to an IDE or a chatbot wrapper - around a single API. It's an autonomous agent that - lives on your server, remembers what it learns, and gets more capable - the longer it runs. -

- -
-
-
-
- - - -
-
- -
-
-
- $ - curl -fsSL - https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh - | bash - -
-
-

- Works on Linux, macOS & WSL2 · No prerequisites · Installs - everything automatically -

-
- - -
-
- -
-
-
-

Get started in 60 seconds

-
- -
-
-
1
-
-

Install

-
-
-
- -
- -
-
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
-
-

- Installs uv, Python 3.11, clones the repo, sets up everything. - No sudo needed. -

-
-
- -
-
2
-
-

Configure

-
-
- bash - -
-
# Interactive setup wizard
-hermes setup
-
-# Or choose your model
-hermes model
-
-

- Connect to Nous Portal (OAuth), OpenRouter (API key), or your - own endpoint. -

-
-
- -
-
3
-
-

Start chatting

-
-
- bash - -
-
hermes
-
-

- That's it. Full interactive CLI with tools, memory, and skills. -

-
-
- -
-
4
-
-

- Go multi-platform (optional) -

-
-
- bash - -
-
# Interactive gateway setup wizard
-hermes gateway setup
-
-# Start the messaging gateway
-hermes gateway
-
-# Install as a system service
-hermes gateway install
-
-

- Walk through connecting Telegram, Discord, Slack, or WhatsApp. - Runs as a systemd service. -

-
-
- -
-
5
-
-

Keep it up to date

-
-
- bash - -
-
hermes update
-
-

- Pulls the latest changes and reinstalls dependencies. Run - anytime to get new features and fixes. -

-
-
-
- -
-

- Native Windows support is extremely experimental and unsupported. - Please install - WSL2 - and run Hermes Agent from there. -

-
-
-
- - -
-
-
-

See it in action

-
- -
-
-
- - - -
- hermes -
-
-
-
-
- - -
-
-
-

Features

-
- -
-
-
-
- - - -
-

Lives Where You Do

-
-

- Telegram, Discord, Slack, WhatsApp, and CLI from a single gateway - — start on one, pick up on another. -

-
- -
-
-
- - - - -
-

Grows the Longer It Runs

-
-

- Persistent memory and auto-generated skills — it learns your - projects and never forgets how it solved a problem. -

-
- -
-
-
- - - - -
-

Scheduled Automations

-
-

- Natural language cron scheduling for reports, backups, and - briefings — running unattended through the gateway. -

-
- -
-
-
- - - - - - -
-

Delegates & Parallelizes

-
-

- Isolated subagents with their own conversations, terminals, and - Python RPC scripts for zero-context-cost pipelines. -

-
- -
-
-
- - - - -
-

Real Sandboxing

-
-

- Five backends — local, Docker, SSH, Singularity, Modal — with - container hardening and namespace isolation. -

-
- -
-
-
- - - - - -
-

Full Web & Browser Control

-
-

- Web search, browser automation, vision, image generation, - text-to-speech, and multi-model reasoning. -

-
-
- -
- -
- -
-
-
-

Tools

-

- 40+ built-in — web search, terminal, file system, browser - automation, vision, image generation, text-to-speech, code - execution, subagent delegation, memory, task planning, cron - scheduling, multi-model reasoning, and more. -

-
- -
-

Platforms

-

- Telegram, Discord, Slack, WhatsApp, Signal, Email, and CLI — all - from a single gateway. Connect to - Nous Portal, OpenRouter, or any OpenAI-compatible API. -

-
- -
-

Environments

-

- Run locally, in Docker, over SSH, on Modal, Daytona, or - Singularity. Container hardening with read-only root, dropped - capabilities, and namespace isolation. -

-
- -
-

Skills

-

- 40+ bundled skills covering MLOps, GitHub workflows, research, - and more. The agent creates new skills on the fly and shares - them via the open - agentskills.io - format. Install community skills from - ClawHub, - LobeHub, and GitHub. -

-
- -
-

Research

-

- Batch trajectory generation with parallel workers and - checkpointing. Atropos integration for RL training. Export to - ShareGPT for fine-tuning with trajectory compression. -

-
-
-
-
-
- - - - - - diff --git a/landingpage/nous-logo.png b/landingpage/nous-logo.png deleted file mode 100644 index cfea9a6613..0000000000 Binary files a/landingpage/nous-logo.png and /dev/null differ diff --git a/landingpage/script.js b/landingpage/script.js deleted file mode 100644 index 4cd097bdb2..0000000000 --- a/landingpage/script.js +++ /dev/null @@ -1,521 +0,0 @@ -// ========================================================================= -// Hermes Agent Landing Page — Interactions -// ========================================================================= - -// --- Platform install commands --- -const PLATFORMS = { - linux: { - command: - "curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash", - prompt: "$", - note: "Works on Linux, macOS & WSL2 · No prerequisites · Installs everything automatically", - stepNote: - "Installs uv, Python 3.11, clones the repo, sets up everything. No sudo needed.", - }, -}; - -function detectPlatform() { - return "linux"; -} - -function switchPlatform(platform) { - const cfg = PLATFORMS[platform]; - if (!cfg) return; - - // Update hero install widget - const commandEl = document.getElementById("install-command"); - const promptEl = document.getElementById("install-prompt"); - const noteEl = document.getElementById("install-note"); - - if (commandEl) commandEl.textContent = cfg.command; - if (promptEl) promptEl.textContent = cfg.prompt; - if (noteEl) noteEl.textContent = cfg.note; - - // Update active tab in hero - document.querySelectorAll(".install-tab").forEach((tab) => { - tab.classList.toggle("active", tab.dataset.platform === platform); - }); - - // Sync the step section tabs too - switchStepPlatform(platform); -} - -function switchStepPlatform(platform) { - const cfg = PLATFORMS[platform]; - if (!cfg) return; - - const commandEl = document.getElementById("step1-command"); - const copyBtn = document.getElementById("step1-copy"); - const noteEl = 
document.getElementById("step1-note"); - - if (commandEl) commandEl.textContent = cfg.command; - if (copyBtn) copyBtn.setAttribute("data-text", cfg.command); - if (noteEl) noteEl.textContent = cfg.stepNote; - - // Update active tab in step section - document.querySelectorAll(".code-tab").forEach((tab) => { - tab.classList.toggle("active", tab.dataset.platform === platform); - }); -} - -function toggleMobileNav() { - document.getElementById("nav-mobile").classList.toggle("open"); - document.getElementById("nav-hamburger").classList.toggle("open"); -} - -function toggleSpecs() { - const wrapper = document.getElementById("specs-wrapper"); - const btn = document.getElementById("specs-toggle"); - const label = btn.querySelector(".toggle-label"); - const isOpen = wrapper.classList.contains("open"); - - if (isOpen) { - wrapper.style.maxHeight = wrapper.scrollHeight + "px"; - requestAnimationFrame(() => { - wrapper.style.maxHeight = "0"; - }); - wrapper.classList.remove("open"); - btn.classList.remove("open"); - if (label) label.textContent = "More details"; - } else { - wrapper.classList.add("open"); - wrapper.style.maxHeight = wrapper.scrollHeight + "px"; - btn.classList.add("open"); - if (label) label.textContent = "Less"; - wrapper.addEventListener( - "transitionend", - () => { - if (wrapper.classList.contains("open")) { - wrapper.style.maxHeight = "none"; - } - }, - { once: true } - ); - } -} - -// --- Copy to clipboard --- -function copyInstall() { - const text = document.getElementById("install-command").textContent; - navigator.clipboard.writeText(text).then(() => { - const btn = document.querySelector(".install-widget-body .copy-btn"); - const original = btn.querySelector(".copy-text").textContent; - btn.querySelector(".copy-text").textContent = "Copied!"; - btn.style.color = "var(--primary-light)"; - setTimeout(() => { - btn.querySelector(".copy-text").textContent = original; - btn.style.color = ""; - }, 2000); - }); -} - -function copyText(btn) { - const text = 
btn.getAttribute("data-text"); - navigator.clipboard.writeText(text).then(() => { - const original = btn.textContent; - btn.textContent = "Copied!"; - btn.style.color = "var(--primary-light)"; - setTimeout(() => { - btn.textContent = original; - btn.style.color = ""; - }, 2000); - }); -} - -// --- Scroll-triggered fade-in --- -function initScrollAnimations() { - const elements = document.querySelectorAll( - ".feature-card, .install-step, " + - ".section-header, .terminal-window", - ); - - elements.forEach((el) => el.classList.add("fade-in")); - - const observer = new IntersectionObserver( - (entries) => { - entries.forEach((entry) => { - if (entry.isIntersecting) { - // Stagger children within grids - const parent = entry.target.parentElement; - if (parent) { - const siblings = parent.querySelectorAll(".fade-in"); - let idx = Array.from(siblings).indexOf(entry.target); - if (idx < 0) idx = 0; - setTimeout(() => { - entry.target.classList.add("visible"); - }, idx * 60); - } else { - entry.target.classList.add("visible"); - } - observer.unobserve(entry.target); - } - }); - }, - { threshold: 0.1, rootMargin: "0px 0px -40px 0px" }, - ); - - elements.forEach((el) => observer.observe(el)); -} - -// --- Terminal Demo --- -const CURSOR = ''; - -const demoSequence = [ - { type: "prompt", text: "❯ " }, - { - type: "type", - text: "Research the latest approaches to GRPO training and write a summary", - delay: 30, - }, - { type: "pause", ms: 600 }, - { - type: "output", - lines: [ - "", - ' web_search "GRPO reinforcement learning 2026" 1.2s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' web_extract arxiv.org/abs/2402.03300 3.1s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' web_search "GRPO vs PPO ablation results" 0.9s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' web_extract huggingface.co/blog/grpo 2.8s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - 
' write_file ~/research/grpo-summary.md 0.1s', - ], - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - "", - 'Done! I\'ve written a summary covering:', - "", - ' GRPO\'s group-relative advantage (no critic model needed)', - ' Comparison with PPO/DPO on reasoning benchmarks', - ' Implementation notes for Axolotl and TRL', - "", - 'Saved to ~/research/grpo-summary.md', - ], - }, - { type: "pause", ms: 2500 }, - - { type: "clear" }, - { type: "prompt", text: "❯ " }, - { - type: "type", - text: "Review the PR at NousResearch/hermes-agent#42 and fix any issues", - delay: 30, - }, - { type: "pause", ms: 600 }, - { - type: "output", - lines: [ - "", - ' delegate_task "review PR #42 changes" 2.1s', - ], - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - ' git diff main..pr-42 0.4s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' patch tools/registry.py 0.1s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' python -m pytest tests/ -x 3.2s', - ], - }, - { type: "pause", ms: 400 }, - { - type: "output", - lines: [ - ' git commit -m "fix: handle empty tool schemas" 0.3s', - ], - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - "", - 'Found 2 issues in the PR and fixed both:', - "", - ' Empty tool schema crash in registry.py — added guard', - ' Missing error handling in delegate_tool.py — added try/except', - "", - 'Tests pass. 
Committed the fix and pushed to the PR branch.', - 'I also saved a skill for this PR review pattern.', - ], - }, - { type: "pause", ms: 2500 }, - - { type: "clear" }, - { type: "prompt", text: "❯ " }, - { - type: "type", - text: "How did we fix that Docker networking issue?", - delay: 35, - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - "", - ' session_search "Docker networking" 1.4s', - ], - }, - { type: "pause", ms: 500 }, - { - type: "output", - lines: [ - "", - 'Found it — from a session on February 12th:', - "", - 'The containers couldn\'t reach each other because the compose', - 'file was using the default bridge network. We switched to a', - 'custom network with driver: overlay, added explicit', - 'aliases, and set dns: 8.8.8.8 as a fallback.', - "", - 'The fix was committed in docker-compose.prod.yml.', - ], - }, - { type: "pause", ms: 3000 }, -]; - -class TerminalDemo { - constructor(container) { - this.container = container; - this.running = false; - this.content = ""; - } - - async start() { - if (this.running) return; - this.running = true; - - while (this.running) { - for (const step of demoSequence) { - if (!this.running) return; - await this.execute(step); - } - this.clear(); - await this.sleep(1000); - } - } - - stop() { - this.running = false; - } - - async execute(step) { - switch (step.type) { - case "prompt": - this.append(`${step.text}`); - break; - case "type": - for (const char of step.text) { - if (!this.running) return; - this.append(`${char}`); - await this.sleep(step.delay || 30); - } - break; - case "output": - for (const line of step.lines) { - if (!this.running) return; - this.append("\n" + line); - await this.sleep(50); - } - break; - case "pause": - await this.sleep(step.ms); - break; - case "clear": - this.clear(); - break; - } - } - - append(html) { - this.content += html; - this.render(); - } - - render() { - this.container.innerHTML = this.content + CURSOR; - this.container.scrollTop = 
this.container.scrollHeight; - } - - clear() { - this.content = ""; - this.container.innerHTML = ""; - } - - sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); - } -} - -// --- Noise Overlay (ported from hermes-chat NoiseOverlay) --- -function initNoiseOverlay() { - if (window.matchMedia("(prefers-reduced-motion: reduce)").matches) return; - if (typeof THREE === "undefined") return; - - const canvas = document.getElementById("noise-overlay"); - if (!canvas) return; - - const vertexShader = ` - varying vec2 vUv; - void main() { - vUv = uv; - gl_Position = projectionMatrix * modelViewMatrix * vec4(position, 1.0); - } - `; - - const fragmentShader = ` - uniform vec2 uRes; - uniform float uDpr, uSize, uDensity, uOpacity; - uniform vec3 uColor; - varying vec2 vUv; - - float hash(vec2 p) { - vec3 p3 = fract(vec3(p.xyx) * 0.1031); - p3 += dot(p3, p3.yzx + 33.33); - return fract((p3.x + p3.y) * p3.z); - } - - void main() { - float n = hash(floor(vUv * uRes / (uSize * uDpr))); - gl_FragColor = vec4(uColor, step(1.0 - uDensity, n)) * uOpacity; - } - `; - - function hexToVec3(hex) { - const c = hex.replace("#", ""); - return new THREE.Vector3( - parseInt(c.substring(0, 2), 16) / 255, - parseInt(c.substring(2, 4), 16) / 255, - parseInt(c.substring(4, 6), 16) / 255, - ); - } - - const renderer = new THREE.WebGLRenderer({ - alpha: true, - canvas, - premultipliedAlpha: false, - }); - renderer.setClearColor(0x000000, 0); - - const scene = new THREE.Scene(); - const camera = new THREE.OrthographicCamera(-1, 1, 1, -1, 0, 1); - const geo = new THREE.PlaneGeometry(2, 2); - - const mat = new THREE.ShaderMaterial({ - vertexShader, - fragmentShader, - transparent: true, - uniforms: { - uColor: { value: hexToVec3("#8090BB") }, - uDensity: { value: 0.1 }, - uDpr: { value: 1 }, - uOpacity: { value: 0.4 }, - uRes: { value: new THREE.Vector2() }, - uSize: { value: 1.0 }, - }, - }); - - scene.add(new THREE.Mesh(geo, mat)); - - function resize() { - const dpr = 
window.devicePixelRatio; - const w = window.innerWidth; - const h = window.innerHeight; - renderer.setSize(w, h); - renderer.setPixelRatio(dpr); - mat.uniforms.uRes.value.set(w * dpr, h * dpr); - mat.uniforms.uDpr.value = dpr; - } - - resize(); - window.addEventListener("resize", resize); - - function loop() { - requestAnimationFrame(loop); - renderer.render(scene, camera); - } - loop(); -} - -// --- Initialize --- -document.addEventListener("DOMContentLoaded", () => { - const detectedPlatform = detectPlatform(); - switchPlatform(detectedPlatform); - - initScrollAnimations(); - initNoiseOverlay(); - - const terminalEl = document.getElementById("terminal-demo"); - - if (terminalEl) { - const demo = new TerminalDemo(terminalEl); - - const observer = new IntersectionObserver( - (entries) => { - entries.forEach((entry) => { - if (entry.isIntersecting) { - demo.start(); - } else { - demo.stop(); - } - }); - }, - { threshold: 0.3 }, - ); - - observer.observe(document.querySelector(".terminal-window")); - } - - const nav = document.querySelector(".nav"); - let ticking = false; - window.addEventListener("scroll", () => { - if (!ticking) { - requestAnimationFrame(() => { - if (window.scrollY > 50) { - nav.style.borderBottomColor = "rgba(48, 80, 255, 0.15)"; - } else { - nav.style.borderBottomColor = ""; - } - ticking = false; - }); - ticking = true; - } - }); -}); diff --git a/landingpage/style.css b/landingpage/style.css deleted file mode 100644 index 30334df0d0..0000000000 --- a/landingpage/style.css +++ /dev/null @@ -1,1178 +0,0 @@ -/* ========================================================================= - Hermes Agent Landing Page - Colors: Nous Blue (#3050FF) palette - ========================================================================= */ - -/* --- Reset & Base --- */ -*, *::before, *::after { - margin: 0; - padding: 0; - box-sizing: border-box; -} - -:root { - --primary: #3050FF; - --primary-light: #5070FF; - --primary-dim: #2040CC; - --primary-dark: 
#1E30AA; - --bg: #0A0E1A; - --bg-card: #12182A; - --bg-card-hover: #1A2240; - --border: rgba(48, 80, 255, 0.1); - --border-hover: rgba(48, 80, 255, 0.22); - --text: #E8ECFF; - --text-dim: #8090BB; - --text-muted: #506090; - --font-sans: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; - --font-mono: 'JetBrains Mono', 'Fira Code', 'Cascadia Code', monospace; - --container: 1080px; - --radius: 12px; - --radius-sm: 8px; - - --ease-in-quad: cubic-bezier(.55, .085, .68, .53); - --ease-in-cubic: cubic-bezier(.550, .055, .675, .19); - --ease-in-quart: cubic-bezier(.895, .03, .685, .22); - --ease-in-quint: cubic-bezier(.755, .05, .855, .06); - --ease-in-expo: cubic-bezier(.95, .05, .795, .035); - --ease-in-circ: cubic-bezier(.6, .04, .98, .335); - - --ease-out-quad: cubic-bezier(.25, .46, .45, .94); - --ease-out-cubic: cubic-bezier(.215, .61, .355, 1); - --ease-out-quart: cubic-bezier(.165, .84, .44, 1); - --ease-out-quint: cubic-bezier(.23, 1, .32, 1); - --ease-out-expo: cubic-bezier(.19, 1, .22, 1); - --ease-out-circ: cubic-bezier(.075, .82, .165, 1); - - --ease-in-out-quad: cubic-bezier(.455, .03, .515, .955); - --ease-in-out-cubic: cubic-bezier(.645, .045, .355, 1); - --ease-in-out-quart: cubic-bezier(.77, 0, .175, 1); - --ease-in-out-quint: cubic-bezier(.86, 0, .07, 1); - --ease-in-out-expo: cubic-bezier(1, 0, 0, 1); - --ease-in-out-circ: cubic-bezier(.785, .135, .15, .86); -} - -html { - scroll-behavior: smooth; - -webkit-font-smoothing: antialiased; - -moz-osx-font-smoothing: grayscale; - overflow-x: hidden; -} - -body { - font-family: var(--font-sans); - background: var(--bg); - color: var(--text); - line-height: 1.6; - overflow-x: hidden; - width: 100%; - max-width: 100vw; - background-image: radial-gradient(rgba(48, 80, 255, 0.04) 1px, transparent 1px); - background-size: 32px 32px; -} - -a { - color: var(--primary); - text-decoration: none; - transition: color 0.2s var(--ease-out-quad); -} -a:hover { - color: var(--primary-light); -} - -strong 
{ - color: #fff; - font-weight: 600; -} - -/* --- Noise Overlay --- */ -#noise-overlay { - position: fixed; - inset: 0; - width: 100%; - height: 100%; - z-index: 50; - pointer-events: none; - mix-blend-mode: soft-light; -} - -/* --- Ambient Glow --- */ -.ambient-glow { - position: fixed; - pointer-events: none; - z-index: 0; - border-radius: 50%; - filter: blur(120px); - opacity: 0.15; -} -.glow-1 { - width: 600px; - height: 600px; - background: var(--primary); - top: -200px; - left: -200px; - opacity: 0.08; -} -.glow-2 { - width: 500px; - height: 500px; - background: var(--primary-dim); - bottom: 20%; - right: -150px; - opacity: 0.06; -} - -/* --- Container --- */ -.container { - max-width: var(--container); - margin: 0 auto; - padding: 0 24px; -} - -/* --- Navigation --- */ -.nav { - position: fixed; - top: 0; - left: 0; - right: 0; - z-index: 100; - background: rgba(7, 7, 13, 0.8); - backdrop-filter: blur(20px); - -webkit-backdrop-filter: blur(20px); - border-bottom: 1px solid var(--border); - transition: border-bottom-color 0.3s var(--ease-out-quad); -} - -.nav-inner { - max-width: var(--container); - margin: 0 auto; - padding: 0 24px; - height: 60px; - display: flex; - align-items: center; - justify-content: space-between; -} - -.nav-logo { - display: flex; - align-items: center; - gap: 10px; - color: var(--text); - font-weight: 600; - font-size: 15px; - transition: color 0.2s var(--ease-out-quad); -} -.nav-logo:hover { color: var(--primary-light); } - -.nav-nous-logo { - width: 22px; - height: 22px; - border-radius: 4px; -} - -.nav-by { - font-weight: 400; - color: var(--text-muted); - font-size: 13px; -} - -.nav-links { - display: flex; - align-items: center; - gap: 28px; -} - -.nav-links a { - color: var(--text-dim); - font-size: 14px; - font-weight: 500; - display: flex; - align-items: center; - gap: 4px; - transition: color 0.2s var(--ease-out-quad); -} -.nav-links a:hover { color: #fff; } - -.external-icon { opacity: 0.4; } - -/* --- Hamburger & Mobile 
Nav --- */ -.nav-hamburger { - display: none; - background: none; - border: none; - cursor: pointer; - padding: 6px; - width: 34px; - height: 34px; - flex-direction: column; - justify-content: center; - gap: 5px; -} - -.hamburger-bar { - display: block; - width: 20px; - height: 2px; - background: var(--text-dim); - border-radius: 1px; - transition: transform 0.25s var(--ease-out-quint), opacity 0.2s var(--ease-out-quad); - transform-origin: center; -} - -.nav-hamburger.open .hamburger-bar:nth-child(1) { - transform: translateY(7px) rotate(45deg); -} - -.nav-hamburger.open .hamburger-bar:nth-child(2) { - opacity: 0; -} - -.nav-hamburger.open .hamburger-bar:nth-child(3) { - transform: translateY(-7px) rotate(-45deg); -} - -.nav-mobile { - display: none; -} - -.nav-mobile.open { - display: flex; - flex-direction: column; - position: absolute; - top: 60px; - left: 0; - right: 0; - background: rgba(7, 7, 13, 0.95); - backdrop-filter: blur(20px); - -webkit-backdrop-filter: blur(20px); - border-bottom: 1px solid var(--border); - padding: 16px 24px; - gap: 16px; -} - -.nav-mobile a { - color: var(--text-dim); - font-size: 15px; - font-weight: 500; - padding: 4px 0; - transition: color 0.2s var(--ease-out-quad); -} - -.nav-mobile a:hover { - color: #fff; -} - -/* --- Hero --- */ -.hero { - position: relative; - z-index: 1; - min-height: 100vh; - display: flex; - align-items: center; - justify-content: center; - padding: 120px 24px 80px; - text-align: center; -} - -.hero-content { - max-width: 760px; -} - -.hero-badge { - display: inline-flex; - align-items: center; - gap: 8px; - padding: 6px 16px; - background: rgba(48, 80, 255, 0.08); - border: 1px solid rgba(48, 80, 255, 0.18); - border-radius: 100px; - font-size: 13px; - color: var(--text-dim); - margin-bottom: 32px; - font-weight: 450; -} - -.badge-dot { - width: 6px; - height: 6px; - border-radius: 50%; - background: var(--primary); - display: inline-block; - animation: pulse-dot 2s var(--ease-in-out-quad) infinite; -} 
- -@keyframes pulse-dot { - 0%, 100% { opacity: 1; } - 50% { opacity: 0.3; } -} - -.hero-ascii { - margin-bottom: 28px; - font-family: 'JetBrains Mono', monospace; - font-variant-ligatures: none; - font-size: clamp(4px, 0.95vw, 11px); - line-height: 1.15; - color: var(--primary-light); - text-align: center; - text-shadow: 0 0 20px rgba(48, 80, 255, 0.3); - opacity: 0.85; - transition: opacity 0.3s var(--ease-out-cubic); - overflow-x: auto; - white-space: pre; -} - -.hero-ascii:hover { - opacity: 1; -} - -.hero-title { - font-size: clamp(36px, 6vw, 56px); - font-weight: 700; - line-height: 1.15; - letter-spacing: -0.03em; - margin-bottom: 20px; - color: #fff; -} - -.hero-gradient { - background: linear-gradient(135deg, var(--primary), var(--primary-light), #90B0FF); - -webkit-background-clip: text; - -webkit-text-fill-color: transparent; - background-clip: text; -} - -.hero-subtitle { - font-size: 17px; - line-height: 1.7; - color: var(--text-dim); - max-width: 620px; - margin: 0 auto 36px; -} - -.hero-install { - margin-bottom: 32px; -} - -/* --- Install Widget (hero tabbed installer) --- */ -.install-widget { - max-width: 740px; - margin: 0 auto; - background: var(--bg-card); - border: 1px solid var(--border); - border-radius: var(--radius); - overflow: hidden; - transition: border-color 0.3s var(--ease-out-quad); -} - -.install-widget:hover { - border-color: var(--border-hover); -} - -.install-widget-header { - display: flex; - align-items: center; - gap: 16px; - padding: 10px 16px; - background: rgba(255, 255, 255, 0.02); - border-bottom: 1px solid var(--border); -} - -.install-dots { - display: flex; - gap: 6px; - flex-shrink: 0; -} - -.install-dots .dot { - width: 10px; - height: 10px; - border-radius: 50%; -} - -.install-tabs { - display: flex; - gap: 4px; - flex-wrap: wrap; -} - -.install-tab { - display: inline-flex; - align-items: center; - gap: 6px; - padding: 5px 14px; - border: none; - border-radius: 6px; - font-family: var(--font-sans); - font-size: 
12px; - font-weight: 500; - cursor: pointer; - transition: color 0.2s var(--ease-out-quad), background 0.2s var(--ease-out-quad); - background: transparent; - color: var(--text-muted); -} - -.install-tab:hover { - color: var(--text-dim); - background: rgba(255, 255, 255, 0.04); -} - -.install-tab.active { - background: rgba(48, 80, 255, 0.14); - color: var(--primary-light); -} - -.install-tab svg { - flex-shrink: 0; -} - -.install-widget-body { - display: flex; - align-items: center; - gap: 10px; - padding: 14px 16px; - font-family: var(--font-mono); - font-size: 13px; - color: var(--text); - overflow-x: auto; -} - -.install-prompt { - color: var(--primary-light); - font-weight: 600; - flex-shrink: 0; - opacity: 0.7; -} - -.install-widget-body code { - flex: 1; - white-space: nowrap; - overflow: hidden; - text-overflow: ellipsis; - text-align: left; - transition: opacity 0.15s var(--ease-out-quad); -} - -/* --- Code block tabs (install step section) --- */ -.code-tabs { - display: flex; - gap: 2px; -} - -.code-tab { - padding: 3px 10px; - border: none; - border-radius: 4px; - font-family: var(--font-mono); - font-size: 11px; - font-weight: 500; - cursor: pointer; - transition: color 0.2s var(--ease-out-quad), background 0.2s var(--ease-out-quad); - background: transparent; - color: var(--text-muted); -} - -.code-tab:hover { - color: var(--text-dim); - background: rgba(255, 255, 255, 0.04); -} - -.code-tab.active { - background: rgba(48, 80, 255, 0.12); - color: var(--primary-light); -} - -.copy-btn { - flex-shrink: 0; - display: flex; - align-items: center; - gap: 6px; - background: none; - border: none; - color: var(--text-dim); - cursor: pointer; - padding: 4px 8px; - border-radius: 6px; - font-family: var(--font-sans); - font-size: 12px; - transition: color 0.2s var(--ease-out-quad), background 0.2s var(--ease-out-quad); -} -.copy-btn:hover { - color: var(--primary-light); - background: rgba(48, 80, 255, 0.1); -} -.copy-btn:active { - transform: scale(0.95); -} 
- -.install-note { - font-size: 13px; - color: var(--text-muted); - margin-top: 12px; -} - -.hero-links { - display: flex; - gap: 12px; - justify-content: center; - flex-wrap: wrap; -} - -.btn { - display: inline-flex; - align-items: center; - gap: 8px; - padding: 11px 24px; - border-radius: var(--radius); - font-size: 14px; - font-weight: 550; - transition: background 0.25s var(--ease-out-quint), border-color 0.25s var(--ease-out-quad), color 0.2s var(--ease-out-quad), transform 0.25s var(--ease-out-quint); - border: 1px solid transparent; - will-change: transform; -} - -.btn-primary { - background: rgba(48, 80, 255, 0.12); - color: var(--primary-light); - border-color: rgba(48, 80, 255, 0.25); -} -.btn-primary:hover { - background: rgba(48, 80, 255, 0.22); - border-color: rgba(48, 80, 255, 0.4); - color: #fff; -} - -@media (hover: hover) and (pointer: fine) { - .btn-primary:hover { - transform: translateY(-1px); - } -} -.btn:active { - transform: scale(0.97); -} - -/* --- Sections --- */ -.section { - position: relative; - z-index: 1; - padding: 80px 0; -} - -.section-header { - display: flex; - align-items: center; - justify-content: center; - gap: 12px; - margin-bottom: 48px; -} - -.section-header h2 { - font-size: 28px; - font-weight: 650; - color: #fff; - letter-spacing: -0.02em; -} - -.section-desc { - color: var(--text-dim); - font-size: 16px; - line-height: 1.7; - max-width: 640px; - margin: 0 auto 40px; - text-align: center; -} - -/* --- Features Grid --- */ -.features-grid { - display: grid; - grid-template-columns: repeat(3, 1fr); - gap: 16px; -} - -.feature-card { - background: var(--bg-card); - border: 1px solid var(--border); - border-radius: var(--radius); - padding: 20px; - transition: border-color 0.3s var(--ease-out-quad), background 0.3s var(--ease-out-quad), transform 0.3s var(--ease-out-quint); - will-change: transform; -} - -.feature-card:hover { - border-color: var(--border-hover); - background: var(--bg-card-hover); -} - -@media (hover: 
hover) and (pointer: fine) { - .feature-card:hover { - transform: translateY(-2px); - } -} - -.feature-header { - display: flex; - align-items: center; - gap: 10px; - margin-bottom: 10px; -} - -.feature-icon { - color: var(--primary-light); - opacity: 0.85; - flex-shrink: 0; - display: flex; - line-height: 0; -} - -.feature-card h3 { - font-size: 15px; - font-weight: 600; - color: #fff; - letter-spacing: -0.01em; -} - -.feature-card p { - font-size: 14px; - color: var(--text-dim); - line-height: 1.65; -} - -/* --- Terminal Demo --- */ -.section-demo { - padding-bottom: 60px; - border-top: 1px solid var(--border); - border-bottom: 1px solid var(--border); -} - -.terminal-window { - background: #0c0c14; - border: 1px solid var(--border); - border-radius: var(--radius); - overflow: hidden; - max-width: 800px; - margin: 0 auto; -} - -.terminal-header { - display: flex; - align-items: center; - padding: 12px 16px; - background: rgba(255, 255, 255, 0.02); - border-bottom: 1px solid var(--border); - gap: 12px; -} - -.terminal-dots { - display: flex; - gap: 6px; -} - -.dot { - width: 10px; - height: 10px; - border-radius: 50%; -} -.dot-red { background: #ff5f57; } -.dot-yellow { background: #febc2e; } -.dot-green { background: #28c840; } - -.terminal-title { - font-family: var(--font-mono); - font-size: 12px; - color: var(--text-muted); -} - -.terminal-body { - padding: 20px 24px; - height: 340px; - font-family: var(--font-mono); - font-size: 13px; - line-height: 1.7; - white-space: pre-wrap; - overflow-y: auto; - overflow-x: hidden; -} - -.terminal-cursor { - animation: blink 1s step-end infinite; - color: var(--primary-light); - opacity: 0.8; -} - -@keyframes blink { - 0%, 100% { opacity: 0.8; } - 50% { opacity: 0; } -} - -/* Terminal demo colors */ -.t-prompt { color: var(--primary-light); } -.t-cmd { color: #fff; } -.t-dim { color: var(--text-muted); } -.t-text { color: var(--text-dim); } -.t-green { color: #4ade80; } -.t-blue { color: #60a5fa; } -.t-accent { color: 
var(--primary-light); } -.t-highlight { color: #90B0FF; } -.t-tool { color: var(--text-muted); } - -/* --- Specs Toggle --- */ -.features-more { - text-align: center; - margin-top: 32px; -} - -.more-toggle { - background: none; - border: 1px solid var(--border); - color: var(--text-dim); - font-size: 14px; - font-family: inherit; - padding: 8px 20px; - border-radius: 6px; - cursor: pointer; - display: inline-flex; - align-items: center; - gap: 6px; - transition: color 0.2s var(--ease-out-quad), border-color 0.2s var(--ease-out-quad); -} - -.more-toggle:hover { - color: var(--primary-light); - border-color: var(--primary-light); -} -.more-toggle:active { - transform: scale(0.97); -} - -.more-chevron { - transition: transform 0.3s var(--ease-in-out-cubic); -} - -.more-toggle.open .more-chevron { - transform: rotate(180deg); -} - -.specs-wrapper { - max-height: 0; - overflow: hidden; - transition: max-height 0.4s var(--ease-out-quart), opacity 0.3s var(--ease-out-quad); - opacity: 0; -} - -.specs-wrapper.open { - opacity: 1; -} - -/* --- Specs --- */ -.section-specs { -} - -.specs-list { - max-width: 720px; - margin: 0 auto; - padding-top: 24px; -} - -.spec-row { - display: grid; - grid-template-columns: 120px 1fr; - gap: 24px; - padding: 24px 0; - border-bottom: 1px solid var(--border); -} - -.spec-row:last-child { - border-bottom: none; -} - -.spec-label { - font-size: 14px; - font-weight: 600; - color: var(--primary-light); - padding-top: 2px; -} - -.spec-value { - font-size: 15px; - color: var(--text-dim); - line-height: 1.7; -} - -.spec-value a { - color: var(--text); - border-bottom: 1px solid var(--border-hover); - transition: border-color 0.2s var(--ease-out-quad), color 0.2s var(--ease-out-quad); -} - -.spec-value a:hover { - color: var(--primary-light); - border-color: var(--primary-light); -} - -/* --- Install Section --- */ -.section-install { - border-top: 1px solid var(--border); -} - -.install-steps { - display: grid; - gap: 28px; - max-width: 640px; - 
margin: 0 auto; -} - -.install-step { - display: flex; - gap: 20px; -} - -.step-number { - flex-shrink: 0; - width: 32px; - height: 32px; - display: flex; - align-items: center; - justify-content: center; - background: rgba(48, 80, 255, 0.1); - border: 1px solid rgba(48, 80, 255, 0.2); - border-radius: 50%; - font-size: 14px; - font-weight: 600; - color: var(--primary-light); - margin-top: 2px; -} - -.step-content { - flex: 1; - min-width: 0; -} - -.step-content h4 { - font-size: 16px; - font-weight: 600; - color: #fff; - margin-bottom: 10px; -} - -.step-optional { - font-size: 12px; - font-weight: 400; - color: var(--text-muted); -} - -.step-note { - font-size: 13px; - color: var(--text-muted); - margin-top: 8px; -} - -.code-block { - background: #0c0c14; - border: 1px solid var(--border); - border-radius: var(--radius-sm); - overflow: hidden; -} - -.code-block-sm { - max-width: 640px; -} - -.code-header { - display: flex; - justify-content: space-between; - align-items: center; - padding: 8px 14px; - background: rgba(255, 255, 255, 0.02); - border-bottom: 1px solid var(--border); - font-family: var(--font-mono); - font-size: 11px; - color: var(--text-muted); -} - -.code-block pre { - padding: 14px 16px; - font-family: var(--font-mono); - font-size: 13px; - line-height: 1.6; - color: var(--text); - overflow-x: auto; - white-space: pre-wrap; - word-break: break-all; -} - -.code-comment { - color: var(--text-muted); -} - -.install-windows { - margin-top: 48px; - padding-top: 32px; - border-top: 1px solid var(--border); - max-width: 640px; - margin-left: auto; - margin-right: auto; -} - -.install-windows p { - font-size: 14px; - color: var(--text-dim); - margin-bottom: 12px; -} - -/* --- Footer --- */ -.footer { - position: relative; - z-index: 1; - padding: 40px 0 32px; - border-top: 1px solid var(--border); -} - -.footer-copy { - text-align: center; - font-size: 13px; - color: var(--text-muted); -} - -.footer-copy a { - color: var(--text-dim); - transition: color 
0.2s var(--ease-out-quad); -} - -.footer-copy a:hover { - color: var(--primary-light); -} - -/* --- Scroll Animations --- */ -.fade-in { - opacity: 0; - transform: translateY(20px); - transition: opacity 0.6s var(--ease-out-quart), transform 0.6s var(--ease-out-quart); - will-change: transform, opacity; -} - -.fade-in.visible { - opacity: 1; - transform: translateY(0); -} - -/* --- Responsive --- */ - -/* Clamp ambient glows so they can't cause horizontal scroll */ -@media (max-width: 900px) { - .ambient-glow { display: none; } - - .features-grid { - grid-template-columns: repeat(2, 1fr); - } - -} - -@media (max-width: 640px) { - /* --- Global mobile --- */ - .container { - padding: 0 16px; - } - - .section { - padding: 50px 0; - } - - .section-header { - margin-bottom: 32px; - } - - .section-header h2 { - font-size: 20px; - } - - .section-desc { - font-size: 14px; - } - - /* --- Nav --- */ - .nav-inner { - padding: 0 16px; - } - - .nav-links { - display: none; - } - - .nav-hamburger { - display: flex; - } - - /* --- Hero --- */ - .hero { - padding: 90px 16px 50px; - min-height: auto; - } - - .hero-content { - max-width: 100%; - } - - .hero-badge { - font-size: 11px; - padding: 5px 12px; - margin-bottom: 24px; - } - - .hero-ascii { - font-size: 3.5px; - } - - .hero-title { - font-size: 26px; - margin-bottom: 14px; - } - - .hero-subtitle { - font-size: 14px; - line-height: 1.6; - margin: 0 auto 28px; - } - - .install-widget-body { - font-size: 10px; - padding: 10px 12px; - } - - .install-widget-body code { - overflow: hidden; - text-overflow: ellipsis; - display: block; - } - - .install-widget-header { - padding: 8px 12px; - gap: 10px; - } - - .install-tabs { - gap: 2px; - } - - .install-tab { - padding: 4px 10px; - font-size: 11px; - } - - .install-tab svg { - display: none; - } - - .copy-btn { - padding: 3px 6px; - } - - .copy-btn .copy-text { display: none; } - - .install-note { - font-size: 11px; - } - - .hero-links { - flex-direction: column; - align-items: 
stretch; - } - - .hero-links .btn { - justify-content: center; - } - - /* --- Grids → single column --- */ - .features-grid { - grid-template-columns: 1fr; - } - - .spec-row { - grid-template-columns: 1fr; - gap: 6px; - padding: 18px 0; - } - - .feature-card { - padding: 16px 18px; - } - - .feature-card p { - font-size: 13px; - line-height: 1.5; - } - - /* --- Terminal demo --- */ - .terminal-body { - font-size: 11px; - padding: 14px; - height: 260px; - } - - /* --- Install steps --- */ - .install-steps { - max-width: 100%; - } - - .install-step { - gap: 14px; - } - - .step-number { - width: 28px; - height: 28px; - font-size: 13px; - } - - .code-block pre { - font-size: 11px; - word-break: break-all; - } - - .install-windows { - max-width: 100%; - } - - /* --- Footer --- */ - .footer { - padding: 32px 0 24px; - } - -} - -/* --- Reduced Motion --- */ -@media (prefers-reduced-motion: reduce) { - *, *::before, *::after { - animation-duration: 0.01ms !important; - animation-iteration-count: 1 !important; - transition-duration: 0.01ms !important; - } - - .fade-in { - opacity: 1; - transform: none; - } - - .hero-ascii { - opacity: 0.85; - } -} - -/* --- Selection --- */ -::selection { - background: rgba(48, 80, 255, 0.25); - color: #fff; -} - -/* --- Scrollbar --- */ -::-webkit-scrollbar { - width: 6px; - height: 6px; -} -::-webkit-scrollbar-track { - background: var(--bg); -} -::-webkit-scrollbar-thumb { - background: var(--border-hover); - border-radius: 3px; -} -::-webkit-scrollbar-thumb:hover { - background: var(--primary-dim); -} diff --git a/mcp_serve.py b/mcp_serve.py index e8294d1f91..e0aeb70619 100644 --- a/mcp_serve.py +++ b/mcp_serve.py @@ -433,7 +433,7 @@ def create_mcp_server(event_bridge: Optional[EventBridge] = None) -> "FastMCP": if not _MCP_SERVER_AVAILABLE: raise ImportError( "MCP server requires the 'mcp' package. 
" - "Install with: pip install 'hermes-agent[mcp]'" + f"Install with: {sys.executable} -m pip install 'mcp'" ) mcp = FastMCP( @@ -838,7 +838,7 @@ def run_mcp_server(verbose: bool = False) -> None: if not _MCP_SERVER_AVAILABLE: print( "Error: MCP server requires the 'mcp' package.\n" - "Install with: pip install 'hermes-agent[mcp]'", + f"Install with: {sys.executable} -m pip install 'mcp'", file=sys.stderr, ) sys.exit(1) diff --git a/mini_swe_runner.py b/mini_swe_runner.py index 28c0ae48c9..a642e2411f 100644 --- a/mini_swe_runner.py +++ b/mini_swe_runner.py @@ -43,6 +43,18 @@ from dotenv import load_dotenv load_dotenv() +def _effective_temperature_for_model( + model: str, + base_url: Optional[str] = None, +) -> Optional[float]: + """Return a fixed temperature for models with strict sampling contracts.""" + try: + from agent.auxiliary_client import _fixed_temperature_for_model + except Exception: + return None + return _fixed_temperature_for_model(model, base_url) + + # ============================================================================ @@ -442,12 +454,20 @@ Complete the user's task step by step.""" # Make API call try: - response = self.client.chat.completions.create( - model=self.model, - messages=api_messages, - tools=self.tools, - timeout=300.0 + api_kwargs = { + "model": self.model, + "messages": api_messages, + "tools": self.tools, + "timeout": 300.0, + } + fixed_temperature = _effective_temperature_for_model( + self.model, + str(getattr(self.client, "base_url", "") or ""), ) + if fixed_temperature is not None: + api_kwargs["temperature"] = fixed_temperature + + response = self.client.chat.completions.create(**api_kwargs) except Exception as e: self.logger.error(f"API call failed: {e}") break diff --git a/model_tools.py b/model_tools.py index 1924b25168..db4b46326b 100644 --- a/model_tools.py +++ b/model_tools.py @@ -26,7 +26,7 @@ import logging import threading from typing import Dict, Any, List, Optional, Tuple -from tools.registry import registry 
+from tools.registry import discover_builtin_tools, registry from toolsets import resolve_toolset, validate_toolset logger = logging.getLogger(__name__) @@ -129,45 +129,7 @@ def _run_async(coro): # Tool Discovery (importing each module triggers its registry.register calls) # ============================================================================= -def _discover_tools(): - """Import all tool modules to trigger their registry.register() calls. - - Wrapped in a function so import errors in optional tools (e.g., fal_client - not installed) don't prevent the rest from loading. - """ - _modules = [ - "tools.web_tools", - "tools.terminal_tool", - "tools.file_tools", - "tools.vision_tools", - "tools.mixture_of_agents_tool", - "tools.image_generation_tool", - "tools.skills_tool", - "tools.skill_manager_tool", - "tools.browser_tool", - "tools.cronjob_tools", - "tools.rl_training_tool", - "tools.tts_tool", - "tools.todo_tool", - "tools.memory_tool", - "tools.session_search_tool", - "tools.clarify_tool", - "tools.code_execution_tool", - "tools.delegate_tool", - "tools.process_registry", - "tools.send_message_tool", - # "tools.honcho_tools", # Removed — Honcho is now a memory provider plugin - "tools.homeassistant_tool", - ] - import importlib - for mod_name in _modules: - try: - importlib.import_module(mod_name) - except Exception as e: - logger.warning("Could not import tool module %s: %s", mod_name, e) - - -_discover_tools() +discover_builtin_tools() # MCP tool discovery (external MCP servers from config) try: @@ -312,14 +274,39 @@ def get_tool_definitions( # execute_code" even when the API key isn't configured or the toolset is # disabled (#560-discord). 
if "execute_code" in available_tool_names: - from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema + from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema, _get_execution_mode sandbox_enabled = SANDBOX_ALLOWED_TOOLS & available_tool_names - dynamic_schema = build_execute_code_schema(sandbox_enabled) + dynamic_schema = build_execute_code_schema(sandbox_enabled, mode=_get_execution_mode()) for i, td in enumerate(filtered_tools): if td.get("function", {}).get("name") == "execute_code": filtered_tools[i] = {"type": "function", "function": dynamic_schema} break + # Rebuild discord_server schema based on the bot's privileged intents + # (detected from GET /applications/@me) and the user's action allowlist + # in config. Hides actions the bot's intents don't support so the + # model never attempts them, and annotates fetch_messages when the + # MESSAGE_CONTENT intent is missing. + if "discord_server" in available_tool_names: + try: + from tools.discord_tool import get_dynamic_schema + dynamic = get_dynamic_schema() + except Exception: # pragma: no cover — defensive, fall back to static + dynamic = None + if dynamic is None: + # Tool filtered out entirely (empty allowlist or detection disabled + # the only remaining actions). Drop it from the schema list. + filtered_tools = [ + t for t in filtered_tools + if t.get("function", {}).get("name") != "discord_server" + ] + available_tool_names.discard("discord_server") + else: + for i, td in enumerate(filtered_tools): + if td.get("function", {}).get("name") == "discord_server": + filtered_tools[i] = {"type": "function", "function": dynamic} + break + # Strip web tool cross-references from browser_navigate description when # web_search / web_extract are not available. 
The static schema says # "prefer web_search or web_extract" which causes the model to hallucinate @@ -563,6 +550,30 @@ def handle_function_call( except Exception: pass + # Generic tool-result canonicalization seam: plugins receive the + # final result string (JSON, usually) and may replace it by + # returning a string from transform_tool_result. Runs after + # post_tool_call (which stays observational) and before the result + # is appended back into conversation context. Fail-open; the first + # valid string return wins; non-string returns are ignored. + try: + from hermes_cli.plugins import invoke_hook + hook_results = invoke_hook( + "transform_tool_result", + tool_name=function_name, + args=function_args, + result=result, + task_id=task_id or "", + session_id=session_id or "", + tool_call_id=tool_call_id or "", + ) + for hook_result in hook_results: + if isinstance(hook_result, str): + result = hook_result + break + except Exception: + pass + return result except Exception as e: diff --git a/nix/checks.nix b/nix/checks.nix index 6dd5115c93..984016a4f4 100644 --- a/nix/checks.nix +++ b/nix/checks.nix @@ -37,7 +37,30 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2) in { packages.configKeys = configKeys; - checks = lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux { + checks = { + # Cross-platform evaluation — catches "not supported for interpreter" + # errors (e.g. sphinx dropping python311) without needing a darwin builder. + # Evaluation is pure and instant; it doesn't build anything. + cross-eval = let + targetSystems = builtins.filter + (s: inputs.self.packages ? 
${s}) + [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" "x86_64-darwin" ]; + tryEvalPkg = sys: + let pkg = inputs.self.packages.${sys}.default; + in builtins.tryEval (builtins.seq pkg.drvPath true); + results = map (sys: { inherit sys; result = tryEvalPkg sys; }) targetSystems; + failures = builtins.filter (r: !r.result.success) results; + failMsg = lib.concatMapStringsSep "\n" (r: " - ${r.sys}") failures; + in pkgs.runCommand "hermes-cross-eval" { } ( + if failures != [] then + builtins.throw "Package fails to evaluate on:\n${failMsg}" + else '' + echo "PASS: package evaluates on all ${toString (builtins.length targetSystems)} platforms" + mkdir -p $out + echo "ok" > $out/result + '' + ); + } // lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux { # Verify binaries exist and are executable package-contents = pkgs.runCommand "hermes-package-contents" { } '' set -e @@ -103,6 +126,51 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2) echo "ok" > $out/result ''; + # Verify bundled TUI is present and compiled + bundled-tui = pkgs.runCommand "hermes-bundled-tui" { } '' + set -e + echo "=== Checking bundled TUI ===" + test -d ${hermes-agent}/ui-tui || (echo "FAIL: ui-tui directory missing"; exit 1) + echo "PASS: ui-tui directory exists" + + test -f ${hermes-agent}/ui-tui/dist/entry.js || (echo "FAIL: compiled entry.js missing"; exit 1) + echo "PASS: compiled entry.js present" + + test -d ${hermes-agent}/ui-tui/node_modules || (echo "FAIL: node_modules missing"; exit 1) + echo "PASS: node_modules present" + + grep -q "HERMES_TUI_DIR" ${hermes-agent}/bin/hermes || \ + (echo "FAIL: HERMES_TUI_DIR not in wrapper"; exit 1) + echo "PASS: HERMES_TUI_DIR set in wrapper" + + echo "=== All bundled TUI checks passed ===" + mkdir -p $out + echo "ok" > $out/result + ''; + + # Verify HERMES_NODE is set in wrapper and points to Node 20+ + # (string-width uses the /v regex flag which requires Node 20+) + hermes-node = pkgs.runCommand "hermes-node-version" { } '' + 
set -e + echo "=== Checking HERMES_NODE in wrapper ===" + grep -q "HERMES_NODE" ${hermes-agent}/bin/hermes || \ + (echo "FAIL: HERMES_NODE not set in wrapper"; exit 1) + echo "PASS: HERMES_NODE present in wrapper" + + HERMES_NODE=$(sed -n "s/^export HERMES_NODE='\(.*\)'/\1/p" ${hermes-agent}/bin/hermes) + test -x "$HERMES_NODE" || (echo "FAIL: HERMES_NODE=$HERMES_NODE not executable"; exit 1) + echo "PASS: HERMES_NODE executable at $HERMES_NODE" + + NODE_MAJOR=$("$HERMES_NODE" --version | sed 's/^v//' | cut -d. -f1) + test "$NODE_MAJOR" -ge 20 || \ + (echo "FAIL: Node v$NODE_MAJOR < 20, TUI needs /v regex flag support"; exit 1) + echo "PASS: Node v$NODE_MAJOR >= 20" + + echo "=== All HERMES_NODE checks passed ===" + mkdir -p $out + echo "ok" > $out/result + ''; + # Verify HERMES_MANAGED guard works on all mutation commands managed-guard = pkgs.runCommand "hermes-managed-guard" { } '' set -e diff --git a/nix/devShell.nix b/nix/devShell.nix index 7f8b5a1b03..63edc59cf1 100644 --- a/nix/devShell.nix +++ b/nix/devShell.nix @@ -1,49 +1,26 @@ -# nix/devShell.nix — Fast dev shell with stamp-file optimization +# nix/devShell.nix — Dev shell that delegates setup to each package +# +# Each package in inputsFrom exposes passthru.devShellHook — a bash snippet +# with stamp-checked setup logic. This file collects and runs them all. { inputs, ... }: { - perSystem = { pkgs, ... }: + perSystem = { pkgs, system, ... 
}: let - python = pkgs.python311; + hermes-agent = inputs.self.packages.${system}.default; + hermes-tui = inputs.self.packages.${system}.tui; + packages = [ hermes-agent hermes-tui ]; in { devShells.default = pkgs.mkShell { + inputsFrom = packages; packages = with pkgs; [ - python uv nodejs_20 ripgrep git openssh ffmpeg + python312 uv nodejs_22 ripgrep git openssh ffmpeg ]; - shellHook = '' + shellHook = let + hooks = map (p: p.passthru.devShellHook or "") packages; + combined = pkgs.lib.concatStringsSep "\n" (builtins.filter (h: h != "") hooks); + in '' echo "Hermes Agent dev shell" - - # Composite stamp: changes when nix python or uv change - STAMP_VALUE="${python}:${pkgs.uv}" - STAMP_FILE=".venv/.nix-stamp" - - # Create venv if missing - if [ ! -d .venv ]; then - echo "Creating Python 3.11 venv..." - uv venv .venv --python ${python}/bin/python3 - fi - - source .venv/bin/activate - - # Only install if stamp is stale or missing - if [ ! -f "$STAMP_FILE" ] || [ "$(cat "$STAMP_FILE")" != "$STAMP_VALUE" ]; then - echo "Installing Python dependencies..." - uv pip install -e ".[all]" - if [ -d mini-swe-agent ]; then - uv pip install -e ./mini-swe-agent 2>/dev/null || true - fi - if [ -d tinker-atropos ]; then - uv pip install -e ./tinker-atropos 2>/dev/null || true - fi - - # Install npm deps - if [ -f package.json ] && [ ! -d node_modules ]; then - echo "Installing npm dependencies..." - npm install - fi - - echo "$STAMP_VALUE" > "$STAMP_FILE" - fi - + ${combined} echo "Ready. Run 'hermes' to start." ''; }; diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix index 75b3dca31b..3f2709f814 100644 --- a/nix/nixosModules.nix +++ b/nix/nixosModules.nix @@ -121,11 +121,19 @@ # ── Provision apt packages (first boot only, cached in writable layer) ── # sudo: agent self-modification # nodejs/npm: writable node so npm i -g works (nix store copies are read-only) - # curl: needed for uv installer + # Node 22 via NodeSource — Ubuntu 24.04 ships Node 18 which is EOL. 
+ # curl: needed for uv installer + NodeSource setup if [ ! -f /var/lib/hermes-tools-provisioned ] && command -v apt-get >/dev/null 2>&1; then echo "First boot: provisioning agent tools..." apt-get update -qq - apt-get install -y -qq sudo nodejs npm curl + apt-get install -y -qq sudo curl ca-certificates gnupg + mkdir -p /etc/apt/keyrings + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \ + | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg + echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" \ + > /etc/apt/sources.list.d/nodesource.list + apt-get update -qq + apt-get install -y -qq nodejs touch /var/lib/hermes-tools-provisioned fi @@ -140,15 +148,14 @@ su -s /bin/sh "$TARGET_USER" -c 'curl -LsSf https://astral.sh/uv/install.sh | sh' || true fi - # Python 3.11 venv — gives the agent a writable Python with pip. - # Uses uv to install Python 3.11 (Ubuntu 24.04 ships 3.12). + # Python 3.12 venv — gives the agent a writable Python with pip. # --seed includes pip/setuptools so bare `pip install` works. _UV_BIN="$TARGET_HOME/.local/bin/uv" if [ ! -d "$TARGET_HOME/.venv" ] && [ -x "$_UV_BIN" ]; then su -s /bin/sh "$TARGET_USER" -c " export PATH=\"\$HOME/.local/bin:\$PATH\" - uv python install 3.11 - uv venv --python 3.11 --seed \"\$HOME/.venv\" + uv python install 3.12 + uv venv --python 3.12 --seed \"\$HOME/.venv\" " || true fi @@ -171,7 +178,7 @@ # Package and entrypoint use stable symlinks (current-package, current-entrypoint) # so they can update without recreation. Env vars go through $HERMES_HOME/.env. 
containerIdentity = builtins.hashString "sha256" (builtins.toJSON { - schema = 3; # bump when identity inputs change + schema = 4; # bump when identity inputs change (4: Node 18→22 via NodeSource) image = cfg.container.image; extraVolumes = cfg.container.extraVolumes; extraOptions = cfg.container.extraOptions; diff --git a/nix/packages.nix b/nix/packages.nix index eb50d4a17b..912be7843b 100644 --- a/nix/packages.nix +++ b/nix/packages.nix @@ -1,54 +1,116 @@ # nix/packages.nix — Hermes Agent package built with uv2nix -{ inputs, ... }: { - perSystem = { pkgs, system, ... }: +{ inputs, ... }: +{ + perSystem = + { pkgs, inputs', ... }: let hermesVenv = pkgs.callPackage ./python.nix { inherit (inputs) uv2nix pyproject-nix pyproject-build-systems; }; + hermesTui = pkgs.callPackage ./tui.nix { + npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default; + }; + # Import bundled skills, excluding runtime caches bundledSkills = pkgs.lib.cleanSourceWith { src = ../skills; - filter = path: _type: - !(pkgs.lib.hasInfix "/index-cache/" path); + filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path); + }; + + hermesWeb = pkgs.callPackage ./web.nix { + npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default; }; runtimeDeps = with pkgs; [ - nodejs_20 ripgrep git openssh ffmpeg tirith + nodejs_22 + ripgrep + git + openssh + ffmpeg + tirith ]; runtimePath = pkgs.lib.makeBinPath runtimeDeps; - in { - packages.default = pkgs.stdenv.mkDerivation { - pname = "hermes-agent"; - version = (builtins.fromTOML (builtins.readFile ../pyproject.toml)).project.version; - dontUnpack = true; - dontBuild = true; - nativeBuildInputs = [ pkgs.makeWrapper ]; + # Lockfile hashes for dev shell stamps + pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml); + uvLockHash = + if builtins.pathExists ../uv.lock then + builtins.hashString "sha256" (builtins.readFile ../uv.lock) + else + "none"; + in + { + packages = { + default = pkgs.stdenv.mkDerivation { + pname = 
"hermes-agent"; + version = (fromTOML (builtins.readFile ../pyproject.toml)).project.version; - installPhase = '' - runHook preInstall + dontUnpack = true; + dontBuild = true; + nativeBuildInputs = [ pkgs.makeWrapper ]; - mkdir -p $out/share/hermes-agent $out/bin - cp -r ${bundledSkills} $out/share/hermes-agent/skills + installPhase = '' + runHook preInstall - ${pkgs.lib.concatMapStringsSep "\n" (name: '' - makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \ - --suffix PATH : "${runtimePath}" \ - --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills - '') [ "hermes" "hermes-agent" "hermes-acp" ]} + mkdir -p $out/share/hermes-agent $out/bin + cp -r ${bundledSkills} $out/share/hermes-agent/skills + cp -r ${hermesWeb} $out/share/hermes-agent/web_dist - runHook postInstall - ''; + # copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/) + mkdir -p $out/ui-tui + cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/ - meta = with pkgs.lib; { - description = "AI agent with advanced tool-calling capabilities"; - homepage = "https://github.com/NousResearch/hermes-agent"; - mainProgram = "hermes"; - license = licenses.mit; - platforms = platforms.unix; + ${pkgs.lib.concatMapStringsSep "\n" + (name: '' + makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \ + --suffix PATH : "${runtimePath}" \ + --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \ + --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \ + --set HERMES_TUI_DIR $out/ui-tui \ + --set HERMES_PYTHON ${hermesVenv}/bin/python3 \ + --set HERMES_NODE ${pkgs.nodejs_22}/bin/node + '') + [ + "hermes" + "hermes-agent" + "hermes-acp" + ] + } + + runHook postInstall + ''; + + passthru.devShellHook = '' + STAMP=".nix-stamps/hermes-agent" + STAMP_VALUE="${pyprojectHash}:${uvLockHash}" + if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then + echo "hermes-agent: installing Python dependencies..." 
+ uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true + source .venv/bin/activate + uv pip install -e ".[all]" + [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true + [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true + mkdir -p .nix-stamps + echo "$STAMP_VALUE" > "$STAMP" + else + source .venv/bin/activate + export HERMES_PYTHON=${hermesVenv}/bin/python3 + fi + ''; + + meta = with pkgs.lib; { + description = "AI agent with advanced tool-calling capabilities"; + homepage = "https://github.com/NousResearch/hermes-agent"; + mainProgram = "hermes"; + license = licenses.mit; + platforms = platforms.unix; + }; }; + + tui = hermesTui; + web = hermesWeb; }; }; } diff --git a/nix/python.nix b/nix/python.nix index 160b4ee790..0bcd017e76 100644 --- a/nix/python.nix +++ b/nix/python.nix @@ -1,6 +1,6 @@ # nix/python.nix — uv2nix virtual environment builder { - python311, + python312, lib, callPackage, uv2nix, @@ -35,30 +35,46 @@ let }; }; + # Legacy alibabacloud packages ship only sdists with setup.py/setup.cfg + # and no pyproject.toml, so setuptools isn't declared as a build dep. 
+ buildSystemOverrides = final: prev: builtins.mapAttrs + (name: _: prev.${name}.overrideAttrs (old: { + nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.setuptools ]; + })) + (lib.genAttrs [ + "alibabacloud-credentials-api" + "alibabacloud-endpoint-util" + "alibabacloud-gateway-dingtalk" + "alibabacloud-gateway-spi" + "alibabacloud-tea" + ] (_: null)); + pythonPackageOverrides = final: _prev: if isAarch64Darwin then { - numpy = mkPrebuiltOverride final python311.pkgs.numpy { }; + numpy = mkPrebuiltOverride final python312.pkgs.numpy { }; - av = mkPrebuiltOverride final python311.pkgs.av { }; + pyarrow = mkPrebuiltOverride final python312.pkgs.pyarrow { }; - humanfriendly = mkPrebuiltOverride final python311.pkgs.humanfriendly { }; + av = mkPrebuiltOverride final python312.pkgs.av { }; - coloredlogs = mkPrebuiltOverride final python311.pkgs.coloredlogs { + humanfriendly = mkPrebuiltOverride final python312.pkgs.humanfriendly { }; + + coloredlogs = mkPrebuiltOverride final python312.pkgs.coloredlogs { humanfriendly = [ ]; }; - onnxruntime = mkPrebuiltOverride final python311.pkgs.onnxruntime { + onnxruntime = mkPrebuiltOverride final python312.pkgs.onnxruntime { coloredlogs = [ ]; numpy = [ ]; packaging = [ ]; }; - ctranslate2 = mkPrebuiltOverride final python311.pkgs.ctranslate2 { + ctranslate2 = mkPrebuiltOverride final python312.pkgs.ctranslate2 { numpy = [ ]; pyyaml = [ ]; }; - faster-whisper = mkPrebuiltOverride final python311.pkgs.faster-whisper { + faster-whisper = mkPrebuiltOverride final python312.pkgs.faster-whisper { av = [ ]; ctranslate2 = [ ]; huggingface-hub = [ ]; @@ -70,11 +86,12 @@ let pythonSet = (callPackage pyproject-nix.build.packages { - python = python311; + python = python312; }).overrideScope (lib.composeManyExtensions [ pyproject-build-systems.overlays.default overlay + buildSystemOverrides pythonPackageOverrides ]); in diff --git a/nix/tui.nix b/nix/tui.nix new file mode 100644 index 0000000000..7303edecb9 --- /dev/null +++ 
b/nix/tui.nix @@ -0,0 +1,77 @@ +# nix/tui.nix — Hermes TUI (Ink/React) compiled with tsc and bundled +{ pkgs, npm-lockfile-fix, ... }: +let + src = ../ui-tui; + npmDeps = pkgs.fetchNpmDeps { + inherit src; + hash = "sha256-mG3vpgGi4ljt4X3XIf3I/5mIcm+rVTUAmx2DQ6YVA90="; + }; + + packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json")); + version = packageJson.version; + + npmLockHash = builtins.hashString "sha256" (builtins.readFile ../ui-tui/package-lock.json); +in +pkgs.buildNpmPackage { + pname = "hermes-tui"; + inherit src npmDeps version; + + doCheck = false; + + installPhase = '' + runHook preInstall + + mkdir -p $out/lib/hermes-tui + + cp -r dist $out/lib/hermes-tui/dist + + # runtime node_modules + cp -r node_modules $out/lib/hermes-tui/node_modules + + # @hermes/ink is a file: dependency, we need to copy it in fr + rm -f $out/lib/hermes-tui/node_modules/@hermes/ink + cp -r packages/hermes-ink $out/lib/hermes-tui/node_modules/@hermes/ink + + # package.json needed for "type": "module" resolution + cp package.json $out/lib/hermes-tui/ + + runHook postInstall + ''; + + nativeBuildInputs = [ + (pkgs.writeShellScriptBin "update_tui_lockfile" '' + set -euox pipefail + + # get root of repo + REPO_ROOT=$(git rev-parse --show-toplevel) + + # cd into ui-tui and reinstall + cd "$REPO_ROOT/ui-tui" + rm -rf node_modules/ + npm cache clean --force + CI=true npm install # ci env var to suppress annoying unicode install banner lag + ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json + + NIX_FILE="$REPO_ROOT/nix/tui.nix" + # compute the new hash + sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE + NIX_OUTPUT=$(nix build .#tui 2>&1 || true) + NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') + echo got new hash $NEW_HASH + sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE + nix build .#tui + echo "Updated npm hash in $NIX_FILE to $NEW_HASH" + '') + ]; + + passthru.devShellHook = '' + STAMP=".nix-stamps/hermes-tui" + 
STAMP_VALUE="${npmLockHash}" + if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then + echo "hermes-tui: installing npm dependencies..." + cd ui-tui && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd .. + mkdir -p .nix-stamps + echo "$STAMP_VALUE" > "$STAMP" + fi + ''; +} diff --git a/nix/web.nix b/nix/web.nix new file mode 100644 index 0000000000..247889753f --- /dev/null +++ b/nix/web.nix @@ -0,0 +1,63 @@ +# nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build +{ pkgs, npm-lockfile-fix, ... }: +let + src = ../web; + npmDeps = pkgs.fetchNpmDeps { + inherit src; + hash = "sha256-Y0pOzdFG8BLjfvCLmsvqYpjxFjAQabXp1i7X9W/cCU4="; + }; + + npmLockHash = builtins.hashString "sha256" (builtins.readFile ../web/package-lock.json); +in +pkgs.buildNpmPackage { + pname = "hermes-web"; + version = "0.0.0"; + inherit src npmDeps; + + doCheck = false; + + buildPhase = '' + npx tsc -b + npx vite build --outDir dist + ''; + + installPhase = '' + runHook preInstall + cp -r dist $out + runHook postInstall + ''; + + nativeBuildInputs = [ + (pkgs.writeShellScriptBin "update_web_lockfile" '' + set -euox pipefail + + REPO_ROOT=$(git rev-parse --show-toplevel) + + cd "$REPO_ROOT/web" + rm -rf node_modules/ + npm cache clean --force + CI=true npm install + ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json + + NIX_FILE="$REPO_ROOT/nix/web.nix" + sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE + NIX_OUTPUT=$(nix build .#web 2>&1 || true) + NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') + echo got new hash $NEW_HASH + sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE + nix build .#web + echo "Updated npm hash in $NIX_FILE to $NEW_HASH" + '') + ]; + + passthru.devShellHook = '' + STAMP=".nix-stamps/hermes-web" + STAMP_VALUE="${npmLockHash}" + if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then + echo "hermes-web: installing npm dependencies..." 
+ cd web && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd .. + mkdir -p .nix-stamps + echo "$STAMP_VALUE" > "$STAMP" + fi + ''; +} diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md index 174eaa5d48..1c099ca605 100644 --- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md +++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md @@ -1,12 +1,12 @@ --- name: honcho -description: Configure and use Honcho memory with Hermes -- cross-session user modeling, multi-profile peer isolation, observation config, and dialectic reasoning. Use when setting up Honcho, troubleshooting memory, managing profiles with Honcho peers, or tuning observation and recall settings. -version: 1.0.0 +description: Configure and use Honcho memory with Hermes -- cross-session user modeling, multi-profile peer isolation, observation config, dialectic reasoning, session summaries, and context budget enforcement. Use when setting up Honcho, troubleshooting memory, managing profiles with Honcho peers, or tuning observation, recall, and dialectic settings. +version: 2.0.0 author: Hermes Agent license: MIT metadata: hermes: - tags: [Honcho, Memory, Profiles, Observation, Dialectic, User-Modeling] + tags: [Honcho, Memory, Profiles, Observation, Dialectic, User-Modeling, Session-Summary] homepage: https://docs.honcho.dev related_skills: [hermes-agent] prerequisites: @@ -22,8 +22,9 @@ Honcho provides AI-native cross-session user modeling. 
It learns who the user is - Setting up Honcho (cloud or self-hosted) - Troubleshooting memory not working / peers not syncing - Creating multi-profile setups where each agent has its own Honcho peer -- Tuning observation, recall, or write frequency settings -- Understanding what the 4 Honcho tools do and when to use them +- Tuning observation, recall, dialectic depth, or write frequency settings +- Understanding what the 5 Honcho tools do and when to use them +- Configuring context budgets and session summary injection ## Setup @@ -51,6 +52,27 @@ hermes honcho status # shows resolved config, connection test, peer info ## Architecture +### Base Context Injection + +When Honcho injects context into the system prompt (in `hybrid` or `context` recall modes), it assembles the base context block in this order: + +1. **Session summary** -- a short digest of the current session so far (placed first so the model has immediate conversational continuity) +2. **User representation** -- Honcho's accumulated model of the user (preferences, facts, patterns) +3. **AI peer card** -- the identity card for this Hermes profile's AI peer + +The session summary is generated automatically by Honcho at the start of each turn (when a prior session exists). It gives the model a warm start without replaying full history. + +### Cold / Warm Prompt Selection + +Honcho automatically selects between two prompt strategies: + +| Condition | Strategy | What happens | +|-----------|----------|--------------| +| No prior session or empty representation | **Cold start** | Lightweight intro prompt; skips summary injection; encourages the model to learn about the user | +| Existing representation and/or session history | **Warm start** | Full base context injection (summary → representation → card); richer system prompt | + +You do not need to configure this -- it is automatic based on session state. + ### Peers Honcho models conversations as interactions between **peers**. 
Hermes creates two peers per session: @@ -112,6 +134,65 @@ How the agent accesses Honcho memory: | `context` | Yes | No (hidden) | Minimal token cost, no tool calls | | `tools` | No | Yes | Agent controls all memory access explicitly | +## Three Orthogonal Knobs + +Honcho's dialectic behavior is controlled by three independent dimensions. Each can be tuned without affecting the others: + +### Cadence (when) + +Controls **how often** dialectic and context calls happen. + +| Key | Default | Description | +|-----|---------|-------------| +| `contextCadence` | `1` | Min turns between context API calls | +| `dialecticCadence` | `2` | Min turns between dialectic API calls. Recommended 1–5 | +| `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection | + +Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires every other turn. Setting it to `1` fires every turn. + +### Depth (how many) + +Controls **how many rounds** of dialectic reasoning Honcho performs per query. + +| Key | Default | Range | Description | +|-----|---------|-------|-------------| +| `dialecticDepth` | `1` | 1-3 | Number of dialectic reasoning rounds per query | +| `dialecticDepthLevels` | -- | array | Optional per-depth-round level overrides (see below) | + +`dialecticDepth: 2` means Honcho runs two rounds of dialectic synthesis. The first round produces an initial answer; the second refines it. + +`dialecticDepthLevels` lets you set the reasoning level for each round independently: + +```json +{ + "dialecticDepth": 3, + "dialecticDepthLevels": ["low", "medium", "high"] +} +``` + +If `dialecticDepthLevels` is omitted, rounds use **proportional levels** derived from `dialecticReasoningLevel` (the base): + +| Depth | Pass levels | +|-------|-------------| +| 1 | [base] | +| 2 | [minimal, base] | +| 3 | [minimal, base, low] | + +This keeps earlier passes cheap while using full depth on the final synthesis. 
+ +**Depth at session start.** The session-start prewarm runs the full configured `dialecticDepth` in the background before turn 1. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. Turn 1 consumes the prewarm result directly; if prewarm hasn't landed in time, turn 1 falls back to a synchronous call with a bounded timeout. + +### Level (how hard) + +Controls the **intensity** of each dialectic reasoning round. + +| Key | Default | Description | +|-----|---------|-------------| +| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` | +| `dialecticDynamic` | `true` | When `true`, the model can pass `reasoning_level` to `honcho_reasoning` to override the default per-call. `false` = always use `dialecticReasoningLevel`, model overrides ignored | + +Higher levels produce richer synthesis but cost more tokens on Honcho's backend. + ## Multi-Profile Setup Each Hermes profile gets its own Honcho AI peer while sharing the same workspace (user context). This means: @@ -149,6 +230,7 @@ Override any setting in the host block: "hermes.coder": { "aiPeer": "coder", "recallMode": "tools", + "dialecticDepth": 2, "observation": { "user": { "observeMe": true, "observeOthers": false }, "ai": { "observeMe": true, "observeOthers": true } @@ -160,19 +242,97 @@ Override any setting in the host block: ## Tools -The agent has 4 Honcho tools (hidden in `context` recall mode): +The agent has 5 bidirectional Honcho tools (hidden in `context` recall mode): + +| Tool | LLM call? 
| Cost | Use when | +|------|-----------|------|----------| +| `honcho_profile` | No | minimal | Quick factual snapshot at conversation start or for fast name/role/pref lookups | +| `honcho_search` | No | low | Fetch specific past facts to reason over yourself — raw excerpts, no synthesis | +| `honcho_context` | No | low | Full session context snapshot: summary, representation, card, recent messages | +| `honcho_reasoning` | Yes | medium–high | Natural language question synthesized by Honcho's dialectic engine | +| `honcho_conclude` | No | minimal | Write or delete a persistent fact; pass `peer: "ai"` for AI self-knowledge | ### `honcho_profile` -Quick factual snapshot of the user -- name, role, preferences, patterns. No LLM call, minimal cost. Use at conversation start or for fast lookups. +Read or update a peer card — curated key facts (name, role, preferences, communication style). Pass `card: [...]` to update; omit to read. No LLM call. ### `honcho_search` -Semantic search over stored context. Returns raw excerpts ranked by relevance, no LLM synthesis. Default 800 tokens, max 2000. Use when you want specific past facts to reason over yourself. +Semantic search over stored context for a specific peer. Returns raw excerpts ranked by relevance, no synthesis. Default 800 tokens, max 2000. Good when you need specific past facts to reason over yourself rather than a synthesized answer. ### `honcho_context` -Natural language question answered by Honcho's dialectic reasoning (LLM call on Honcho's backend). Higher cost, higher quality. Can query about user (default) or the AI peer. +Full session context snapshot from Honcho — session summary, peer representation, peer card, and recent messages. No LLM call. Use when you want to see everything Honcho knows about the current session and peer in one shot. + +### `honcho_reasoning` +Natural language question answered by Honcho's dialectic reasoning engine (LLM call on Honcho's backend). Higher cost, higher quality. 
Pass `reasoning_level` to control depth: `minimal` (fast/cheap) → `low` → `medium` → `high` → `max` (thorough). Omit to use the configured default (`low`). Use for synthesized understanding of the user's patterns, goals, or current state. ### `honcho_conclude` -Write a persistent fact about the user. Conclusions build the user's profile over time. Use when the user states a preference, corrects you, or shares something to remember. +Write or delete a persistent conclusion about a peer. Pass `conclusion: "..."` to create. Pass `delete_id: "..."` to remove a conclusion (for PII removal — Honcho self-heals incorrect conclusions over time, so deletion is only needed for PII). You MUST pass exactly one of the two. + +### Bidirectional peer targeting + +All 5 tools accept an optional `peer` parameter: +- `peer: "user"` (default) — operates on the user peer +- `peer: "ai"` — operates on this profile's AI peer +- `peer: ""` — any peer ID in the workspace + +Examples: +``` +honcho_profile # read user's card +honcho_profile peer="ai" # read AI peer's card +honcho_reasoning query="What does this user care about most?" +honcho_reasoning query="What are my interaction patterns?" peer="ai" reasoning_level="medium" +honcho_conclude conclusion="Prefers terse answers" +honcho_conclude conclusion="I tend to over-explain code" peer="ai" +honcho_conclude delete_id="abc123" # PII removal +``` + +## Agent Usage Patterns + +Guidelines for Hermes when Honcho memory is active. + +### On conversation start + +``` +1. honcho_profile → fast warmup, no LLM cost +2. If context looks thin → honcho_context (full snapshot, still no LLM) +3. If deep synthesis needed → honcho_reasoning (LLM call, use sparingly) +``` + +Do NOT call `honcho_reasoning` on every turn. Auto-injection already handles ongoing context refresh. Use the reasoning tool only when you genuinely need synthesized insight the base context doesn't provide. 
+ +### When the user shares something to remember + +``` +honcho_conclude conclusion="<fact to remember>" +``` + +Good conclusions: "Prefers code examples over prose explanations", "Working on a Rust async project through April 2026" +Bad conclusions: "User said something about Rust" (too vague), "User seems technical" (already in representation) + +### When the user asks about past context / you need to recall specifics + +``` +honcho_search query="<search terms>" → fast, no LLM, good for specific facts +honcho_context → full snapshot with summary + messages +honcho_reasoning query="<question>" → synthesized answer, use when search isn't enough +``` + +### When to use `peer: "ai"` + +Use AI peer targeting to build and query the agent's own self-knowledge: +- `honcho_conclude conclusion="I tend to be verbose when explaining architecture" peer="ai"` — self-correction +- `honcho_reasoning query="How do I typically handle ambiguous requests?" peer="ai"` — self-audit +- `honcho_profile peer="ai"` — review own identity card + +### When NOT to call tools + +In `hybrid` and `context` modes, base context (user representation + card + session summary) is auto-injected before every turn. Do not re-fetch what was already injected. Call tools only when: +- You need something the injected context doesn't have +- The user explicitly asks you to recall or check memory +- You're writing a conclusion about something new + +### Cadence awareness + +`honcho_reasoning` on the tool side shares the same cost as auto-injection dialectic. After an explicit tool call, the auto-injection cadence resets — avoiding double-charging the same turn. 
## Config Reference @@ -191,18 +351,39 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso | `observation` | all on | Per-peer `observeMe`/`observeOthers` booleans | | `writeFrequency` | `async` | `async`, `turn`, `session`, or integer N | | `sessionStrategy` | `per-directory` | `per-directory`, `per-repo`, `per-session`, `global` | -| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` | -| `dialecticDynamic` | `true` | Auto-bump reasoning by query length. `false` = fixed level | | `messageMaxChars` | `25000` | Max chars per message (chunked if exceeded) | -| `dialecticMaxInputChars` | `10000` | Max chars for dialectic query input | -### Cost-awareness (advanced, root config only) +### Dialectic settings | Key | Default | Description | |-----|---------|-------------| +| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` | +| `dialecticDynamic` | `true` | Auto-bump reasoning by query complexity. `false` = fixed level | +| `dialecticDepth` | `1` | Number of dialectic rounds per query (1-3) | +| `dialecticDepthLevels` | -- | Optional array of per-round levels, e.g. `["low", "high"]` | +| `dialecticMaxInputChars` | `10000` | Max chars for dialectic query input | + +### Context budget and injection + +| Key | Default | Description | +|-----|---------|-------------| +| `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` | | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `1` | Min turns between dialectic API calls | +| `dialecticCadence` | `2` | Min turns between dialectic LLM calls (recommended 1–5) | + +The `contextTokens` budget is enforced at injection time. 
If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions. + +### Memory-context sanitization + +Honcho sanitizes the `memory-context` block before injection to prevent prompt injection and malformed content: + +- Strips XML/HTML tags from user-authored conclusions +- Normalizes whitespace and control characters +- Truncates individual conclusions that exceed `messageMaxChars` +- Escapes delimiter sequences that could break the system prompt structure + +This fix addresses edge cases where raw user conclusions containing markup or special characters could corrupt the injected context block. ## Troubleshooting @@ -221,6 +402,12 @@ Observation config is synced from the server on each session init. Start a new s ### Messages truncated Messages over `messageMaxChars` (default 25k) are automatically chunked with `[continued]` markers. If you're hitting this often, check if tool results or skill content is inflating message size. +### Context injection too large +If you see warnings about context budget exceeded, lower `contextTokens` or reduce `dialecticDepth`. The session summary is trimmed first when the budget is tight. + +### Session summary missing +Session summary requires at least one prior turn in the current Honcho session. On cold start (new session, no history), the summary is omitted and Honcho uses the cold-start prompt strategy instead. 
+ ## CLI Commands | Command | Description | diff --git a/optional-skills/creative/concept-diagrams/SKILL.md b/optional-skills/creative/concept-diagrams/SKILL.md new file mode 100644 index 0000000000..03497c0c2f --- /dev/null +++ b/optional-skills/creative/concept-diagrams/SKILL.md @@ -0,0 +1,361 @@ +--- +name: concept-diagrams +description: Generate flat, minimal light/dark-aware SVG diagrams as standalone HTML files, using a unified educational visual language with 9 semantic color ramps, sentence-case typography, and automatic dark mode. Best suited for educational and non-software visuals — physics setups, chemistry mechanisms, math curves, physical objects (aircraft, turbines, smartphones, mechanical watches), anatomy, floor plans, cross-sections, narrative journeys (lifecycle of X, process of Y), hub-spoke system integrations (smart city, IoT), and exploded layer views. If a more specialized skill exists for the subject (dedicated software/cloud architecture, hand-drawn sketches, animated explainers, etc.), prefer that — otherwise this skill can also serve as a general-purpose SVG diagram fallback with a clean educational look. Ships with 15 example diagrams. +version: 0.1.0 +author: v1k22 (original PR), ported into hermes-agent +license: MIT +dependencies: [] +metadata: + hermes: + tags: [diagrams, svg, visualization, education, physics, chemistry, engineering] + related_skills: [architecture-diagram, excalidraw, generative-widgets] +--- + +# Concept Diagrams + +Generate production-quality SVG diagrams with a unified flat, minimal design system. Output is a single self-contained HTML file that renders identically in any modern browser, with automatic light/dark mode. 
+ +## Scope + +**Best suited for:** +- Physics setups, chemistry mechanisms, math curves, biology +- Physical objects (aircraft, turbines, smartphones, mechanical watches, cells) +- Anatomy, cross-sections, exploded layer views +- Floor plans, architectural conversions +- Narrative journeys (lifecycle of X, process of Y) +- Hub-spoke system integrations (smart city, IoT networks, electricity grids) +- Educational / textbook-style visuals in any domain +- Quantitative charts (grouped bars, energy profiles) + +**Look elsewhere first for:** +- Dedicated software / cloud infrastructure architecture with a dark tech aesthetic (consider `architecture-diagram` if available) +- Hand-drawn whiteboard sketches (consider `excalidraw` if available) +- Animated explainers or video output (consider an animation skill) + +If a more specialized skill is available for the subject, prefer that. If none fits, this skill can serve as a general-purpose SVG diagram fallback — the output will carry the clean educational aesthetic described below, which is a reasonable default for almost any subject. + +## Workflow + +1. Decide on the diagram type (see Diagram Types below). +2. Lay out components using the Design System rules. +3. Write the full HTML page using `templates/template.html` as the wrapper — paste your SVG where the template says ``. +4. Save as a standalone `.html` file (for example `~/my-diagram.html` or `./my-diagram.html`). +5. User opens it directly in a browser — no server, no dependencies. + +Optional: if the user wants a browsable gallery of multiple diagrams, see "Local Preview Server" at the bottom. + +Load the HTML template: +``` +skill_view(name="concept-diagrams", file_path="templates/template.html") +``` + +The template embeds the full CSS design system (`c-*` color classes, text classes, light/dark variables, arrow marker styles). The SVG you generate relies on these classes being present on the hosting page. 
+ +--- + +## Design System + +### Philosophy + +- **Flat**: no gradients, drop shadows, blur, glow, or neon effects. +- **Minimal**: show the essential. No decorative icons inside boxes. +- **Consistent**: same colors, spacing, typography, and stroke widths across every diagram. +- **Dark-mode ready**: all colors auto-adapt via CSS classes — no per-mode SVG. + +### Color Palette + +9 color ramps, each with 7 stops. Put the class name on a `` or shape element; the template CSS handles both modes. + +| Class | 50 (lightest) | 100 | 200 | 400 | 600 | 800 | 900 (darkest) | +|------------|---------------|---------|---------|---------|---------|---------|---------------| +| `c-purple` | #EEEDFE | #CECBF6 | #AFA9EC | #7F77DD | #534AB7 | #3C3489 | #26215C | +| `c-teal` | #E1F5EE | #9FE1CB | #5DCAA5 | #1D9E75 | #0F6E56 | #085041 | #04342C | +| `c-coral` | #FAECE7 | #F5C4B3 | #F0997B | #D85A30 | #993C1D | #712B13 | #4A1B0C | +| `c-pink` | #FBEAF0 | #F4C0D1 | #ED93B1 | #D4537E | #993556 | #72243E | #4B1528 | +| `c-gray` | #F1EFE8 | #D3D1C7 | #B4B2A9 | #888780 | #5F5E5A | #444441 | #2C2C2A | +| `c-blue` | #E6F1FB | #B5D4F4 | #85B7EB | #378ADD | #185FA5 | #0C447C | #042C53 | +| `c-green` | #EAF3DE | #C0DD97 | #97C459 | #639922 | #3B6D11 | #27500A | #173404 | +| `c-amber` | #FAEEDA | #FAC775 | #EF9F27 | #BA7517 | #854F0B | #633806 | #412402 | +| `c-red` | #FCEBEB | #F7C1C1 | #F09595 | #E24B4A | #A32D2D | #791F1F | #501313 | + +#### Color Assignment Rules + +Color encodes **meaning**, not sequence. Never cycle through colors like a rainbow. + +- Group nodes by **category** — all nodes of the same type share one color. +- Use `c-gray` for neutral/structural nodes (start, end, generic steps, users). +- Use **2-3 colors per diagram**, not 6+. +- Prefer `c-purple`, `c-teal`, `c-coral`, `c-pink` for general categories. +- Reserve `c-blue`, `c-green`, `c-amber`, `c-red` for semantic meaning (info, success, warning, error). 
+ +Light/dark stop mapping (handled by the template CSS — just use the class): +- Light mode: 50 fill + 600 stroke + 800 title / 600 subtitle +- Dark mode: 800 fill + 200 stroke + 100 title / 200 subtitle + +### Typography + +Only two font sizes. No exceptions. + +| Class | Size | Weight | Use | +|-------|------|--------|-----| +| `th` | 14px | 500 | Node titles, region labels | +| `ts` | 12px | 400 | Subtitles, descriptions, arrow labels | +| `t` | 14px | 400 | General text | + +- **Sentence case always.** Never Title Case, never ALL CAPS. +- Every `` MUST carry a class (`t`, `ts`, or `th`). No unclassed text. +- `dominant-baseline="central"` on all text inside boxes. +- `text-anchor="middle"` for centered text in boxes. + +**Width estimation (approx):** +- 14px weight 500: ~8px per character +- 12px weight 400: ~6.5px per character +- Always verify: `box_width >= (char_count × px_per_char) + 48` (24px padding each side) + +### Spacing & Layout + +- **ViewBox**: `viewBox="0 0 680 H"` where H = content height + 40px buffer. +- **Safe area**: x=40 to x=640, y=40 to y=(H-40). +- **Between boxes**: 60px minimum gap. +- **Inside boxes**: 24px horizontal padding, 12px vertical padding. +- **Arrowhead gap**: 10px between arrowhead and box edge. +- **Single-line box**: 44px height. +- **Two-line box**: 56px height, 18px between title and subtitle baselines. +- **Container padding**: 20px minimum inside every container. +- **Max nesting**: 2-3 levels deep. Deeper gets unreadable at 680px width. + +### Stroke & Shape + +- **Stroke width**: 0.5px on all node borders. Not 1px, not 2px. +- **Rect rounding**: `rx="8"` for nodes, `rx="12"` for inner containers, `rx="16"` to `rx="20"` for outer containers. +- **Connector paths**: MUST have `fill="none"`. SVG defaults to `fill: black` otherwise. + +### Arrow Marker + +Include this `` block at the start of **every** SVG: + +```xml + + + + + +``` + +Use `marker-end="url(#arrow)"` on lines. 
The arrowhead inherits the line color via `context-stroke`. + +### CSS Classes (Provided by the Template) + +The template page provides: + +- Text: `.t`, `.ts`, `.th` +- Neutral: `.box`, `.arr`, `.leader`, `.node` +- Color ramps: `.c-purple`, `.c-teal`, `.c-coral`, `.c-pink`, `.c-gray`, `.c-blue`, `.c-green`, `.c-amber`, `.c-red` (all with automatic light/dark mode) + +You do **not** need to redefine these — just apply them in your SVG. The template file contains the full CSS definitions. + +--- + +## SVG Boilerplate + +Every SVG inside the template page starts with this exact structure: + +```xml + + + + + + + + + + +``` + +Replace `{HEIGHT}` with the actual computed height (last element bottom + 40px). + +### Node Patterns + +**Single-line node (44px):** +```xml + + + Service name + +``` + +**Two-line node (56px):** +```xml + + + Service name + Short description + +``` + +**Connector (no label):** +```xml + +``` + +**Container (dashed or solid):** +```xml + + + Container label + Subtitle info + +``` + +--- + +## Diagram Types + +Choose the layout that fits the subject: + +1. **Flowchart** — CI/CD pipelines, request lifecycles, approval workflows, data processing. Single-direction flow (top-down or left-right). Max 4-5 nodes per row. +2. **Structural / Containment** — Cloud infrastructure nesting, system architecture with layers. Large outer containers with inner regions. Dashed rects for logical groupings. +3. **API / Endpoint Map** — REST routes, GraphQL schemas. Tree from root, branching to resource groups, each containing endpoint nodes. +4. **Microservice Topology** — Service mesh, event-driven systems. Services as nodes, arrows for communication patterns, message queues between. +5. **Data Flow** — ETL pipelines, streaming architectures. Left-to-right flow from sources through processing to sinks. +6. **Physical / Structural** — Vehicles, buildings, hardware, anatomy. 
Use shapes that match the physical form — `` for curved bodies, `` for tapered shapes, ``/`` for cylindrical parts, nested `` for compartments. See `references/physical-shape-cookbook.md`. +7. **Infrastructure / Systems Integration** — Smart cities, IoT networks, multi-domain systems. Hub-spoke layout with central platform connecting subsystems. Semantic line styles (`.data-line`, `.power-line`, `.water-pipe`, `.road`). See `references/infrastructure-patterns.md`. +8. **UI / Dashboard Mockups** — Admin panels, monitoring dashboards. Screen frame with nested chart/gauge/indicator elements. See `references/dashboard-patterns.md`. + +For physical, infrastructure, and dashboard diagrams, load the matching reference file before generating — each one provides ready-made CSS classes and shape primitives. + +--- + +## Validation Checklist + +Before finalizing any SVG, verify ALL of the following: + +1. Every `` has class `t`, `ts`, or `th`. +2. Every `` inside a box has `dominant-baseline="central"`. +3. Every connector `` or `` used as arrow has `fill="none"`. +4. No arrow line crosses through an unrelated box. +5. `box_width >= (longest_label_chars × 8) + 48` for 14px text. +6. `box_width >= (longest_label_chars × 6.5) + 48` for 12px text. +7. ViewBox height = bottom-most element + 40px. +8. All content stays within x=40 to x=640. +9. Color classes (`c-*`) are on `` or shape elements, never on `` connectors. +10. Arrow `` block is present. +11. No gradients, shadows, blur, or glow effects. +12. Stroke width is 0.5px on all node borders. + +--- + +## Output & Preview + +### Default: standalone HTML file + +Write a single `.html` file the user can open directly. No server, no dependencies, works offline. Pattern: + +```python +# 1. Load the template +template = skill_view("concept-diagrams", "templates/template.html") + +# 2. 
Fill in title, subtitle, and paste your SVG +html = template.replace( + "<!-- TITLE -->", "SN2 reaction mechanism" +).replace( + "<!-- SUBTITLE -->", "Bimolecular nucleophilic substitution" +).replace( + "<!-- SVG -->", svg_content +) + +# 3. Write to a user-chosen path (or ./ by default) +write_file("./sn2-mechanism.html", html) +``` + +Tell the user how to open it: + +``` +# macOS +open ./sn2-mechanism.html +# Linux +xdg-open ./sn2-mechanism.html +``` + +### Optional: local preview server (multi-diagram gallery) + +Only use this when the user explicitly wants a browsable gallery of multiple diagrams. + +**Rules:** +- Bind to `127.0.0.1` only. Never `0.0.0.0`. Exposing diagrams on all network interfaces is a security hazard on shared networks. +- Pick a free port (do NOT hard-code one) and tell the user the chosen URL. +- The server is optional and opt-in — prefer the standalone HTML file first. + +Recommended pattern (lets the OS pick a free ephemeral port): + +```bash +# Put each diagram in its own folder under .diagrams/ +mkdir -p .diagrams/sn2-mechanism +# ...write .diagrams/sn2-mechanism/index.html... + +# Serve on loopback only, free port +cd .diagrams && python3 -c " +import http.server, socketserver +with socketserver.TCPServer(('127.0.0.1', 0), http.server.SimpleHTTPRequestHandler) as s: + print(f'Serving at http://127.0.0.1:{s.server_address[1]}/') + s.serve_forever() +" & +``` + +If the user insists on a fixed port, use `127.0.0.1:<port>` — still never `0.0.0.0`. Document how to stop the server (`kill %1` or `pkill -f "http.server"`). + +--- + +## Examples Reference + +The `examples/` directory ships 15 complete, tested diagrams. 
Browse them for working patterns before writing a new diagram of a similar type: + +| File | Type | Demonstrates | +|------|------|--------------| +| `hospital-emergency-department-flow.md` | Flowchart | Priority routing with semantic colors | +| `feature-film-production-pipeline.md` | Flowchart | Phased workflow, horizontal sub-flows | +| `automated-password-reset-flow.md` | Flowchart | Auth flow with error branches | +| `autonomous-llm-research-agent-flow.md` | Flowchart | Loop-back arrows, decision branches | +| `place-order-uml-sequence.md` | Sequence | UML sequence diagram style | +| `commercial-aircraft-structure.md` | Physical | Paths, polygons, ellipses for realistic shapes | +| `wind-turbine-structure.md` | Physical cross-section | Underground/above-ground separation, color coding | +| `smartphone-layer-anatomy.md` | Exploded view | Alternating left/right labels, layered components | +| `apartment-floor-plan-conversion.md` | Floor plan | Walls, doors, proposed changes in dotted red | +| `banana-journey-tree-to-smoothie.md` | Narrative journey | Winding path, progressive state changes | +| `cpu-ooo-microarchitecture.md` | Hardware pipeline | Fan-out, memory hierarchy sidebar | +| `sn2-reaction-mechanism.md` | Chemistry | Molecules, curved arrows, energy profile | +| `smart-city-infrastructure.md` | Hub-spoke | Semantic line styles per system | +| `electricity-grid-flow.md` | Multi-stage flow | Voltage hierarchy, flow markers | +| `ml-benchmark-grouped-bar-chart.md` | Chart | Grouped bars, dual axis | + +Load any example with: +``` +skill_view(name="concept-diagrams", file_path="examples/") +``` + +--- + +## Quick Reference: What to Use When + +| User says | Diagram type | Suggested colors | +|-----------|--------------|------------------| +| "show the pipeline" | Flowchart | gray start/end, purple steps, red errors, teal deploy | +| "draw the data flow" | Data pipeline (left-right) | gray sources, purple processing, teal sinks | +| "visualize the system" | 
Structural (containment) | purple container, teal services, coral data | +| "map the endpoints" | API tree | purple root, one ramp per resource group | +| "show the services" | Microservice topology | gray ingress, teal services, purple bus, coral workers | +| "draw the aircraft/vehicle" | Physical | paths, polygons, ellipses for realistic shapes | +| "smart city / IoT" | Hub-spoke integration | semantic line styles per subsystem | +| "show the dashboard" | UI mockup | dark screen, chart colors: teal, purple, coral for alerts | +| "power grid / electricity" | Multi-stage flow | voltage hierarchy (HV/MV/LV line weights) | +| "wind turbine / turbine" | Physical cross-section | foundation + tower cutaway + nacelle color-coded | +| "journey of X / lifecycle" | Narrative journey | winding path, progressive state changes | +| "layers of X / exploded" | Exploded layer view | vertical stack, alternating labels | +| "CPU / pipeline" | Hardware pipeline | vertical stages, fan-out to execution ports | +| "floor plan / apartment" | Floor plan | walls, doors, proposed changes in dotted red | +| "reaction mechanism" | Chemistry | atoms, bonds, curved arrows, transition state, energy profile | diff --git a/optional-skills/creative/concept-diagrams/examples/apartment-floor-plan-conversion.md b/optional-skills/creative/concept-diagrams/examples/apartment-floor-plan-conversion.md new file mode 100644 index 0000000000..7c11d3401e --- /dev/null +++ b/optional-skills/creative/concept-diagrams/examples/apartment-floor-plan-conversion.md @@ -0,0 +1,244 @@ +# Apartment Floor Plan: 3 BHK to 4 BHK Conversion + +An architectural floor plan showing a 1,500 sq ft apartment with proposed modifications to convert from 3 BHK to 4 BHK. Demonstrates architectural drawing conventions, room layouts, proposed changes with dotted lines, and area comparison tables. 
+ +## Key Patterns Used + +- **Architectural floor plan**: Top-down view with walls, doors, windows +- **Proposed modifications**: Dotted red lines for new walls +- **Room color coding**: Light fills to distinguish room types +- **Circulation paths**: Arrows showing new access routes +- **Data table**: Before/after area comparison with highlighting +- **Architectural symbols**: North arrow, scale bar, door swings + +## Diagram Type + +This is an **architectural floor plan** with: +- **Plan view**: Top-down orthographic projection +- **Overlay technique**: Existing structure + proposed changes +- **Quantitative data**: Area measurements and comparison table + +## Architectural Drawing Elements + +### Wall Styles + +```xml + + + + + + + + +``` + +```css +.wall { stroke: var(--text-primary); stroke-width: 6; fill: none; stroke-linecap: square; } +.wall-thin { stroke: var(--text-primary); stroke-width: 3; fill: none; } +.proposed-wall { stroke: #A32D2D; stroke-width: 4; fill: none; stroke-dasharray: 8 4; } +``` + +### Door Symbols + +```xml + + + + + + + + + + + + + +``` + +```css +.door { stroke: var(--text-secondary); stroke-width: 1.5; fill: none; } +.door-swing { stroke: var(--text-tertiary); stroke-width: 1; fill: none; stroke-dasharray: 3 2; } +``` + +### Window Symbols + +```xml + + + + + + + +``` + +```css +.window { stroke: var(--text-primary); stroke-width: 1; fill: var(--bg-primary); } +.window-glass { stroke: #378ADD; stroke-width: 2; fill: none; } +``` + +### Room Fills + +```xml + + + + + + + + + +``` + +```css +.room-master { fill: rgba(206, 203, 246, 0.3); } /* purple tint */ +.room-bed2 { fill: rgba(159, 225, 203, 0.3); } /* teal tint */ +.room-bed3 { fill: rgba(250, 199, 117, 0.3); } /* amber tint */ +.room-living { fill: rgba(245, 196, 179, 0.3); } /* coral tint */ +.room-kitchen { fill: rgba(237, 147, 177, 0.3); } /* pink tint */ +.room-bath { fill: rgba(133, 183, 235, 0.3); } /* blue tint */ +.room-new { fill: rgba(163, 45, 45, 0.15); } /* red tint 
for proposed */ +``` + +### Support Fixtures + +```xml + + +Counter + + + +``` + +```css +.balcony { fill: none; stroke: var(--text-secondary); stroke-width: 2; stroke-dasharray: 6 3; } +.balcony-fill { fill: rgba(93, 202, 165, 0.1); } +``` + +### Room Labels + +```xml + +MASTER +BEDROOM +195 sq ft + + +BEDROOM 4 +(NEW) +``` + +```css +.room-label { font-family: system-ui; font-size: 11px; fill: var(--text-primary); font-weight: 500; } +.area-label { font-family: system-ui; font-size: 9px; fill: var(--text-tertiary); } +``` + +### Circulation Arrow + +```xml + + + + + + + +New corridor access +``` + +```css +.circulation { stroke: #3B6D11; stroke-width: 2; fill: none; } +.circulation-fill { fill: #3B6D11; } +``` + +### North Arrow and Scale Bar + +```xml + + + + + N + + + + + + + + + 0 + 5' + 10' + +``` + +## Area Comparison Table + +### Table Structure + +```xml + + +Room + + + +Master Bedroom +195 + + + + + + +Bedroom 4 (NEW) ++100 + + + +TOTAL CARPET AREA +``` + +```css +.table-header { fill: var(--bg-secondary); } +.table-row { fill: var(--bg-primary); stroke: var(--border); stroke-width: 0.5; } +.table-row-alt { fill: var(--bg-tertiary); stroke: var(--border); stroke-width: 0.5; } +.table-highlight { fill: rgba(163, 45, 45, 0.1); stroke: #A32D2D; stroke-width: 0.5; } +``` + +## Layout Notes + +- **ViewBox**: 800×780 (portrait for floor plan + table) +- **Scale**: 10px = 1 foot (apartment ~50ft × 33ft) +- **Floor plan origin**: Offset at (50, 60) for margins +- **Wall thickness**: 6px outer, 3px inner (represents ~6" walls) +- **Room labels**: Centered in each room with area below +- **Table placement**: Below floor plan with full width + +## Color Coding + +| Element | Color | Usage | +|---------|-------|-------| +| Proposed walls | Red (#A32D2D) dotted | New construction | +| New room fill | Red 15% opacity | Bedroom 4 area | +| Circulation | Green (#3B6D11) | New access path | +| Window glass | Blue (#378ADD) | Glass indication | +| Bedrooms | 
Purple/Teal/Amber tints | Room differentiation | +| Wet areas | Blue tint | Bathrooms | +| Living | Coral tint | Common areas | + +## When to Use This Pattern + +Use this diagram style for: +- Apartment/house floor plans +- Office layout planning +- Renovation proposals showing before/after +- Space planning with area calculations +- Real estate marketing materials +- Interior design presentations +- Building permit documentation diff --git a/optional-skills/creative/concept-diagrams/examples/automated-password-reset-flow.md b/optional-skills/creative/concept-diagrams/examples/automated-password-reset-flow.md new file mode 100644 index 0000000000..86cd1cc078 --- /dev/null +++ b/optional-skills/creative/concept-diagrams/examples/automated-password-reset-flow.md @@ -0,0 +1,276 @@ +# Automated Password Reset Flow + +A two-section flowchart tracing the full user journey for a web application password reset: the initial request phase (forgot password → email check → token generation) and the reset-form phase (link click → new password entry → token/password validation). Demonstrates multi-exit decision diamonds, a three-column branching layout, a loop-back path, and a cross-section separator arrow. 
+ +## Key Patterns Used + +- **Three-column layout**: Left column (error/terminal branches at cx=115), center column (main happy path at cx=340), right column (expired-token branch at cx=552) — allows side branches to live at the same y-level as center nodes without overlap +- **Decision diamonds with ``**: Each decision uses a `` wrapper containing a `` and centered ``; the diamond points are computed as `cx±hw, cy±hh` (hw=100, hh=28) +- **Pill-shaped terminals**: Start and end nodes use `rx=22` on their `` to signal entry/exit points; all mid-flow process nodes use `rx=8` +- **Three-branch decision paths**: Each diamond has a "Yes" branch (down, short ``) and a "No" branch (`` going horizontal then vertical to a side column) +- **Loop-back path**: Mismatch error node loops back to the password-entry node via a routing corridor at x=215 — a 5-px gap between the left column (right edge x=210) and center column (left edge x=220); the path exits the bottom of the error node, drops below it, travels right to x=215, then goes up to the target node's center y, then right 5 px into the node's left edge +- **Section separator**: A dashed horizontal `` at y=452 splits the two phases; the connecting arrow crosses it with a faded label ("user receives email") to preserve flow continuity +- **Italic annotation**: The exact UX copy for the generic message ("If that email exists…") is shown as a faded italic `ts` text block below the left-branch terminal node +- **Legend row**: Five inline swatches (gray, purple, teal, red, amber diamond) at the bottom explain the color-to-role mapping + +## Diagram + +```xml + + + + + + + + + + + Section 1 — Forgot password request + + + + + User: "Forgot password" + + + + + + + + Enter email address + + + + + + + + Email in system? + + + + + No + + + + Yes + + + + + + + Generic message shown + Email sent if found + + + + + + + + Request handled + + + + "If that email exists, a reset + link has been sent." 
+ + + + + + + Generate unique token + Time-limited, cryptographic + + + + + + + + Store token + user ID + + + + + + + + Send reset link via email + + + + + + + + user receives email + + Section 2 — Password reset form + + + + + + + User clicks reset link + + + + + + + + Enter new password ×2 + Confirm both passwords match + + + + + + + + Token expired? + + + + + Yes + + + + No + + + + + + + Token expired + Show expiry error + + + + + + + + End — request again + + + + + + Passwords match? + + + + + No + + + + Yes + + + + + + + Password mismatch + Passwords do not match + + + + + retry + + + + + + + Reset password + Invalidate used token + + + + + + + + Password reset complete + + + + Legend — + + User action + + System process + + Email / success + + Error state + + Decision + + +``` + +## Custom CSS + +Add these classes to the hosting page ` + + +
+

+

+ +
+ + diff --git a/optional-skills/creative/touchdesigner-mcp/SKILL.md b/optional-skills/creative/touchdesigner-mcp/SKILL.md new file mode 100644 index 0000000000..d0bd348afc --- /dev/null +++ b/optional-skills/creative/touchdesigner-mcp/SKILL.md @@ -0,0 +1,339 @@ +--- +name: touchdesigner-mcp +description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools." +version: 1.0.0 +author: kshitijk4poor +license: MIT +metadata: + hermes: + tags: [TouchDesigner, MCP, twozero, creative-coding, real-time-visuals, generative-art, audio-reactive, VJ, installation, GLSL] + related_skills: [native-mcp, ascii-video, manim-video, hermes-video] + +--- + +# TouchDesigner Integration (twozero MCP) + +## CRITICAL RULES + +1. **NEVER guess parameter names.** Call `td_get_par_info` for the op type FIRST. Your training data is wrong for TD 2025.32. +2. **If `tdAttributeError` fires, STOP.** Call `td_get_operator_info` on the failing node before continuing. +3. **NEVER hardcode absolute paths** in script callbacks. Use `me.parent()` / `scriptOp.parent()`. +4. **Prefer native MCP tools over td_execute_python.** Use `td_create_operator`, `td_set_operator_pars`, `td_get_errors` etc. Only fall back to `td_execute_python` for complex multi-step logic. +5. **Call `td_get_hints` before building.** It returns patterns specific to the op type you're working with. + +## Architecture + +``` +Hermes Agent -> MCP (Streamable HTTP) -> twozero.tox (port 40404) -> TD Python +``` + +36 native tools. Free plugin (no payment/license — confirmed April 2026). +Context-aware (knows selected OP, current network). +Hub health check: `GET http://localhost:40404/mcp` returns JSON with instance PID, project name, TD version. 
+ +## Setup (Automated) + +Run the setup script to handle everything: + +```bash +bash "${HERMES_HOME:-$HOME/.hermes}/skills/creative/touchdesigner-mcp/scripts/setup.sh" +``` + +The script will: +1. Check if TD is running +2. Download twozero.tox if not already cached +3. Add `twozero_td` MCP server to Hermes config (if missing) +4. Test the MCP connection on port 40404 +5. Report what manual steps remain (drag .tox into TD, enable MCP toggle) + +### Manual steps (one-time, cannot be automated) + +1. **Drag `~/Downloads/twozero.tox` into the TD network editor** → click Install +2. **Enable MCP:** click twozero icon → Settings → mcp → "auto start MCP" → Yes +3. **Restart Hermes session** to pick up the new MCP server + +After setup, verify: +```bash +nc -z 127.0.0.1 40404 && echo "twozero MCP: READY" +``` + +## Environment Notes + +- **Non-Commercial TD** caps resolution at 1280×1280. Use `outputresolution = 'custom'` and set width/height explicitly. +- **Codecs:** `prores` (preferred on macOS) or `mjpa` as fallback. H.264/H.265/AV1 require a Commercial license. +- Always call `td_get_par_info` before setting params — names vary by TD version (see CRITICAL RULES #1). + +## Workflow + +### Step 0: Discover (before building anything) + +``` +Call td_get_par_info with op_type for each type you plan to use. +Call td_get_hints with the topic you're building (e.g. "glsl", "audio reactive", "feedback"). +Call td_get_focus to see where the user is and what's selected. +Call td_get_network to see what already exists. +``` + +No temp nodes, no cleanup. This replaces the old discovery dance entirely. + +### Step 1: Clean + Build + +**IMPORTANT: Split cleanup and creation into SEPARATE MCP calls.** Destroying and recreating same-named nodes in one `td_execute_python` script causes "Invalid OP object" errors. See pitfalls #11b. 
+ +Use `td_create_operator` for each node (handles viewport positioning automatically): + +``` +td_create_operator(type="noiseTOP", parent="/project1", name="bg", parameters={"resolutionw": 1280, "resolutionh": 720}) +td_create_operator(type="levelTOP", parent="/project1", name="brightness") +td_create_operator(type="nullTOP", parent="/project1", name="out") +``` + +For bulk creation or wiring, use `td_execute_python`: + +```python +# td_execute_python script: +root = op('/project1') +nodes = [] +for name, optype in [('bg', noiseTOP), ('fx', levelTOP), ('out', nullTOP)]: + n = root.create(optype, name) + nodes.append(n.path) +# Wire chain +for i in range(len(nodes)-1): + op(nodes[i]).outputConnectors[0].connect(op(nodes[i+1]).inputConnectors[0]) +result = {'created': nodes} +``` + +### Step 2: Set Parameters + +Prefer the native tool (validates params, won't crash): + +``` +td_set_operator_pars(path="/project1/bg", parameters={"roughness": 0.6, "monochrome": true}) +``` + +For expressions or modes, use `td_execute_python`: + +```python +op('/project1/time_driver').par.colorr.expr = "absTime.seconds % 1000.0" +``` + +### Step 3: Wire + +Use `td_execute_python` — no native wire tool exists: + +```python +op('/project1/bg').outputConnectors[0].connect(op('/project1/fx').inputConnectors[0]) +``` + +### Step 4: Verify + +``` +td_get_errors(path="/project1", recursive=true) +td_get_perf() +td_get_operator_info(path="/project1/out", detail="full") +``` + +### Step 5: Display / Capture + +``` +td_get_screenshot(path="/project1/out") +``` + +Or open a window via script: + +```python +win = op('/project1').create(windowCOMP, 'display') +win.par.winop = op('/project1/out').path +win.par.winw = 1280; win.par.winh = 720 +win.par.winopen.pulse() +``` + +## MCP Tool Quick Reference + +**Core (use these most):** +| Tool | What | +|------|------| +| `td_execute_python` | Run arbitrary Python in TD. Full API access. 
| +| `td_create_operator` | Create node with params + auto-positioning | +| `td_set_operator_pars` | Set params safely (validates, won't crash) | +| `td_get_operator_info` | Inspect one node: connections, params, errors | +| `td_get_operators_info` | Inspect multiple nodes in one call | +| `td_get_network` | See network structure at a path | +| `td_get_errors` | Find errors/warnings recursively | +| `td_get_par_info` | Get param names for an OP type (replaces discovery) | +| `td_get_hints` | Get patterns/tips before building | +| `td_get_focus` | What network is open, what's selected | + +**Read/Write:** +| Tool | What | +|------|------| +| `td_read_dat` | Read DAT text content | +| `td_write_dat` | Write/patch DAT content | +| `td_read_chop` | Read CHOP channel values | +| `td_read_textport` | Read TD console output | + +**Visual:** +| Tool | What | +|------|------| +| `td_get_screenshot` | Capture one OP viewer to file | +| `td_get_screenshots` | Capture multiple OPs at once | +| `td_get_screen_screenshot` | Capture actual screen via TD | +| `td_navigate_to` | Jump network editor to an OP | + +**Search:** +| Tool | What | +|------|------| +| `td_find_op` | Find ops by name/type across project | +| `td_search` | Search code, expressions, string params | + +**System:** +| Tool | What | +|------|------| +| `td_get_perf` | Performance profiling (FPS, slow ops) | +| `td_list_instances` | List all running TD instances | +| `td_get_docs` | In-depth docs on a TD topic | +| `td_agents_md` | Read/write per-COMP markdown docs | +| `td_reinit_extension` | Reload extension after code edit | +| `td_clear_textport` | Clear console before debug session | + +**Input Automation:** +| Tool | What | +|------|------| +| `td_input_execute` | Send mouse/keyboard to TD | +| `td_input_status` | Poll input queue status | +| `td_input_clear` | Stop input automation | +| `td_op_screen_rect` | Get screen coords of a node | +| `td_click_screen_point` | Click a point in a screenshot | + +See 
`references/mcp-tools.md` for full parameter schemas. + +## Key Implementation Rules + +**GLSL time:** No `uTDCurrentTime` in GLSL TOP. Use the Values page: +```python +# Call td_get_par_info(op_type="glslTOP") first to confirm param names +td_set_operator_pars(path="/project1/shader", parameters={"value0name": "uTime"}) +# Then set expression via script: +# op('/project1/shader').par.value0.expr = "absTime.seconds" +# In GLSL: uniform float uTime; +``` + +Fallback: Constant TOP in `rgba32float` format (8-bit clamps to 0-1, freezing the shader). + +**Feedback TOP:** Use `top` parameter reference, not direct input wire. "Not enough sources" resolves after first cook. "Cook dependency loop" warning is expected. + +**Resolution:** Non-Commercial caps at 1280×1280. Use `outputresolution = 'custom'`. + +**Large shaders:** Write GLSL to `/tmp/file.glsl`, then use `td_write_dat` or `td_execute_python` to load. + +**Vertex/Point access (TD 2025.32):** `point.P[0]`, `point.P[1]`, `point.P[2]` — NOT `.x`, `.y`, `.z`. + +**Extensions:** `ext0object` format is `"op('./datName').module.ClassName(me)"` in CONSTANT mode. After editing extension code with `td_write_dat`, call `td_reinit_extension`. + +**Script callbacks:** ALWAYS use relative paths via `me.parent()` / `scriptOp.parent()`. + +**Cleaning nodes:** Always `list(root.children)` before iterating + `child.valid` check. + +## Recording / Exporting Video + +```python +# via td_execute_python: +root = op('/project1') +rec = root.create(moviefileoutTOP, 'recorder') +op('/project1/out').outputConnectors[0].connect(rec.inputConnectors[0]) +rec.par.type = 'movie' +rec.par.file = '/tmp/output.mov' +rec.par.videocodec = 'prores' # Apple ProRes — NOT license-restricted on macOS +rec.par.record = True # start +# rec.par.record = False # stop (call separately later) +``` + +H.264/H.265/AV1 need Commercial license. Use `prores` on macOS or `mjpa` as fallback. 
+Extract frames: `ffmpeg -i /tmp/output.mov -vframes 120 /tmp/frames/frame_%06d.png` + +**TOP.save() is useless for animation** — captures same GPU texture every time. Always use MovieFileOut. + +### Before Recording: Checklist + +1. **Verify FPS > 0** via `td_get_perf`. If FPS=0 the recording will be empty. See pitfalls #38-39. +2. **Verify shader output is not black** via `td_get_screenshot`. Black output = shader error or missing input. See pitfalls #8, #40. +3. **If recording with audio:** cue audio to start first, then delay recording by 3 frames. See pitfalls #19. +4. **Set output path before starting record** — setting both in the same script can race. + +## Audio-Reactive GLSL (Proven Recipe) + +### Correct signal chain (tested April 2026) + +``` +AudioFileIn CHOP (playmode=sequential) + → AudioSpectrum CHOP (FFT=512, outputmenu=setmanually, outlength=256, timeslice=ON) + → Math CHOP (gain=10) + → CHOP to TOP (dataformat=r, layout=rowscropped) + → GLSL TOP input 1 (spectrum texture, 256x2) + +Constant TOP (rgba32float, time) → GLSL TOP input 0 +GLSL TOP → Null TOP → MovieFileOut +``` + +### Critical audio-reactive rules (empirically verified) + +1. **TimeSlice must stay ON** for AudioSpectrum. OFF = processes entire audio file → 24000+ samples → CHOP to TOP overflow. +2. **Set Output Length manually** to 256 via `outputmenu='setmanually'` and `outlength=256`. Default outputs 22050 samples. +3. **DO NOT use Lag CHOP for spectrum smoothing.** Lag CHOP operates in timeslice mode and expands 256 samples to 2400+, averaging all values to near-zero (~1e-06). The shader receives no usable data. This was the #1 audio sync failure in testing. +4. **DO NOT use Filter CHOP either** — same timeslice expansion problem with spectrum data. +5. **Smoothing belongs in the GLSL shader** if needed, via temporal lerp with a feedback texture: `mix(prevValue, newValue, 0.3)`. This gives frame-perfect sync with zero pipeline latency. +6. 
**CHOP to TOP dataformat = 'r'**, layout = 'rowscropped'. Spectrum output is 256x2 (stereo). Sample at y=0.25 for first channel. +7. **Math gain = 10** (not 5). Raw spectrum values are ~0.19 in bass range. Gain of 10 gives usable ~5.0 for the shader. +8. **No Resample CHOP needed.** Control output size via AudioSpectrum's `outlength` param directly. + +### GLSL spectrum sampling + +```glsl +// Input 0 = time (1x1 rgba32float), Input 1 = spectrum (256x2) +float iTime = texture(sTD2DInputs[0], vec2(0.5)).r; + +// Sample multiple points per band and average for stability: +// NOTE: y=0.25 for first channel (stereo texture is 256x2, first row center is 0.25) +float bass = (texture(sTD2DInputs[1], vec2(0.02, 0.25)).r + + texture(sTD2DInputs[1], vec2(0.05, 0.25)).r) / 2.0; +float mid = (texture(sTD2DInputs[1], vec2(0.2, 0.25)).r + + texture(sTD2DInputs[1], vec2(0.35, 0.25)).r) / 2.0; +float hi = (texture(sTD2DInputs[1], vec2(0.6, 0.25)).r + + texture(sTD2DInputs[1], vec2(0.8, 0.25)).r) / 2.0; +``` + +See `references/network-patterns.md` for complete build scripts + shader code. + +## Operator Quick Reference + +| Family | Color | Python class / MCP type | Suffix | +|--------|-------|-------------|--------| +| TOP | Purple | noiseTOP, glslTOP, compositeTOP, levelTOP, blurTOP, textTOP, nullTOP | TOP | +| CHOP | Green | audiofileinCHOP, audiospectrumCHOP, mathCHOP, lfoCHOP, constantCHOP | CHOP | +| SOP | Blue | gridSOP, sphereSOP, transformSOP, noiseSOP | SOP | +| DAT | White | textDAT, tableDAT, scriptDAT, webserverDAT | DAT | +| MAT | Yellow | phongMAT, pbrMAT, glslMAT, constMAT | MAT | +| COMP | Gray | geometryCOMP, containerCOMP, cameraCOMP, lightCOMP, windowCOMP | COMP | + +## Security Notes + +- MCP runs on localhost only (port 40404). No authentication — any local process can send commands. +- `td_execute_python` has unrestricted access to the TD Python environment and filesystem as the TD process user. 
+- `setup.sh` downloads twozero.tox from the official 404zero.com URL. Verify the download if concerned. +- The skill never sends data outside localhost. All MCP communication is local. + +## References + +| File | What | +|------|------| +| `references/pitfalls.md` | Hard-won lessons from real sessions | +| `references/operators.md` | All operator families with params and use cases | +| `references/network-patterns.md` | Recipes: audio-reactive, generative, GLSL, instancing | +| `references/mcp-tools.md` | Full twozero MCP tool parameter schemas | +| `references/python-api.md` | TD Python: op(), scripting, extensions | +| `references/troubleshooting.md` | Connection diagnostics, debugging | +| `scripts/setup.sh` | Automated setup script | + +--- + +> You're not writing code. You're conducting light. diff --git a/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md b/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md new file mode 100644 index 0000000000..ec90076cb2 --- /dev/null +++ b/optional-skills/creative/touchdesigner-mcp/references/mcp-tools.md @@ -0,0 +1,382 @@ +# twozero MCP Tools Reference + +36 tools from twozero MCP v2.774+ (April 2026). +All tools accept an optional `target_instance` param for multi-TD-instance scenarios. + +## Execution & Scripting + +### td_execute_python + +Execute Python code inside TouchDesigner and return the result. Has full access to TD Python API (op, project, app, etc). Print statements and the last expression value are captured. Best for: wiring connections (inputConnectors), setting expressions (par.X.expr/mode), querying parameter names, and batch creation scripts (5+ operators). For creating 1-4 operators, prefer td_create_operator instead. 
+ +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `code` | string | yes | Python code to execute in TouchDesigner | + +## Network & Structure + +### td_get_network + +Get the operator network structure in TouchDesigner (TD) at a given path. Returns compact list: name OPType flags. First line is full path of queried op. Flags: ch:N=children count, !cook=allowCooking off, bypass, private=isPrivate, blocked:reason, "comment text". depth=0 (default) = current level only. depth=1 = one level of children (indented). To explore deeper, call again on a specific COMP path. System operators (/ui, /sys) are hidden by default. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | no | Network path to inspect, e.g. '/' or '/project1' | +| `depth` | integer | no | How many levels deep to recurse. 0=current level only (recommended), 1=include direct children of COMPs | +| `includeSystem` | boolean | no | Include system operators (/ui, /sys). Default false. | +| `nodeXY` | boolean | no | Include nodeX,nodeY coordinates. Default false. | + +### td_create_operator + +Create a new operator (node) in TouchDesigner (TD). Preferred way to create operators — handles viewport positioning, viewer flag, and docked ops automatically. For batch creation (5+ ops), you may use td_execute_python with a script instead, but then call td_get_hints('construction') first for correct parameter names and layout rules. Supports all TD operator types: TOP, CHOP, SOP, DAT, COMP, MAT. If parent is omitted, creates in the currently open network at the user's viewport position. When building a container: first create baseCOMP (no parent), then create children with parent=compPath. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `type` | string | yes | Operator type, e.g. 
'textDAT', 'constantCHOP', 'noiseTOP', 'transformTOP', 'baseCOMP' | +| `parent` | string | no | Path to the parent operator. If omitted, uses the currently open network in TD. | +| `name` | string | no | Name for the new operator (optional, TD auto-names if omitted) | +| `parameters` | object | no | Key-value pairs of parameters to set on the created operator | + +### td_find_op + +Find operators by name and/or type across the project. Returns TSV: path, OPType, flags. Flags: bypass, !cook, private, blocked:reason. Use td_search to search inside code/expressions; use td_find_op to find operators themselves. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | string | no | Substring to match in operator name (case-insensitive). E.g. 'noise' finds noise1, noise2, myNoise. | +| `type` | string | no | Substring to match in OPType (case-insensitive). E.g. 'noiseTOP', 'baseCOMP', 'CHOP'. Use exact type for precision or partial for broader matches. | +| `root` | string | no | Root operator path to search from. Default '/project1'. | +| `max_results` | number | no | Maximum results to return. Default 50. | +| `max_depth` | number | no | Max recursion depth from root. Default unlimited. | +| `detail` | `basic` / `summary` | no | Result detail level. 'basic' = name/path/type (fast). 'summary' = + connections, non-default pars, expressions. Default 'basic'. | + +### td_search + +Search for text across all code (DAT scripts), parameter expressions, and string parameter values in the TD project. Returns TSV: path, kind (code/expression/parameter/ref), line, text. JSON when context>0. Words are OR-matched. Use quotes for exact phrases: 'GetLogin "op('login')"'. Use count_only=true to quickly check if something is referenced without fetching full results. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `query` | string | yes | Search query. Multiple words = OR (any match). 
Wrap in quotes for exact phrase. Example: 'GetLogin getLogin' finds either. | +| `root` | string | no | Root operator path to search from. Default '/project1'. | +| `scope` | `all` / `code` / `editable` / `expressions` / `parameters` | no | What to search. 'code' = DAT scripts only (fast, ~0.05s). 'editable' = only editable code (skips inherited/ref DATs). 'expressions' = parameter expressions only. 'parameters' = string parameter values only. 'all' = everything (slow, ~1.5s due to parameter scan). Default 'all'. | +| `case_sensitive` | boolean | no | Case-sensitive matching. Default false. | +| `max_results` | number | no | Maximum results to return. Default 50. | +| `context` | number | no | Lines to show before/after each code match. Saves td_read_dat calls. Default 0. | +| `count_only` | boolean | no | Return only match count, not results. Fast existence check. | +| `max_depth` | number | no | Max recursion depth from root. Default unlimited. | + +### td_navigate_to + +Navigate the TouchDesigner Network Editor viewport to show a specific operator. Opens the operator's parent network and centers the view on it. Use this to show the user where a problem is, or to navigate to an operator before modifying it. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | yes | Path to the operator to navigate to, e.g. '/project1/noise1' | + +## Operator Inspection + +### td_get_operator_info + +Get information about a specific operator (node) in TouchDesigner (TD). detail='summary': connections, non-default pars, expressions, CHOP channels (compact). detail='full': all of the above PLUS every parameter with value/default/label. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | yes | Full path to the operator, e.g. '/project1/noise1' | +| `detail` | `summary` / `full` | no | Level of detail. 
'summary' = connections, expressions, non-default pars, custom pars (pulse marked), CHOP channels. 'full' = summary + all parameters. Default 'full'. | + +### td_get_operators_info + +Get information about multiple operators in one call. Returns an array of operator info objects. Use instead of calling td_get_operator_info multiple times. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `paths` | array | yes | Array of full operator paths, e.g. ['/project1/null1', '/project1/null2'] | +| `detail` | `summary` / `full` | no | Level of detail. Default 'summary'. | + +### td_get_par_info + +Get parameter names and details for a TouchDesigner operator type. Without specific pars: returns compact list of all parameters with their names, types, and menu options. With pars: returns full details (help text, menu values, style) for specific parameters. Use this when you need to know exact parameter names before setting them. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `op_type` | string | yes | TD operator type name, e.g. 'noiseTOP', 'blurTOP', 'lfoCHOP', 'compositeTOP' | +| `pars` | array | no | Optional list of specific parameter names to get full details for | + +## Parameter Setting + +### td_set_operator_pars + +Set parameters and flags on an operator in TouchDesigner (TD). Safer than td_execute_python for simple parameter changes. Can set values, toggle bypass/viewer, without writing Python code. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | yes | Path to the operator | +| `parameters` | object | no | Key-value pairs of parameters to set | +| `bypass` | boolean | no | Set bypass state of the operator (not available on COMPs) | +| `viewer` | boolean | no | Set viewer state of the operator | +| `allowCooking` | boolean | no | Set cooking flag on a COMP. When False, internal network stops cooking (0 CPU). COMP-only. 
| + +## Data Read/Write + +### td_read_dat + +Read the text content of a DAT operator in TouchDesigner (TD). Returns content with line numbers. Use to read scripts, extensions, GLSL shaders, table data. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | yes | Path to the DAT operator | +| `start_line` | integer | no | Start line (1-based). Omit to read from beginning. | +| `end_line` | integer | no | End line (inclusive). Omit to read to end. | + +### td_write_dat + +Write or patch text content of a DAT operator in TouchDesigner (TD). Can do full replacement or StrReplace-style patching (old_text -> new_text). Use for editing scripts, extensions, shaders. Does NOT reinit extensions automatically. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | yes | Path to the DAT operator | +| `text` | string | no | Full replacement text. Use this OR old_text+new_text, not both. | +| `old_text` | string | no | Text to find and replace (must be unique in the DAT) | +| `new_text` | string | no | Replacement text | +| `replace_all` | boolean | no | If true, replaces ALL occurrences of old_text (default: false, requires unique match) | + +### td_read_chop + +Read CHOP channel sample data. Returns channel values as arrays. Use when you need the actual sample values (animation curves, lookup tables, waveforms), not just the summary from td_get_operator_info. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | yes | Path to the CHOP operator | +| `channels` | array | no | Channel names to read. Omit to read all channels. | +| `start` | integer | no | Start sample index (0-based). Omit to read from beginning. | +| `end` | integer | no | End sample index (inclusive). Omit to read to end. | + +### td_read_textport + +Read the last N lines from the TouchDesigner (TD) log/textport (console output). 
Use this to see errors, warnings and print output from TD. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `lines` | integer | no | Number of recent lines to return | + +### td_clear_textport + +Clear the MCP textport log buffer. Use this before starting a debug session or an edit-run-check loop to keep td_read_textport output focused and minimal. + +No parameters (other than optional `target_instance`). + +## Visual Capture + +### td_get_screenshot + +Get a screenshot of an operator's viewer in TouchDesigner (TD). Saves the image to a file and returns the file path. Use your file-reading tool to view the image. Shows what the operator looks like in its viewer (TOP output, CHOP waveform graph, SOP geometry, DAT table, parameter UI, etc). Use this to visually inspect any operator, or to generate images via TD for use in your project. TWO-STEP ASYNC USAGE: Step 1 — call with 'path' to start: returns {'status': 'pending', 'requestId': '...'}. Step 2 — call with 'request_id' to retrieve: returns {'file': '/tmp/.../opname_id.jpg'}. Then read the file to see the image. If step 2 still returns pending, make one other tool call then retry. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | no | Full operator path to screenshot, e.g. '/project1/noise1'. Required for step 1. | +| `request_id` | string | no | Request ID from step 1 to retrieve the completed screenshot. | +| `max_size` | integer | no | Max pixel size for the longer side (default 512). Use 0 for original operator resolution (useful for pixel-accurate UI work). Higher values (e.g. 1024) for more detail. | +| `output_path` | string | no | Optional absolute path where the image should be saved (e.g. '/Users/me/project/render.png'). If omitted, saved to /tmp/pisang_mcp/screenshots/. Use absolute paths — TD's working directory may differ from the agent's. 
| +| `as_top` | boolean | no | If true, captures the operator directly as a TOP (bypasses the viewer renderer), preserving alpha/transparency. Only works for TOP operators — if the target is not a TOP, falls back to the viewer automatically. Use this when you need a clean PNG with alpha, e.g. to save a generated image for use in another project. | +| `format` | `auto` / `jpg` / `png` | no | Image format. 'auto' (default): JPEG for viewer mode, PNG for as_top=true. 'jpg': always JPEG (smaller). 'png': always PNG (lossless). | + +### td_get_screenshots + +Get screenshots of multiple operators in one batch. Saves images to files and returns file paths. Use your file-reading tool to view images. TWO-STEP ASYNC USAGE: Step 1 — call with 'paths' array to start: returns {'status': 'pending', 'batchId': '...', 'total': N}. Step 2 — call with 'batch_id' to retrieve: returns {'files': [{op, file}, ...]}. Then read the files to see the images. If still processing returns {'status': 'pending', 'ready': K, 'total': N}. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `paths` | array | no | List of full operator paths to screenshot. Required for step 1. | +| `batch_id` | string | no | Batch ID from step 1 to retrieve completed screenshots. | +| `max_size` | integer | no | Max pixel size for longer side (default 512). Use 0 for original resolution. | +| `as_top` | boolean | no | If true, captures TOP operators directly (preserves alpha). Non-TOP operators fall back to viewer. | +| `output_dir` | string | no | Optional absolute path to a directory. Each screenshot saved as .jpg or .png inside it and kept on disk. | +| `format` | `auto` / `jpg` / `png` | no | Image format. 'auto' (default): JPEG for viewer mode, PNG for as_top=true. 'jpg': always JPEG (smaller). 'png': always PNG (lossless). | + +### td_get_screen_screenshot + +Capture a screenshot of the actual screen via TD's screenGrabTOP. 
Saves the image to a file and returns the file path. Use your file-reading tool to view the image. Unlike td_get_screenshot (operator viewer), this shows what the user literally sees on their monitor — TD windows, UI panels, everything. Use when simulating mouse/keyboard input to verify what happened on screen. Workflow: td_get_screen_screenshot → read file → td_input_execute → wait idle → td_get_screen_screenshot again. TWO-STEP ASYNC: Step 1 — call without request_id: returns {'status':'pending','requestId':'...'}. Step 2 — call with request_id: returns {'file': '/tmp/.../screen_id.jpg', 'info': '...metadata...'}. Then read the file to see the image. The requestId also stays usable with td_screen_point_to_global for later coordinate lookup. crop_x/y/w/h are in ACTUAL SCREEN PIXELS (not image pixels). Crops exceeding screen bounds are auto-clamped. SMART DEFAULTS: max_size is auto when omitted — 1920 for full screen (good overview), max(crop_w,crop_h) for cropped (guarantees 1:1 scale). At 1:1 scale: screen_coord = crop_origin + image_pixel. Otherwise use the formula from metadata. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `request_id` | string | no | Request ID from step 1 to retrieve the completed screenshot. | +| `max_size` | integer | no | Max pixel size for the longer side. Auto when omitted: 1920 for full screen, max(crop_w,crop_h) for cropped (1:1). Set explicitly to override. | +| `crop_x` | integer | no | Left edge in screen pixels. | +| `crop_y` | integer | no | Top edge in screen pixels (y=0 at top of screen). | +| `crop_w` | integer | no | Width in pixels. | +| `crop_h` | integer | no | Height in pixels. | +| `display` | integer | no | Screen index (default 0 = primary display). | + +## Context & Focus + +### td_get_focus + +Get the current user focus in TouchDesigner (TD): which network is open, selected operators, current operator, and rollover (what is under the mouse cursor). 
IMPORTANT: when the user says 'this operator' or 'вот этот', they mean the SELECTED/CURRENT operator, NOT the rollover. Rollover is just incidental mouse position and should be ignored for intent. Pass screenshots=true to immediately start a screenshot batch for all selected operators — response includes a 'screenshots' field with batchId; retrieve with td_get_screenshots(batch_id=...). + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `screenshots` | boolean | no | If true, start a screenshot batch for all selected operators. Retrieve with td_get_screenshots(batch_id=...). | +| `max_size` | integer | no | Max screenshot size when screenshots=true (default 512). | +| `as_top` | boolean | no | Passed to the screenshot batch when screenshots=true. | + +### td_get_errors + +Find errors and warnings in TouchDesigner (TD) operators. Checks operator errors, warnings, AND broken parameter expressions (missing channels, bad references, etc). Also includes recent script errors from the log (tracebacks), grouped and deduplicated — e.g. 1000 identical mouse-move errors shown as ×1000 with one entry. If path is given, checks that operator and its children. If no path, checks the currently open network. Use '/' for entire project. Use when user says something is broken, has errors, red nodes, горит ошибка, etc. TIP: call td_clear_textport before reproducing an error to keep log focused. TIP: combine with td_get_perf when user says 'тупит/лагает' to check both errors and performance. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | no | Path to check. If omitted, checks the current network. Use '/' to scan entire project. | +| `recursive` | boolean | no | Check children recursively (default true) | +| `include_log` | boolean | no | Include recent script errors from log, grouped by unique signature (default true). 
Use td_clear_textport before reproducing an error to keep results focused. | + +### td_get_perf + +Get performance data from TouchDesigner (TD). Returns TSV: header with fps/budget/memory summary, then slowest operators sorted by cook time. Columns: path, OPType, cpu/cook(ms), gpu/cook(ms), cpu/s, gpu/s, rate, flags. Use when user reports lag, low FPS, slow performance, тупит, тормозит. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | no | Path to profile. If omitted, profiles the current network. Use '/' for entire project. | +| `top` | integer | no | Number of slowest operators to return | + +## Documentation + +### td_get_docs + +Get comprehensive documentation on a TouchDesigner topic. Unlike td_get_hints (compact tips), this returns in-depth reference material. Call without arguments to see available topics with descriptions. Call with a topic name to get the full documentation. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `topic` | string | no | Topic to get docs for. Omit to list available topics. | + +### td_get_hints + +Get TouchDesigner tips and common patterns for a topic. Call this BEFORE creating operators or writing TD Python code to learn correct parameter names, expressions, and idiomatic approaches. Available topics: animation, noise, connections, parameters, scripting, construction, ui_analysis, panel_layout, screenshots, input_simulation, undo. IMPORTANT: always call with topic='construction' before building multi-operator setups to get correct TOP/CHOP parameter names, compositeTOP input ordering, and layout guidelines. IMPORTANT: always call with topic='input_simulation' before using td_input_execute to learn focus recovery, coordinate systems, and testing workflow. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `topic` | string | yes | Topic to get hints for. 
Available: 'animation', 'noise', 'connections', 'parameters', 'scripting', 'construction', 'ui_analysis', 'panel_layout', 'screenshots', 'input_simulation', 'undo', 'networking', 'all' | + +### td_agents_md + +Read, write, or update the agents_md documentation inside a COMP container. agents_md is a Markdown textDAT describing the container's purpose, structure, and conventions. action='read': returns content + staleness check (compares documented children vs live state). action='update': refreshes auto-generated sections (children list, connections) from live state, preserves human-written sections. action='write': sets full content, creates the DAT if missing. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | yes | Path to the COMP container | +| `action` | `read` / `update` / `write` | yes | read=get content+staleness, update=refresh auto sections, write=set content | +| `content` | string | no | Markdown content (only for action='write') | + +## Input Automation + +### td_input_execute + +Send a sequence of mouse/keyboard commands to TouchDesigner. Commands execute sequentially with smooth bezier movement. Returns immediately — poll td_input_status() until status='idle' before proceeding. Command types: 'focus' — bring TD to foreground. 'move' — smooth mouse move: {type,x,y,duration,easing}. 'click' — click: {type,x,y,button,hold,duration,easing}. hold=seconds to hold down. duration=smooth move before click. 'dblclick' — double click: {type,x,y,duration}. 'mousedown'/'mouseup' — {type,x,y,button}. 'key' — keystroke: {type,keys} e.g. 'ctrl+z','tab','escape','shift+f5'. Requires Accessibility permission on Mac. 'type' — human-like typing: {type,text,wpm,variance} — layout-independent Unicode, variable timing. 'wait' — pause: {type,duration}. 
'scroll' — {type,x,y,dx,dy,steps} — human-like scroll: moves mouse to (x,y) first, then sends dy (vertical, +up) and dx (horizontal, +right) as multiple ticks with natural timing. steps=4 by default. Mouse commands may include coord_space='logical' (default) or coord_space='physical'. On macOS, 'physical' means actual screen pixels from td_get_screen_screenshot and is converted to CGEvent logical coords automatically. Top-level coord_space applies to commands that do not override it. on_error: 'stop' (default) clears queue on error; 'continue' skips failed command. IMPORTANT: call td_get_hints('input_simulation') before first use to learn focus recovery, coordinate systems, and testing workflow. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `commands` | array | yes | List of command dicts to execute in sequence. | +| `coord_space` | `logical` / `physical` | no | Default coordinate space for mouse commands that do not specify their own coord_space. 'logical' uses CGEvent coords directly. 'physical' uses actual screen pixels from td_get_screen_screenshot and is auto-converted on macOS. | +| `on_error` | `stop` / `continue` | no | What to do on error. Default 'stop'. | + +### td_input_status + +Get current status of the td_input command queue. Poll this after td_input_execute until status='idle'. Returns: status ('idle'/'running'), current command, queue_remaining, last error. + +No parameters (other than optional `target_instance`). + +### td_input_clear + +Clear the td_input command queue and stop current execution immediately. + +No parameters (other than optional `target_instance`). + +### td_op_screen_rect + +Get the screen coordinates of an operator node in the network editor. Returns {x,y,w,h,cx,cy} where cx,cy is the center for clicking. Use this to find where to click on a specific operator. Only works if the operator's parent network is currently open in a network editor pane. 
+ +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | yes | Full path to the operator, e.g. '/project1/myComp/noise1' | + +### td_click_screen_point + +Resolve a point inside a previous td_get_screen_screenshot result and click it. Pass the screenshot request_id plus either normalized u/v or image_x/image_y. Queues a td_input click using physical screen coordinates, so it works directly with screenshot-derived points. Use duration/easing to control the cursor travel before the click. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `request_id` | string | yes | Request ID originally returned by td_get_screen_screenshot. | +| `u` | number | no | Normalized horizontal position inside the screenshot region (0=left, 1=right). Use with v. | +| `v` | number | no | Normalized vertical position inside the screenshot region (0=top, 1=bottom). Use with u. | +| `image_x` | number | no | Horizontal pixel coordinate inside the returned screenshot image. Use with image_y. | +| `image_y` | number | no | Vertical pixel coordinate inside the returned screenshot image. Use with image_x. | +| `button` | `left` / `right` / `middle` | no | Mouse button to click. Default left. | +| `hold` | number | no | Seconds to hold the mouse button down before releasing. | +| `duration` | number | no | Seconds for the cursor to travel to the target before clicking. | +| `easing` | `linear` / `ease-in` / `ease-out` / `ease-in-out` | no | Cursor movement easing for the pre-click travel. | +| `focus` | boolean | no | If true, bring TD to the front before clicking and wait briefly for focus to settle. | + +### td_screen_point_to_global + +Convert a point inside a previous td_get_screen_screenshot result into absolute screen coordinates. Pass the screenshot request_id plus either normalized u/v (0..1 inside that screenshot region) or image_x/image_y in returned image pixels. 
Returns absolute physical screen coordinates, logical coordinates, and a ready-to-use td_input_execute payload. Metadata is kept for the most recent screen screenshots so multiple agents can resolve points later by request_id. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `request_id` | string | yes | Request ID originally returned by td_get_screen_screenshot. | +| `u` | number | no | Normalized horizontal position inside the screenshot region (0=left, 1=right). Use with v. | +| `v` | number | no | Normalized vertical position inside the screenshot region (0=top, 1=bottom). Use with u. | +| `image_x` | number | no | Horizontal pixel coordinate inside the returned screenshot image. Use with image_y. | +| `image_y` | number | no | Vertical pixel coordinate inside the returned screenshot image. Use with image_x. | + +## System + +### td_list_instances + +List all running TouchDesigner (TD) instances with active MCP servers. Returns port, project name, PID, and instanceId for each instance. Call this at the start of every conversation to discover available instances and choose which one to work with. instanceId is stable for the lifetime of a TD process and is used as target_instance in all other tool calls. + +No parameters (other than optional `target_instance`). + +### td_project_quit + +Save and/or close the current TouchDesigner (TD) project. Can save before closing. Reports if project has unsaved changes. To close a different instance, pass target_instance=instanceId. WARNING: this will shut down the MCP server on that instance. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `save` | boolean | no | Save the project before closing. Default true. | +| `force` | boolean | no | Force close without save dialog. Default false. | + +### td_reinit_extension + +Reinitialize an extension on a COMP in TouchDesigner (TD). 
Call this AFTER finishing all code edits via td_write_dat to apply changes. Do NOT call after every small edit - batch your changes first. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `path` | string | yes | Path to the COMP with the extension | + +### td_dev_log + +Read the last N entries from the MCP dev log. Only available when Devmode is enabled. Shows request/response history. + +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `count` | integer | no | Number of recent log entries to return | + +### td_clear_dev_log + +Clear the current MCP dev log by closing the old file and starting a fresh one. Only available when Devmode is enabled. + +No parameters (other than optional `target_instance`). + +### td_test_session + +Manage test sessions, bug reports, and conversation export. IMPORTANT: Do NOT proactively suggest exporting chat or submitting reports. These are tools for specific situations: - export_chat / submit_report: ONLY when the user encounters a BUG with the plugin or TouchDesigner and wants to report it, or when the user explicitly asks to export the conversation. Never suggest this at session end or as routine action. USER PHRASES → ACTIONS: 'разбор тестовых сессий' / 'analyze test sessions' → list, then pull, read meta.json → index.jsonl → calls/. 'разбор репортов' / 'analyze user reports' → list with session='user', then pull by name. 'экспортируй чат' / 'export chat' → (1) export_chat_id → marker, (2) export_chat with session=marker. 'сообщи о проблеме' / 'report bug' → export chat, review for privacy, then submit_report with summary + tags + result_op=file_path. ACTIONS: export_chat_id | export_chat | submit_report | start | note | import_chat | end | list | pull. list: default=auto-detect repo. session='user' for user_reports (dev only). pull: auto-searches both repos. Auto-detects dev vs user Hub access. 
+ +| Param | Type | Required | Description | +|-------|------|----------|-------------| +| `action` | `export_chat_id` / `export_chat` / `submit_report` / `start` / `note` / `import_chat` / `end` / `list` / `pull` | yes | Action: export_chat_id / export_chat / submit_report / start / note / import_chat / end / list / pull | +| `prompt` | string | no | (start) The test prompt/task description | +| `tags` | array | no | (start) Tags for categorization, e.g. ['ui', 'layout'] | +| `text` | string | no | (note) Observation text. (import_chat) Full conversation text. | +| `outcome` | `success` / `partial` / `failure` | no | (end) Result: success / partial / failure | +| `summary` | string | no | (end) Brief summary of what happened | +| `result_op` | string | no | (end) Path to operator to save as result.tox | +| `session` | string | no | (pull) Session name or substring to download | diff --git a/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md b/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md new file mode 100644 index 0000000000..cb04fd54d5 --- /dev/null +++ b/optional-skills/creative/touchdesigner-mcp/references/network-patterns.md @@ -0,0 +1,966 @@ +# TouchDesigner Network Patterns + +Complete network recipes for common creative coding tasks. Each pattern shows the operator chain, MCP tool calls to build it, and key parameter settings. + +## Audio-Reactive Visuals + +### Pattern 1: Audio Spectrum -> Noise Displacement + +Audio drives noise parameters for organic, music-responsive textures. + +``` +Audio File In CHOP -> Audio Spectrum CHOP -> Math CHOP (scale) + | + v (export to noise params) + Noise TOP -> Level TOP -> Feedback TOP -> Composite TOP -> Null TOP (out) + ^ | + |________________| +``` + +**MCP Build Sequence:** + +``` +1. td_create_operator(parent="/project1", type="audiofileinChop", name="audio_in") +2. td_create_operator(parent="/project1", type="audiospectrumChop", name="spectrum") +3. 
td_create_operator(parent="/project1", type="mathChop", name="spectrum_scale") +4. td_create_operator(parent="/project1", type="noiseTop", name="noise1") +5. td_create_operator(parent="/project1", type="levelTop", name="level1") +6. td_create_operator(parent="/project1", type="feedbackTop", name="feedback1") +7. td_create_operator(parent="/project1", type="compositeTop", name="comp1") +8. td_create_operator(parent="/project1", type="nullTop", name="out") + +9. td_set_operator_pars(path="/project1/audio_in", + properties={"file": "/path/to/music.wav", "play": true}) +10. td_set_operator_pars(path="/project1/spectrum", + properties={"size": 512}) +11. td_set_operator_pars(path="/project1/spectrum_scale", + properties={"gain": 2.0, "postoff": 0.0}) +12. td_set_operator_pars(path="/project1/noise1", + properties={"type": 1, "monochrome": false, "resolutionw": 1280, "resolutionh": 720, + "period": 4.0, "harmonics": 3, "amp": 1.0}) +13. td_set_operator_pars(path="/project1/level1", + properties={"opacity": 0.95, "gamma1": 0.75}) +14. td_set_operator_pars(path="/project1/feedback1", + properties={"top": "/project1/comp1"}) +15. td_set_operator_pars(path="/project1/comp1", + properties={"operand": 0}) + +16. td_execute_python: """ +op('/project1/audio_in').outputConnectors[0].connect(op('/project1/spectrum')) +op('/project1/spectrum').outputConnectors[0].connect(op('/project1/spectrum_scale')) +op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1')) +op('/project1/level1').outputConnectors[0].connect(op('/project1/comp1').inputConnectors[0]) +op('/project1/feedback1').outputConnectors[0].connect(op('/project1/comp1').inputConnectors[1]) +op('/project1/comp1').outputConnectors[0].connect(op('/project1/out')) +""" + +17. 
td_execute_python: """ +# Export spectrum values to drive noise parameters +# This makes the noise react to audio frequencies +op('/project1/noise1').par.seed.expr = "op('/project1/spectrum_scale')['chan1']" +op('/project1/noise1').par.period.expr = "tdu.remap(op('/project1/spectrum_scale')['chan1'].eval(), 0, 1, 1, 8)" +""" +``` + +### Pattern 2: Beat Detection -> Visual Pulses + +Detect beats from audio and trigger visual events. + +``` +Audio Device In CHOP -> Audio Spectrum CHOP -> Math CHOP (isolate bass) + | + Trigger CHOP (envelope) + | + [export to visual params] +``` + +**Key parameter settings:** + +``` +# Isolate bass frequencies (20-200 Hz) +Math CHOP: chanop=1 (Add channels), range1low=0, range1high=10 + (first 10 FFT bins = bass frequencies with 512 FFT at 44100Hz) + +# ADSR envelope on each beat +Trigger CHOP: attack=0.02, peak=1.0, decay=0.3, sustain=0.0, release=0.1 + +# Export to visual: Scale, brightness, or color intensity +td_execute_python: "op('/project1/level1').par.brightness1.expr = \"1.0 + op('/project1/trigger1')['chan1'] * 0.5\"" +``` + +### Pattern 3: Multi-Band Audio -> Multi-Layer Visuals + +Split audio into frequency bands, drive different visual layers per band. + +``` +Audio In -> Spectrum -> Audio Band EQ (3 bands: bass, mid, treble) + | + +---------+---------+ + | | | + Bass Mids Treble + | | | + Noise TOP Circle TOP Text TOP + (slow,dark) (mid,warm) (fast,bright) + | | | + +-----+----+----+----+ + | | + Composite Composite + | + Out +``` + +### Pattern 3b: Audio-Reactive GLSL Fractal (Proven Recipe) + +Complete working recipe. Plays an MP3, runs FFT, feeds spectrum as a texture into a GLSL shader where inner fractal reacts to bass, outer to treble. 
+ +**Network:** +``` +AudioFileIn CHOP → AudioSpectrum CHOP (FFT=512, outlength=256) + → Math CHOP (gain=5) → CHOP To TOP (256x2 spectrum texture, dataformat=r) + ↓ +Constant TOP (time, rgba32float) → GLSL TOP (input 0=time, input 1=spectrum) → Null → MovieFileOut + ↓ +AudioFileIn CHOP → Audio Device Out CHOP Record to .mov +``` + +**Build via td_execute_python (one call per step for reliability):** + +```python +# Step 1: Audio chain +# td_execute_python script: +td_execute_python(code=""" +root = op('/project1') +audio = root.create(audiofileinCHOP, 'audio_in') +audio.par.file = '/path/to/music.mp3' +audio.par.playmode = 0 # Locked to timeline +audio.par.volume = 0.5 + +spec = root.create(audiospectrumCHOP, 'spectrum') +audio.outputConnectors[0].connect(spec.inputConnectors[0]) + +math_n = root.create(mathCHOP, 'math_norm') +spec.outputConnectors[0].connect(math_n.inputConnectors[0]) +math_n.par.gain = 5 # boost signal + +resamp = root.create(resampleCHOP, 'resample_spec') +math_n.outputConnectors[0].connect(resamp.inputConnectors[0]) +resamp.par.timeslice = True +resamp.par.rate = 256 + +chop2top = root.create(choptoTOP, 'spectrum_tex') +chop2top.par.chop = resamp # CHOP To TOP has NO input connectors — use par.chop reference + +# Audio output (hear the music) +aout = root.create(audiodeviceoutCHOP, 'audio_out') +audio.outputConnectors[0].connect(aout.inputConnectors[0]) +result = 'audio chain ok' +""") + +# Step 2: Time driver (MUST be rgba32float — see pitfalls #6) +# td_execute_python script: +td_execute_python(code=""" +root = op('/project1') +td = root.create(constantTOP, 'time_driver') +td.par.format = 'rgba32float' +td.par.outputresolution = 'custom' +td.par.resolutionw = 1 +td.par.resolutionh = 1 +td.par.colorr.expr = "absTime.seconds % 1000.0" +td.par.colorg.expr = "int(absTime.seconds / 1000.0)" +result = 'time ok' +""") + +# Step 3: GLSL shader (write to /tmp, load from file) +# td_execute_python script: +td_execute_python(code=""" +root = 
op('/project1') +glsl = root.create(glslTOP, 'audio_shader') +glsl.par.outputresolution = 'custom' +glsl.par.resolutionw = 1280 +glsl.par.resolutionh = 720 + +sd = root.create(textDAT, 'shader_code') +sd.text = open('/tmp/my_shader.glsl').read() +glsl.par.pixeldat = sd + +# Wire: input 0 = time, input 1 = spectrum texture +op('/project1/time_driver').outputConnectors[0].connect(glsl.inputConnectors[0]) +op('/project1/spectrum_tex').outputConnectors[0].connect(glsl.inputConnectors[1]) +result = 'glsl ok' +""") + +# Step 4: Output + recorder +# td_execute_python script: +td_execute_python(code=""" +root = op('/project1') +out = root.create(nullTOP, 'output') +op('/project1/audio_shader').outputConnectors[0].connect(out.inputConnectors[0]) + +rec = root.create(moviefileoutTOP, 'recorder') +out.outputConnectors[0].connect(rec.inputConnectors[0]) +rec.par.type = 'movie' +rec.par.file = '/tmp/output.mov' +rec.par.videocodec = 'mjpa' +result = 'output ok' +""") +``` + +**GLSL shader pattern (audio-reactive fractal):** +```glsl +out vec4 fragColor; + +vec3 palette(float t) { + vec3 a = vec3(0.5); vec3 b = vec3(0.5); + vec3 c = vec3(1.0); vec3 d = vec3(0.263, 0.416, 0.557); + return a + b * cos(6.28318 * (c * t + d)); +} + +void main() { + // Input 0 = time (1x1 rgba32float constant) + // Input 1 = audio spectrum (256x2 CHOP To TOP, stereo — sample at y=0.25 for first channel) + vec4 td = texture(sTD2DInputs[0], vec2(0.5)); + float t = td.r + td.g * 1000.0; + + vec2 res = uTDOutputInfo.res.zw; + vec2 uv = (gl_FragCoord.xy * 2.0 - res) / min(res.x, res.y); + vec2 uv0 = uv; + vec3 finalColor = vec3(0.0); + + float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r; + float mids = texture(sTD2DInputs[1], vec2(0.25, 0.25)).r; + + for (float i = 0.0; i < 4.0; i++) { + uv = fract(uv * (1.4 + bass * 0.3)) - 0.5; + float d = length(uv) * exp(-length(uv0)); + + // Sample spectrum at distance: inner=bass, outer=treble + float freq = texture(sTD2DInputs[1], vec2(clamp(d * 0.5, 0.0, 
1.0), 0.25)).r; + + vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35); + d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0; + d = abs(d); + d = pow(0.012 / d, 1.2 + freq * 0.8 + bass * 0.5); + finalColor += col * d; + } + + // Tone mapping + finalColor = finalColor / (finalColor + vec3(1.0)); + fragColor = TDOutputSwizzle(vec4(finalColor, 1.0)); +} +``` + +**Key insights from testing:** +- `spectrum_tex` (CHOP To TOP) produces a 256x2 texture — x position = frequency, y=0.25 for first channel +- Sampling at `vec2(0.05, 0.25)` gets bass, `vec2(0.65, 0.25)` gets treble +- Sampling based on pixel distance (`d * 0.5`) makes inner fractal react to bass, outer to treble +- `bass * 0.3` in the `fract()` zoom makes the fractal breathe with kicks +- Math CHOP gain of 5 is needed because raw spectrum values are very small + +## Generative Art + +### Pattern 4: Feedback Loop with Transform + +Classic generative technique — texture evolves through recursive transformation. + +``` +Noise TOP -> Composite TOP -> Level TOP -> Null TOP (out) + ^ | + | v + Transform TOP <- Feedback TOP +``` + +**MCP Build Sequence:** + +``` +1. td_create_operator(parent="/project1", type="noiseTop", name="seed_noise") +2. td_create_operator(parent="/project1", type="compositeTop", name="mix") +3. td_create_operator(parent="/project1", type="transformTop", name="evolve") +4. td_create_operator(parent="/project1", type="feedbackTop", name="fb") +5. td_create_operator(parent="/project1", type="levelTop", name="color_correct") +6. td_create_operator(parent="/project1", type="nullTop", name="out") + +7. td_set_operator_pars(path="/project1/seed_noise", + properties={"type": 1, "monochrome": false, "period": 2.0, "amp": 0.3, + "resolutionw": 1280, "resolutionh": 720}) +8. td_set_operator_pars(path="/project1/mix", + properties={"operand": 27}) # 27 = Screen blend +9. 
td_set_operator_pars(path="/project1/evolve", + properties={"sx": 1.003, "sy": 1.003, "rz": 0.5, "extend": 2}) # slight zoom + rotate, repeat edges +10. td_set_operator_pars(path="/project1/fb", + properties={"top": "/project1/mix"}) +11. td_set_operator_pars(path="/project1/color_correct", + properties={"opacity": 0.98, "gamma1": 0.85}) + +12. td_execute_python: """ +op('/project1/seed_noise').outputConnectors[0].connect(op('/project1/mix').inputConnectors[0]) +op('/project1/fb').outputConnectors[0].connect(op('/project1/evolve')) +op('/project1/evolve').outputConnectors[0].connect(op('/project1/mix').inputConnectors[1]) +op('/project1/mix').outputConnectors[0].connect(op('/project1/color_correct')) +op('/project1/color_correct').outputConnectors[0].connect(op('/project1/out')) +""" +``` + +**Variations:** +- Change Transform: `rz` (rotation), `sx/sy` (zoom), `tx/ty` (drift) +- Change Composite operand: Screen (glow), Add (bright), Multiply (dark) +- Add HSV Adjust in the feedback loop for color evolution +- Add Blur for dreamlike softness +- Replace Noise with a GLSL TOP for custom seed patterns + +### Pattern 5: Instancing (Particle-Like Systems) + +Render thousands of copies of geometry, each with unique position/rotation/scale driven by CHOP data or DATs. + +``` +Table DAT (instance data) -> DAT to CHOP -> Geometry COMP (instancing on) -> Render TOP + + Sphere SOP (template geometry) + + Constant MAT (material) + + Camera COMP + + Light COMP +``` + +**MCP Build Sequence:** + +``` +1. td_create_operator(parent="/project1", type="tableDat", name="instance_data") +2. td_create_operator(parent="/project1", type="geometryComp", name="geo1") +3. td_create_operator(parent="/project1/geo1", type="sphereSop", name="sphere") +4. td_create_operator(parent="/project1", type="constMat", name="mat1") +5. td_create_operator(parent="/project1", type="cameraComp", name="cam1") +6. td_create_operator(parent="/project1", type="lightComp", name="light1") +7. 
td_create_operator(parent="/project1", type="renderTop", name="render1") + +8. td_execute_python: """ +import random, math +dat = op('/project1/instance_data') +dat.clear() +dat.appendRow(['tx', 'ty', 'tz', 'sx', 'sy', 'sz', 'cr', 'cg', 'cb']) +for i in range(500): + angle = i * 0.1 + r = 2 + i * 0.01 + dat.appendRow([ + str(math.cos(angle) * r), + str(math.sin(angle) * r), + str((i - 250) * 0.02), + '0.05', '0.05', '0.05', + str(random.random()), + str(random.random()), + str(random.random()) + ]) +""" + +9. td_set_operator_pars(path="/project1/geo1", + properties={"instancing": true, "instancechop": "", + "instancedat": "/project1/instance_data", + "material": "/project1/mat1"}) +10. td_set_operator_pars(path="/project1/render1", + properties={"camera": "/project1/cam1", "geometry": "/project1/geo1", + "light": "/project1/light1", + "resolutionw": 1280, "resolutionh": 720}) +11. td_set_operator_pars(path="/project1/cam1", + properties={"tz": 10}) +``` + +### Pattern 6: Reaction-Diffusion (GLSL) + +Classic Gray-Scott reaction-diffusion system running on the GPU. + +``` +Text DAT (GLSL code) -> GLSL TOP (resolution, dat reference) -> Feedback TOP + ^ | + |_______________________________________| + Level TOP (out) +``` + +**Key GLSL code (write to Text DAT via td_execute_python):** + +```glsl +// Gray-Scott reaction-diffusion +uniform float feed; // 0.037 +uniform float kill; // 0.06 +uniform float dA; // 1.0 +uniform float dB; // 0.5 + +layout(location = 0) out vec4 fragColor; + +void main() { + vec2 uv = vUV.st; + vec2 texel = 1.0 / uTDOutputInfo.res.zw; + + vec4 c = texture(sTD2DInputs[0], uv); + float a = c.r; + float b = c.g; + + // Laplacian (9-point stencil) + float lA = 0.0, lB = 0.0; + for(int dx = -1; dx <= 1; dx++) { + for(int dy = -1; dy <= 1; dy++) { + float w = (dx == 0 && dy == 0) ? -1.0 : (abs(dx) + abs(dy) == 1 ? 
0.2 : 0.05); + vec4 s = texture(sTD2DInputs[0], uv + vec2(dx, dy) * texel); + lA += s.r * w; + lB += s.g * w; + } + } + + float reaction = a * b * b; + float newA = a + (dA * lA - reaction + feed * (1.0 - a)); + float newB = b + (dB * lB + reaction - (kill + feed) * b); + + fragColor = vec4(clamp(newA, 0.0, 1.0), clamp(newB, 0.0, 1.0), 0.0, 1.0); +} +``` + +## Video Processing + +### Pattern 7: Video Effects Chain + +Apply a chain of effects to a video file. + +``` +Movie File In TOP -> HSV Adjust TOP -> Level TOP -> Blur TOP -> Composite TOP -> Null TOP (out) + ^ + Text TOP ---+ +``` + +**MCP Build Sequence:** + +``` +1. td_create_operator(parent="/project1", type="moviefileinTop", name="video_in") +2. td_create_operator(parent="/project1", type="hsvadjustTop", name="color") +3. td_create_operator(parent="/project1", type="levelTop", name="levels") +4. td_create_operator(parent="/project1", type="blurTop", name="blur") +5. td_create_operator(parent="/project1", type="compositeTop", name="overlay") +6. td_create_operator(parent="/project1", type="textTop", name="title") +7. td_create_operator(parent="/project1", type="nullTop", name="out") + +8. td_set_operator_pars(path="/project1/video_in", + properties={"file": "/path/to/video.mp4", "play": true}) +9. td_set_operator_pars(path="/project1/color", + properties={"hueoffset": 0.1, "saturationmult": 1.3}) +10. td_set_operator_pars(path="/project1/levels", + properties={"brightness1": 1.1, "contrast": 1.2, "gamma1": 0.9}) +11. td_set_operator_pars(path="/project1/blur", + properties={"sizex": 2, "sizey": 2}) +12. td_set_operator_pars(path="/project1/title", + properties={"text": "My Video", "fontsizex": 48, "alignx": 1, "aligny": 1}) + +13. 
td_execute_python: """ +chain = ['video_in', 'color', 'levels', 'blur'] +for i in range(len(chain) - 1): + op(f'/project1/{chain[i]}').outputConnectors[0].connect(op(f'/project1/{chain[i+1]}')) +op('/project1/blur').outputConnectors[0].connect(op('/project1/overlay').inputConnectors[0]) +op('/project1/title').outputConnectors[0].connect(op('/project1/overlay').inputConnectors[1]) +op('/project1/overlay').outputConnectors[0].connect(op('/project1/out')) +""" +``` + +### Pattern 8: Video Recording + +Record the output to a file. **H.264/H.265 require a Commercial license** — use Motion JPEG (`mjpa`) on Non-Commercial. + +``` +[any TOP chain] -> Null TOP -> Movie File Out TOP +``` + +```python +# Build via td_execute_python: +root = op('/project1') + +# Always put a Null TOP before the recorder +null_out = root.op('out') # or create one +rec = root.create(moviefileoutTOP, 'recorder') +null_out.outputConnectors[0].connect(rec.inputConnectors[0]) + +rec.par.type = 'movie' +rec.par.file = '/tmp/output.mov' +rec.par.videocodec = 'mjpa' # Motion JPEG — works on Non-Commercial + +# Start recording (par.record is a toggle — .record() method may not exist) +rec.par.record = True +# ... let TD run for desired duration ... +rec.par.record = False + +# For image sequences: +# rec.par.type = 'imagesequence' +# rec.par.imagefiletype = 'png' +# rec.par.file.expr = "'/tmp/frames/out' + me.fileSuffix" # fileSuffix REQUIRED +``` + +**Pitfalls:** +- Setting `par.file` + `par.record = True` in the same script may race — use `run("...", delayFrames=2)` +- `TOP.save()` called rapidly always captures the same frame — use MovieFileOut for animation +- See `pitfalls.md` #25-27 for full details + +### Pattern 8b: TD → External Pipeline (FFmpeg / Python / Post-Processing) + +Export TD visuals for use in another tool (ffmpeg, Python, ASCII art, etc.). 
This is the standard workflow when you need to composite TD output with external processing (ASCII conversion, Python shader chains, ML inference, etc.). + +**Step 1: Record to video in TD** + +```python +# Preferred: ProRes on macOS (lossless, Non-Commercial OK, ~55MB/s at 1280x720) +rec.par.videocodec = 'prores' +# Fallback for non-macOS: mjpa (Motion JPEG) +# rec.par.videocodec = 'mjpa' +rec.par.record = True +# ... wait N seconds ... +rec.par.record = False +``` + +**Step 2: Extract frames with ffmpeg** + +```bash +# Extract all frames at 30fps +ffmpeg -y -i /tmp/output.mov -vf 'fps=30' /tmp/frames/frame_%06d.png + +# Or extract a specific duration +ffmpeg -y -i /tmp/output.mov -t 25 -vf 'fps=30' /tmp/frames/frame_%06d.png + +# Or extract specific frame range +ffmpeg -y -i /tmp/output.mov -vf 'select=between(n\,0\,749)' -vsync vfr /tmp/frames/frame_%06d.png +``` + +**Step 3: Process frames in Python** + +```python +from PIL import Image +import os + +frames_dir = '/tmp/frames' +output_dir = '/tmp/processed' +os.makedirs(output_dir, exist_ok=True) + +for fname in sorted(os.listdir(frames_dir)): + if not fname.endswith('.png'): + continue + img = Image.open(os.path.join(frames_dir, fname)) + # ... apply your processing ... 
+ img.save(os.path.join(output_dir, fname)) +``` + +**Step 4: Mux processed frames back with audio** + +```bash +# Create video from processed frames + audio with fade-out +ffmpeg -y \ + -framerate 30 -i /tmp/processed/frame_%06d.png \ + -i /tmp/audio.mp3 \ + -c:v libx264 -pix_fmt yuv420p -crf 18 \ + -c:a aac -b:a 192k \ + -shortest \ + -af 'afade=t=out:st=23:d=2' \ + /tmp/final_output.mp4 +``` + +**Key considerations:** +- Use ProRes for the TD recording step to avoid generation loss during compositing +- Extract at the target output framerate (not TD's render framerate) +- For audio-synced content, analyze the audio file separately in Python (scipy FFT) to get per-frame features (rms, spectral bands, beats) and drive compositing parameters +- Always verify TD FPS > 0 before recording (see pitfalls #37, #38) + +## Data Visualization + +### Pattern 9: Table Data -> Bar Chart via Instancing + +Visualize tabular data as a 3D bar chart. + +``` +Table DAT (data) -> Script DAT (transform to instance format) -> DAT to CHOP + | +Box SOP -> Geometry COMP (instancing from CHOP) -> Render TOP -> Null TOP (out) + + PBR MAT + + Camera COMP + + Light COMP +``` + +```python +# Script DAT code to transform data to instance positions +td_execute_python: """ +source = op('/project1/data_table') +instance = op('/project1/instance_transform') +instance.clear() +instance.appendRow(['tx', 'ty', 'tz', 'sx', 'sy', 'sz', 'cr', 'cg', 'cb']) + +for i in range(1, source.numRows): + value = float(source[i, 'value']) + name = source[i, 'name'] + instance.appendRow([ + str(i * 1.5), # x position (spread bars) + str(value / 2), # y position (center bar vertically) + '0', # z position + '1', str(value), '1', # scale (height = data value) + '0.2', '0.6', '1.0' # color (blue) + ]) +""" +``` + +### Pattern 9b: Audio-Reactive GLSL Fractal (Proven Recipe) + +Audio spectrum drives a GLSL fractal shader directly via a spectrum texture input. 
Bass thickens inner fractal lines, mids twist rotation, highs light outer edges. **Always run discovery (SKILL.md Step 0) before using any param names from these recipes — they may differ in your TD version.** + +``` +Audio File In CHOP → Audio Spectrum CHOP (FFT=512, outlength=256) + → Math CHOP (gain=10) + → CHOP To TOP (spectrum texture, 256x2, dataformat=r) + ↓ (input 1) +Constant TOP (rgba32float, time) → GLSL TOP (audio-reactive shader) → Null TOP + (input 0) ↑ + Text DAT (shader code) +``` + +**Build via td_execute_python (complete working script):** + +```python +# td_execute_python script: +td_execute_python(code=""" +import os +root = op('/project1') + +# Audio input +audio = root.create(audiofileinCHOP, 'audio_in') +audio.par.file = '/path/to/music.mp3' +audio.par.playmode = 0 # Locked to timeline + +# FFT analysis (output length manually set to 256 bins) +spectrum = root.create(audiospectrumCHOP, 'spectrum') +audio.outputConnectors[0].connect(spectrum.inputConnectors[0]) +spectrum.par.fftsize = '512' +spectrum.par.outputmenu = 'setmanually' +spectrum.par.outlength = 256 + +# THEN boost gain on the raw spectrum (NO Lag CHOP — see pitfall #34) +math = root.create(mathCHOP, 'math_norm') +spectrum.outputConnectors[0].connect(math.inputConnectors[0]) +math.par.gain = 10 + +# Spectrum → texture (256x2 image — stereo, sample at y=0.25 for first channel) +# NOTE: choptoTOP has NO input connectors — use par.chop reference! +spec_tex = root.create(choptoTOP, 'spectrum_tex') +spec_tex.par.chop = math +spec_tex.par.dataformat = 'r' +spec_tex.par.layout = 'rowscropped' + +# Time driver (rgba32float to avoid 0-1 clamping!) 
+time_drv = root.create(constantTOP, 'time_driver') +time_drv.par.format = 'rgba32float' +time_drv.par.outputresolution = 'custom' +time_drv.par.resolutionw = 1 +time_drv.par.resolutionh = 1 +time_drv.par.colorr.expr = "absTime.seconds % 1000.0" +time_drv.par.colorg.expr = "int(absTime.seconds / 1000.0)" + +# GLSL shader +glsl = root.create(glslTOP, 'audio_shader') +glsl.par.outputresolution = 'custom' +glsl.par.resolutionw = 1280; glsl.par.resolutionh = 720 + +shader_dat = root.create(textDAT, 'shader_code') +shader_dat.text = open('/tmp/shader.glsl').read() +glsl.par.pixeldat = shader_dat + +# Wire: input 0=time, input 1=spectrum +time_drv.outputConnectors[0].connect(glsl.inputConnectors[0]) +spec_tex.outputConnectors[0].connect(glsl.inputConnectors[1]) + +# Output + audio playback +out = root.create(nullTOP, 'output') +glsl.outputConnectors[0].connect(out.inputConnectors[0]) +audio_out = root.create(audiodeviceoutCHOP, 'audio_out') +audio.outputConnectors[0].connect(audio_out.inputConnectors[0]) + +result = 'network built' +""") +``` + +**GLSL shader (reads spectrum from input 1 texture):** + +```glsl +out vec4 fragColor; + +vec3 palette(float t) { + vec3 a = vec3(0.5); vec3 b = vec3(0.5); + vec3 c = vec3(1.0); vec3 d = vec3(0.263, 0.416, 0.557); + return a + b * cos(6.28318 * (c * t + d)); +} + +void main() { + vec4 td = texture(sTD2DInputs[0], vec2(0.5)); + float t = td.r + td.g * 1000.0; + + vec2 res = uTDOutputInfo.res.zw; + vec2 uv = (gl_FragCoord.xy * 2.0 - res) / min(res.x, res.y); + vec2 uv0 = uv; + vec3 finalColor = vec3(0.0); + + float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r; + float mids = texture(sTD2DInputs[1], vec2(0.25, 0.25)).r; + float highs = texture(sTD2DInputs[1], vec2(0.65, 0.25)).r; + + float ca = cos(t * (0.15 + mids * 0.3)); + float sa = sin(t * (0.15 + mids * 0.3)); + uv = mat2(ca, -sa, sa, ca) * uv; + + for (float i = 0.0; i < 4.0; i++) { + uv = fract(uv * (1.4 + bass * 0.3)) - 0.5; + float d = length(uv) * exp(-length(uv0)); 
+        float freq = texture(sTD2DInputs[1], vec2(clamp(d*0.5, 0.0, 1.0), 0.25)).r;
+        vec3 col = palette(length(uv0) + i * 0.4 + t * 0.35);
+        d = sin(d * (7.0 + bass * 4.0) + t * 1.5) / 8.0;
+        d = abs(d);
+        d = pow(0.012 / d, 1.2 + freq * 0.8 + bass * 0.5);
+        finalColor += col * d;
+    }
+
+    float glow = (0.03 + bass * 0.05) / (length(uv0) + 0.03);
+    finalColor += vec3(0.4, 0.1, 0.7) * glow * (0.6 + 0.4 * sin(t * 2.5));
+
+    float ring = abs(length(uv0) - 0.4 - mids * 0.3);
+    finalColor += vec3(0.1, 0.6, 0.8) * (0.005 / ring) * (0.2 + highs * 0.5);
+
+    finalColor *= smoothstep(0.0, 1.0, 1.0 - dot(uv0*0.55, uv0*0.55));
+    finalColor = finalColor / (finalColor + vec3(1.0));
+
+    fragColor = TDOutputSwizzle(vec4(finalColor, 1.0));
+}
+```
+
+**How spectrum sampling drives the visual:**
+- `texture(sTD2DInputs[1], vec2(x, 0.0)).r` — x position = frequency (0=bass, 1=treble)
+- Inner fractal iterations sample lower x → react to bass
+- Outer iterations sample higher x → react to treble
+- `bass * 0.3` on `fract()` scale → fractal zoom pulses with bass
+- `bass * 4.0` on sin frequency → line density pulses with bass
+- `mids * 0.3` on rotation speed → spiral twists faster during vocal/mid sections
+- `highs * 0.5` on ring opacity → high-frequency sparkle on outer ring
+
+**Recording the output:** Use MovieFileOut TOP with `mjpa` codec (H.264 requires Commercial license). See pitfalls #16-19.
+
+## GLSL Shaders
+
+### Pattern 10: Custom Fragment Shader
+
+Write a custom visual effect as a GLSL fragment shader.
+
+```
+Text DAT (shader code) -> GLSL TOP -> Level TOP -> Null TOP (out)
+    + optional input TOPs for texture sampling
+```
+
+**Common GLSL uniforms available in TouchDesigner:**
+
+```glsl
+// Automatically provided by TD
+uniform vec4 uTDOutputInfo;  // .res.zw = resolution
+
+// NOTE: uTDCurrentTime does NOT exist in TD 099!
+// Feed time via a 1x1 Constant TOP (format=rgba32float): +// t.par.colorr.expr = "absTime.seconds % 1000.0" +// t.par.colorg.expr = "int(absTime.seconds / 1000.0)" +// Then read in GLSL: +// vec4 td = texture(sTD2DInputs[0], vec2(0.5)); +// float t = td.r + td.g * 1000.0; + +// Input textures (from connected TOP inputs) +uniform sampler2D sTD2DInputs[1]; // array of input samplers + +// From vertex shader +in vec3 vUV; // UV coordinates (0-1 range) +``` + +**Example: Plasma shader (using time from input texture)** + +```glsl +layout(location = 0) out vec4 fragColor; + +void main() { + vec2 uv = vUV.st; + // Read time from Constant TOP input 0 (rgba32float format) + vec4 td = texture(sTD2DInputs[0], vec2(0.5)); + float t = td.r + td.g * 1000.0; + + float v1 = sin(uv.x * 10.0 + t); + float v2 = sin(uv.y * 10.0 + t * 0.7); + float v3 = sin((uv.x + uv.y) * 10.0 + t * 1.3); + float v4 = sin(length(uv - 0.5) * 20.0 - t * 2.0); + + float v = (v1 + v2 + v3 + v4) * 0.25; + + vec3 color = vec3( + sin(v * 3.14159 + 0.0) * 0.5 + 0.5, + sin(v * 3.14159 + 2.094) * 0.5 + 0.5, + sin(v * 3.14159 + 4.189) * 0.5 + 0.5 + ); + + fragColor = vec4(color, 1.0); +} +``` + +### Pattern 11: Multi-Pass GLSL (Ping-Pong) + +For effects needing state across frames (particles, fluid, cellular automata), use GLSL Multi TOP with multiple passes or a Feedback TOP loop. 
+ +``` +GLSL Multi TOP (pass 0: simulation, pass 1: rendering) + + Text DAT (simulation shader) + + Text DAT (render shader) + -> Level TOP -> Null TOP (out) + ^ + |__ Feedback TOP (feeds simulation state back) +``` + +## Interactive Installations + +### Pattern 12: Mouse/Touch -> Visual Response + +``` +Mouse In CHOP -> Math CHOP (normalize to 0-1) -> [export to visual params] + +# Or for touch/multi-touch: +Multi Touch In DAT -> Script CHOP (parse touches) -> [export to visual params] +``` + +```python +# Normalize mouse position to 0-1 range +td_execute_python: """ +op('/project1/noise1').par.offsetx.expr = "op('/project1/mouse_norm')['tx']" +op('/project1/noise1').par.offsety.expr = "op('/project1/mouse_norm')['ty']" +""" +``` + +### Pattern 13: OSC Control (from external software) + +``` +OSC In CHOP (port 7000) -> Select CHOP (pick channels) -> [export to visual params] +``` + +``` +1. td_create_operator(parent="/project1", type="oscinChop", name="osc_in") +2. td_set_operator_pars(path="/project1/osc_in", properties={"port": 7000}) + +# OSC messages like /frequency 440 will appear as channel "frequency" with value 440 +# Export to any parameter: +3. 
td_execute_python: "op('/project1/noise1').par.period.expr = \"op('/project1/osc_in')['frequency']\"" +``` + +### Pattern 14: MIDI Control (DJ/VJ) + +``` +MIDI In CHOP (device) -> Select CHOP -> [export channels to visual params] +``` + +Common MIDI mappings: +- CC channels (knobs/faders): continuous 0-127, map to float params +- Note On/Off: binary triggers, map to Trigger CHOP for envelopes +- Velocity: intensity/brightness + +## Live Performance + +### Pattern 15: Multi-Source VJ Setup + +``` +Source A (generative) ----+ +Source B (video) ---------+-- Switch/Cross TOP -- Level TOP -- Window COMP (output) +Source C (camera) --------+ + ^ + MIDI/OSC control selects active source and crossfade +``` + +```python +# MIDI CC1 controls which source is active (0-127 -> 0-2) +td_execute_python: """ +op('/project1/switch1').par.index.expr = "int(op('/project1/midi_in')['cc1'] / 42)" +""" + +# MIDI CC2 controls crossfade between current and next +td_execute_python: """ +op('/project1/cross1').par.cross.expr = "op('/project1/midi_in')['cc2'] / 127.0" +""" +``` + +### Pattern 16: Projection Mapping + +``` +Content TOPs ----+ + | +Stoner TOP (UV mapping) -> Composite TOP -> Window COMP (projector output) + or +Kantan Mapper COMP (external .tox) +``` + +For projection mapping, the key is: +1. Create your visual content as standard TOPs +2. Use Stoner TOP or a third-party mapping tool to UV-map content to physical surfaces +3. 
Output via Window COMP to the projector + +### Pattern 17: Cue System + +``` +Table DAT (cue list: cue_number, scene_name, duration, transition_type) + | +Script CHOP (cue state: current_cue, progress, next_cue_trigger) + | +[export to Switch/Cross TOPs to transition between scenes] +``` + +```python +td_execute_python: """ +# Simple cue system +cue_table = op('/project1/cue_list') +cue_state = op('/project1/cue_state') + +def advance_cue(): + current = int(cue_state.par.value0.val) + next_cue = min(current + 1, cue_table.numRows - 1) + cue_state.par.value0.val = next_cue + + scene = cue_table[next_cue, 'scene'] + duration = float(cue_table[next_cue, 'duration']) + + # Set crossfade target and duration + op('/project1/cross1').par.cross.val = 0 + # Animate cross to 1.0 over duration seconds + # (use a Timer CHOP or LFO CHOP for smooth animation) +""" +``` + +## Networking + +### Pattern 18: OSC Server/Client + +``` +# Sending OSC +OSC Out CHOP -> (network) -> external application + +# Receiving OSC +(network) -> OSC In CHOP -> Select CHOP -> [use values] +``` + +### Pattern 19: NDI Video Streaming + +``` +# Send video over network +[any TOP chain] -> NDI Out TOP (source name) + +# Receive video from network +NDI In TOP (select source) -> [process as normal TOP] +``` + +### Pattern 20: WebSocket Communication + +``` +WebSocket DAT -> Script DAT (parse JSON messages) -> [update visuals] +``` + +```python +td_execute_python: """ +ws = op('/project1/websocket1') +ws.par.address = 'ws://localhost:8080' +ws.par.active = True + +# In a DAT Execute callback (Script DAT watching WebSocket DAT): +# def onTableChange(dat): +# import json +# msg = json.loads(dat.text) +# op('/project1/noise1').par.seed.val = msg.get('seed', 0) +""" +``` diff --git a/optional-skills/creative/touchdesigner-mcp/references/operators.md b/optional-skills/creative/touchdesigner-mcp/references/operators.md new file mode 100644 index 0000000000..6aa716cb9a --- /dev/null +++ 
b/optional-skills/creative/touchdesigner-mcp/references/operators.md @@ -0,0 +1,239 @@ +# TouchDesigner Operator Reference + +## Operator Families Overview + +TouchDesigner has 6 operator families. Each family processes a specific data type and is color-coded in the UI. Operators can only connect to others of the SAME family (with cross-family converters as the bridge). + +## TOPs — Texture Operators (Purple) + +2D image/texture processing on the GPU. The workhorse of visual output. + +### Generators (create images from nothing) + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Noise TOP | `noiseTop` | `type` (0-6), `monochrome`, `seed`, `period`, `harmonics`, `exponent`, `amp`, `offset`, `resolutionw/h` | Procedural noise textures — Perlin, Simplex, Sparse, etc. Foundation of generative art. | +| Constant TOP | `constantTop` | `colorr/g/b/a`, `resolutionw/h` | Solid color. Use as background or blend input. | +| Text TOP | `textTop` | `text`, `fontsizex`, `fontfile`, `alignx/y`, `colorr/g/b` | Render text to texture. Supports multi-line, word wrap. | +| Ramp TOP | `rampTop` | `type` (0=horizontal, 1=vertical, 2=radial, 3=circular), `phase`, `period` | Gradient textures for masking, color mapping. | +| Circle TOP | `circleTop` | `radiusx/y`, `centerx/y`, `width` | Circles, rings, ellipses. | +| Rectangle TOP | `rectangleTop` | `sizex/y`, `centerx/y`, `softness` | Rectangles with optional softness. | +| GLSL TOP | `glslTop` | `dat` (points to shader DAT), `resolutionw/h`, `outputformat`, custom uniforms | Custom fragment shaders. Most powerful TOP for custom visuals. | +| GLSL Multi TOP | `glslmultiTop` | `dat`, `numinputs`, `numoutputs`, `numcomputepasses` | Multi-pass GLSL with compute shaders. Advanced. | +| Render TOP | `renderTop` | `camera`, `geometry`, `lights`, `resolutionw/h` | Renders 3D scenes (SOPs + MATs + Camera/Light COMPs). 
| + +### Filters (modify a single input) + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Level TOP | `levelTop` | `opacity`, `brightness1/2`, `gamma1/2`, `contrast`, `invert`, `blacklevel/whitelevel` | Brightness, contrast, gamma, levels. Essential color correction. | +| Blur TOP | `blurTop` | `sizex/y`, `type` (0=Gaussian, 1=Box, 2=Bartlett) | Gaussian/box blur. | +| Transform TOP | `transformTop` | `tx/ty`, `sx/sy`, `rz`, `pivotx/y`, `extend` (0=Hold, 1=Zero, 2=Repeat, 3=Mirror) | Translate, scale, rotate textures. | +| HSV Adjust TOP | `hsvadjustTop` | `hueoffset`, `saturationmult`, `valuemult` | HSV color adjustments. | +| Lookup TOP | `lookupTop` | (input: texture + lookup table) | Color remapping via lookup table texture. | +| Edge TOP | `edgeTop` | `type` (0=Sobel, 1=Frei-Chen) | Edge detection. | +| Displace TOP | `displaceTop` | `scalex/y` | Pixel displacement using a second input as displacement map. | +| Flip TOP | `flipTop` | `flipx`, `flipy`, `flop` (diagonal) | Mirror/flip textures. | +| Crop TOP | `cropTop` | `cropleft/right/top/bottom` | Crop region of texture. | +| Resolution TOP | `resolutionTop` | `resolutionw/h`, `outputresolution` | Resize textures. | +| Null TOP | `nullTop` | (none significant) | Pass-through. Use for organization, referencing, feedback delay. | +| Cache TOP | `cacheTop` | `length`, `step` | Store N frames of history. Useful for trails, time effects. | + +### Compositors (combine multiple inputs) + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Composite TOP | `compositeTop` | `operand` (0-31: Over, Add, Multiply, Screen, etc.) | Blend two textures with standard compositing modes. | +| Over TOP | `overTop` | (simple alpha compositing) | Layer with alpha. Simpler than Composite. | +| Add TOP | `addTop` | (additive blend) | Additive blending. Great for glow, light effects. 
|
+| Multiply TOP | `multiplyTop` | (multiplicative blend) | Multiply blend. Good for masking, darkening. |
+| Switch TOP | `switchTop` | `index` (0-based) | Switch between multiple inputs by index. |
+| Cross TOP | `crossTop` | `cross` (0.0-1.0) | Crossfade between two inputs. |
+
+### I/O (input/output)
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Movie File In TOP | `moviefileinTop` | `file`, `speed`, `trim`, `index` | Load video files, image sequences. |
+| Movie File Out TOP | `moviefileoutTop` | `file`, `type` (codec), `record` (toggle) | Record/export video files. |
+| NDI In TOP | `ndiinTop` | `sourcename` | Receive NDI video streams. |
+| NDI Out TOP | `ndioutTop` | `sourcename` | Send NDI video streams. |
+| Syphon Spout In/Out TOP | `syphonspoutinTop` / `syphonspoutoutTop` | `servername` | Inter-app texture sharing. |
+| Video Device In TOP | `videodeviceinTop` | `device` | Webcam/capture card input. |
+| Feedback TOP | `feedbackTop` | `top` (path to the TOP to feed back) | One-frame delay feedback. Essential for recursive effects. |
+
+### Converters
+
+| Operator | Type Name | Direction | Use |
+|----------|-----------|-----------|-----|
+| CHOP to TOP | `choptoTop` | CHOP -> TOP | Visualize channel data as texture (waveform, spectrum display). |
+| TOP to CHOP | `toptoChop` | TOP -> CHOP | Sample texture pixels as channel data. |
+
+## CHOPs — Channel Operators (Green)
+
+Time-varying numeric data: audio, animation curves, sensor data, control signals.
+
+### Generators
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Constant CHOP | `constantChop` | `name0/value0`, `name1/value1`... | Static named channels. Control panel for parameters. |
+| LFO CHOP | `lfoChop` | `frequency`, `type` (0=Sin, 1=Tri, 2=Square, 3=Ramp, 4=Pulse), `amp`, `offset`, `phase` | Low frequency oscillator. Animation driver.
| +| Noise CHOP | `noiseChop` | `type`, `roughness`, `period`, `amp`, `seed`, `channels` | Smooth random motion. Organic animation. | +| Pattern CHOP | `patternChop` | `type` (0=Sine, 1=Triangle, ...), `length`, `cycles` | Generate waveform patterns. | +| Timer CHOP | `timerChop` | `length`, `play`, `cue`, `cycles` | Countdown/count-up timer with cue points. | +| Count CHOP | `countChop` | `threshold`, `limittype`, `limitmin/max` | Event counter with wrapping/clamping. | + +### Audio + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Audio File In CHOP | `audiofileinChop` | `file`, `volume`, `play`, `speed`, `trim` | Play audio files. | +| Audio Device In CHOP | `audiodeviceinChop` | `device`, `channels` | Live microphone/line input. | +| Audio Spectrum CHOP | `audiospectrumChop` | `size` (FFT size), `outputformat` (0=Power, 1=Magnitude) | FFT frequency analysis. | +| Audio Band EQ CHOP | `audiobandeqChop` | `bands`, `gaindb` per band | Frequency band isolation. | +| Audio Device Out CHOP | `audiodeviceoutChop` | `device` | Audio playback output. | + +### Math/Logic + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Math CHOP | `mathChop` | `preoff`, `gain`, `postoff`, `chanop` (0=Off, 1=Add, 2=Subtract, 3=Multiply...) | Math operations on channels. The Swiss army knife. | +| Logic CHOP | `logicChop` | `preop` (0=Off, 1=AND, 2=OR, 3=XOR, 4=NAND), `convert` | Boolean logic on channels. | +| Filter CHOP | `filterChop` | `type` (0=Low Pass, 1=Band Pass, 2=High Pass, 3=Notch), `cutofffreq`, `filterwidth` | Smooth, dampen, filter signals. | +| Lag CHOP | `lagChop` | `lag1/2`, `overshoot1/2` | Smooth transitions with overshoot. | +| Limit CHOP | `limitChop` | `type` (0=Clamp, 1=Loop, 2=ZigZag), `min/max` | Clamp or wrap channel values. | +| Speed CHOP | `speedChop` | (none significant) | Integrate values (velocity to position, acceleration to velocity). 
| +| Trigger CHOP | `triggerChop` | `attack`, `peak`, `decay`, `sustain`, `release` | ADSR envelope from trigger events. | +| Select CHOP | `selectChop` | `chop` (path), `channames` | Reference channels from another CHOP. | +| Merge CHOP | `mergeChop` | `align` (0=Extend, 1=Trim to First, 2=Trim to Shortest) | Combine channels from multiple CHOPs. | +| Null CHOP | `nullChop` | (none significant) | Pass-through for organization and referencing. | + +### Input Devices + +| Operator | Type Name | Use | +|----------|-----------|-----| +| Mouse In CHOP | `mouseinChop` | Mouse position, buttons, wheel. | +| Keyboard In CHOP | `keyboardinChop` | Keyboard key states. | +| MIDI In CHOP | `midiinChop` | MIDI note/CC input. | +| OSC In CHOP | `oscinChop` | OSC message input (network). | + +## SOPs — Surface Operators (Blue) + +3D geometry: points, polygons, NURBS, meshes. + +### Generators + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Grid SOP | `gridSop` | `rows`, `cols`, `sizex/y`, `type` (0=Polygon, 1=Mesh, 2=NURBS) | Flat grid mesh. Foundation for displacement, instancing. | +| Sphere SOP | `sphereSop` | `type`, `rows`, `cols`, `radius` | Sphere geometry. | +| Box SOP | `boxSop` | `sizex/y/z` | Box geometry. | +| Torus SOP | `torusSop` | `radiusx/y`, `rows`, `cols` | Donut shape. | +| Circle SOP | `circleSop` | `type`, `radius`, `divs` | Circle/ring geometry. | +| Line SOP | `lineSop` | `dist`, `points` | Line segments. | +| Text SOP | `textSop` | `text`, `fontsizex`, `fontfile`, `extrude` | 3D text geometry. | + +### Modifiers + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Transform SOP | `transformSop` | `tx/ty/tz`, `rx/ry/rz`, `sx/sy/sz` | Transform geometry (translate, rotate, scale). | +| Noise SOP | `noiseSop` | `type`, `amp`, `period`, `roughness` | Deform geometry with noise. | +| Sort SOP | `sortSop` | `ptsort`, `primsort` | Reorder points/primitives. 
|
+| Facet SOP | `facetSop` | `unique`, `consolidate`, `computenormals` | Normals, consolidation, unique points. |
+| Merge SOP | `mergeSop` | (none significant) | Combine multiple geometry inputs. |
+| Null SOP | `nullSop` | (none significant) | Pass-through. |
+
+## DATs — Data Operators (White)
+
+Text, tables, scripts, network data.
+
+### Core
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Table DAT | `tableDat` | (edit content directly) | Spreadsheet-like data tables. |
+| Text DAT | `textDat` | (edit content directly) | Arbitrary text content. Shader code, configs, scripts. |
+| Script DAT | `scriptDat` | `language` (0=Python, 1=C++) | Custom callbacks and DAT processing. |
+| CHOP Execute DAT | `chopexecDat` | `chop` (path to watch), callbacks | Trigger Python on CHOP value changes. |
+| DAT Execute DAT | `datexecDat` | `dat` (path to watch) | Trigger Python on DAT content changes. |
+| Panel Execute DAT | `panelexecDat` | `panel` | Trigger Python on UI panel events. |
+
+### I/O
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Web DAT | `webDat` | `url`, `fetchmethod` (0=GET, 1=POST) | HTTP requests. API integration. |
+| TCP/IP DAT | `tcpipDat` | `address`, `port`, `mode` | TCP networking. |
+| OSC In DAT | `oscinDat` | `port` | Receive OSC as text messages. |
+| Serial DAT | `serialDat` | `port`, `baudrate` | Serial port communication (Arduino, etc.). |
+| File In DAT | `fileinDat` | `file` | Read text files. |
+| File Out DAT | `fileoutDat` | `file`, `write` | Write text files. |
+
+### Conversions
+
+| Operator | Type Name | Direction | Use |
+|----------|-----------|-----------|-----|
+| DAT to CHOP | `dattoChop` | DAT -> CHOP | Convert table data to channels. |
+| CHOP to DAT | `choptoDat` | CHOP -> DAT | Convert channel data to table rows. |
+| SOP to DAT | `soptoDat` | SOP -> DAT | Extract geometry data as table.
| + +## MATs — Material Operators (Yellow) + +Materials for 3D rendering in Render TOP / Geometry COMP. + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Phong MAT | `phongMat` | `diff_colorr/g/b`, `spec_colorr/g/b`, `shininess`, `colormap`, `normalmap` | Classic Phong shading. Simple, fast. | +| PBR MAT | `pbrMat` | `basecolorr/g/b`, `metallic`, `roughness`, `normalmap`, `emitcolorr/g/b` | Physically-based rendering. Realistic materials. | +| GLSL MAT | `glslMat` | `dat` (shader DAT), custom uniforms | Custom vertex + fragment shaders for 3D. | +| Constant MAT | `constMat` | `colorr/g/b`, `colormap` | Flat unlit color/texture. No shading. | +| Point Sprite MAT | `pointspriteMat` | `colormap`, `scale` | Render points as camera-facing sprites. Great for particles. | +| Wireframe MAT | `wireframeMat` | `colorr/g/b`, `width` | Wireframe rendering. | +| Depth MAT | `depthMat` | `near`, `far` | Render depth buffer as grayscale. | + +## COMPs — Component Operators (Gray) + +Containers, 3D scene elements, UI components. + +### 3D Scene + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Geometry COMP | `geometryComp` | `material` (path), `instancechop` (path), `instancing` (toggle) | Renders geometry with material. Instancing host. | +| Camera COMP | `cameraComp` | `tx/ty/tz`, `rx/ry/rz`, `fov`, `near/far` | Camera for Render TOP. | +| Light COMP | `lightComp` | `lighttype` (0=Point, 1=Directional, 2=Spot, 3=Cone), `dimmer`, `colorr/g/b` | Lighting for 3D scenes. | +| Ambient Light COMP | `ambientlightComp` | `dimmer`, `colorr/g/b` | Ambient lighting. | +| Environment Light COMP | `envlightComp` | `envmap` | Image-based lighting (IBL). | + +### Containers + +| Operator | Type Name | Key Parameters | Use | +|----------|-----------|---------------|-----| +| Container COMP | `containerComp` | `w`, `h`, `bgcolor1/2/3` | UI container. Holds other COMPs for panel layouts. 
|
+| Base COMP | `baseComp` | (none significant) | Generic container. Networks-inside-networks. |
+| Replicator COMP | `replicatorComp` | `template`, `operatorsdat` | Clone a template operator N times from a table. |
+
+### Utilities
+
+| Operator | Type Name | Key Parameters | Use |
+|----------|-----------|---------------|-----|
+| Window COMP | `windowComp` | `winw/h`, `winoffsetx/y`, `monitor`, `borders` | Output window for display/projection. |
+| Select COMP | `selectComp` | `rowcol`, `panel` | Select and display content from elsewhere. |
+| Engine COMP | `engineComp` | `tox`, `externaltox` | Load external .tox components. Sub-process isolation. |
+
+## Cross-Family Converter Summary
+
+| From | To | Operator | Type Name |
+|------|-----|----------|-----------|
+| CHOP | TOP | CHOP to TOP | `choptoTop` |
+| TOP | CHOP | TOP to CHOP | `toptoChop` |
+| DAT | CHOP | DAT to CHOP | `dattoChop` |
+| CHOP | DAT | CHOP to DAT | `choptoDat` |
+| SOP | CHOP | SOP to CHOP | `soptoChop` |
+| CHOP | SOP | CHOP to SOP | `choptoSop` |
+| SOP | DAT | SOP to DAT | `soptoDat` |
+| DAT | SOP | DAT to SOP | `dattoSop` |
+| SOP | TOP | (use Render TOP + Geometry COMP) | — |
+| TOP | SOP | TOP to SOP | `toptoSop` |
diff --git a/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md b/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md
new file mode 100644
index 0000000000..33c9b5f4d8
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/pitfalls.md
@@ -0,0 +1,508 @@
+# TouchDesigner MCP — Pitfalls & Lessons Learned
+
+Hard-won knowledge from real TD sessions. Read this before building anything.
+
+## Parameter Names
+
+### 1. NEVER hardcode parameter names — always discover
+
+Parameter names change between TD versions. What works in one build may not work in another. ALWAYS use td_get_par_info to discover actual names from TD.
+
+The agent's LLM training data contains WRONG parameter names. Do not trust them.
+
+Known historical differences (may vary further — always verify):
+| What docs/training say | Actual in some versions | Notes |
+|---------------|---------------|-------|
+| `dat` | `pixeldat` | GLSL TOP pixel shader DAT |
+| `colora` | `alpha` | Constant TOP alpha |
+| `sizex` / `sizey` | `size` | Blur TOP (single value) |
+| `fontr/g/b` | `fontcolorr/g/b` | Text TOP font color (r/g/b) |
+| `fontcolora` | `fontalpha` | Text TOP font alpha (NOT `fontcolora`) |
+| `bgcolora` | `bgalpha` | Text TOP bg alpha |
+| `value1name` | `vec0name` | GLSL TOP uniform name |
+
+### 2. twozero td_execute_python response format
+
+When calling `td_execute_python` via twozero MCP, successful responses return `(ok)` followed by FPS/error summary (e.g. `[fps 60.0/60] [0 err/0 warn]`), NOT the raw Python `result` dict. If you're parsing responses programmatically, check for the `(ok)` prefix — don't pattern-match on Python variable names from the script. Use `td_get_operator_info` or separate inspection calls to read back values.
+
+### 3. When using td_set_operator_pars, param names must match exactly
+
+Use td_get_par_info to discover them. The MCP tool validates parameter names and returns clear errors explaining what went wrong, unlike raw Python which crashes the whole script with tdAttributeError and stops execution. Always discover before setting.
+
+### 4. Use `safe_par()` pattern for cross-version compatibility
+
+```python
+def safe_par(node, name, value):
+    p = getattr(node.par, name, None)
+    if p is not None:
+        p.val = value
+        return True
+    return False
+```
+
+### 5. `td.tdAttributeError` crashes the whole script — use defensive access
+
+If you do `node.par.nonexistent = value`, TD raises `tdAttributeError` and stops the entire script.
Prevention is better than catching:
+- Use `op()` instead of `opex()` — `op()` returns None on failure, `opex()` raises
+- Use `hasattr(node.par, 'name')` before accessing any parameter
+- Use `getattr(node.par, 'name', None)` with a default
+- Use the `safe_par()` pattern from pitfall #4
+
+```python
+# WRONG — crashes if param doesn't exist:
+node.par.nonexistent = value
+
+# CORRECT — defensive access:
+if hasattr(node.par, 'nonexistent'):
+    node.par.nonexistent = value
+```
+
+### 6. `outputresolution` is a string menu, not an integer
+
+```
+menuNames: ['useinput','eighth','quarter','half','2x','4x','8x','fit','limit','custom','parpanel']
+```
+Always use the string form. Setting `outputresolution = 9` may silently fail.
+```python
+node.par.outputresolution = 'custom'  # correct
+node.par.resolutionw = 1280; node.par.resolutionh = 720
+```
+Discover valid values: `list(node.par.outputresolution.menuNames)`
+
+## GLSL Shaders
+
+### 7. `uTDCurrentTime` does NOT exist in GLSL TOP
+
+There is NO built-in time uniform for GLSL TOPs. GLSL MAT has `uTDGeneral.seconds` but that's NOT available in GLSL TOP context.
+
+**PRIMARY — GLSL TOP Vectors/Values page:**
+```python
+gl.par.value0name = 'uTime'
+gl.par.value0.expr = "absTime.seconds"
+# In GLSL: uniform float uTime;
+```
+
+**FALLBACK — Constant TOP texture (for complex time data):**
+
+CRITICAL: set format to `rgba32float` — default 8-bit clamps to 0-1:
+```python
+t = root.create(constantTOP, 'time_driver')
+t.par.format = 'rgba32float'
+t.par.outputresolution = 'custom'
+t.par.resolutionw = 1; t.par.resolutionh = 1
+t.par.colorr.expr = "absTime.seconds % 1000.0"
+t.outputConnectors[0].connect(glsl.inputConnectors[0])
+```
+
+### 8. GLSL compile errors are silent in the API
+
+The GLSL TOP shows a yellow warning triangle in the UI but `node.errors()` may return empty string. Check `node.warnings()` too, and create an Info DAT pointed at the GLSL TOP to read the actual compiler output.
+
+### 9.
TD GLSL uses `vUV.st` not `gl_FragCoord` — and REQUIRES `TDOutputSwizzle()` on macOS + +Standard GLSL patterns don't work. TD provides: +- `vUV.st` — UV coordinates (0-1) +- `uTDOutputInfo.res.zw` — resolution +- `sTD2DInputs[0]` — input textures +- `layout(location = 0) out vec4 fragColor` — output + +CRITICAL on macOS: Always wrap output with `TDOutputSwizzle()`: +```glsl +fragColor = TDOutputSwizzle(color); +``` +TD uses GLSL 4.60 (Vulkan backend). GLSL 3.30 and earlier removed. + +### 10. Large GLSL shaders — write to temp file + +GLSL code with special characters can corrupt JSON payloads. Write the shader to a temp file and load it in TD: +```python +# Agent side: write shader to /tmp/shader.glsl via write_file +# TD side: +sd = root.create(textDAT, 'shader_code') +with open('/tmp/shader.glsl', 'r') as f: + sd.text = f.read() +``` + +## Node Management + +### 11. Destroying nodes while iterating `root.children` causes `tdError` + +The iterator is invalidated when a child is destroyed. Always snapshot first: +```python +kids = list(root.children) # snapshot +for child in kids: + if child.valid: # check — earlier destroys may cascade + child.destroy() +``` + +### 11b. Split cleanup and creation into SEPARATE td_execute_python calls + +Creating nodes with the same names you just destroyed in the SAME script causes "Invalid OP object" errors — even with `list()` snapshot. TD's internal references can go stale within one execution context. + +**WRONG (single call):** +```python +# td_execute_python: +for c in list(root.children): + if c.valid and c.name.startswith('promo_'): + c.destroy() +# ... then create promo_audio, promo_shader etc. in same script → CRASHES +``` + +**CORRECT (two separate calls):** +```python +# Call 1: td_execute_python — clean only +for c in list(root.children): + if c.valid and c.name.startswith('promo_'): + c.destroy() + +# Call 2: td_execute_python — build (separate MCP call) +audio = root.create(audiofileinCHOP, 'promo_audio') +# ... 
rest of build +``` + +### 12. Feedback TOP: use `top` parameter, NOT direct input wire + +The feedbackTOP's `top` parameter references which TOP to delay. Do NOT also wire that TOP directly into the feedback's input — this creates a real cook dependency loop. + +Correct setup: +```python +fb = root.create(feedbackTOP, 'fb_delay') +fb.par.top = comp.path # reference only — no wire to fb input +fb.outputConnectors[0].connect(xf) # fb output -> transform -> fade -> comp +``` + +The "Cook dependency loop detected" warning on the transform/fade chain is expected. + +### 13. GLSL TOP auto-creates companion nodes + +Creating a `glslTOP` also creates `name_pixel` (Text DAT), `name_info` (Info DAT), and `name_compute` (Text DAT). These are visible in the network. Don't be alarmed by "extra" nodes. + +### 14. The default project root is `/project1` + +New TD files start with `/project1` as the main container. System nodes live at `/`, `/ui`, `/sys`, `/local`, `/perform`. Don't create user nodes outside `/project1`. + +### 15. Non-Commercial license caps resolution at 1280x1280 + +Setting `resolutionw=1920` silently clamps to 1280. Always check effective resolution after creation: +```python +n.cook(force=True) +actual = str(n.width) + 'x' + str(n.height) +``` + +## Recording & Codecs + +### 16. MovieFileOut TOP: H.264/H.265/AV1 requires Commercial license + +In Non-Commercial TD, these codecs produce an error. Recommended alternatives: +- `prores` — Apple ProRes, **best on macOS**, HW accelerated, NOT license-restricted. ~55MB/s at 1280x720 but lossless quality. **Use this as default on macOS.** +- `cineform` — GoPro Cineform, supports alpha +- `hap` — GPU-accelerated playback, large files +- `notchlc` — GPU-accelerated, good quality +- `mjpa` — Motion JPEG, legacy fallback (lossy, use only if ProRes unavailable) + +For image sequences: `rec.par.type = 'imagesequence'`, `rec.par.imagefiletype = 'png'` + +### 17. 
MovieFileOut `.record()` method may not exist + +Use the toggle parameter instead: +```python +rec.par.record = True # start recording +rec.par.record = False # stop recording +``` + +When setting file path and starting recording in the same script, use delayFrames: +```python +rec.par.file = '/tmp/new_output.mov' +run("op('/project1/recorder').par.record = True", delayFrames=2) +``` + +### 18. TOP.save() captures same frame when called rapidly + +Use MovieFileOut for real-time recording. Set `project.realTime = False` for frame-accurate output. + +### 19. AudioFileIn CHOP: cue and recording sequence matters + +The recording sequence must be done in exact order, or the recording will be empty, audio will start mid-file, or the file won't be written. + +**Proven recording sequence:** + +```python +# Step 1: Stop any existing recording +rec.par.record = False + +# Step 2: Reset audio to beginning +audio.par.play = False +audio.par.cue = True +audio.par.cuepoint = 0 # may need cuepointunit=0 too +# Verify: audio.par.cue.eval() should be True + +# Step 3: Set output file path +rec.par.file = '/tmp/output.mov' + +# Step 4: Release cue + start playing + start recording (with frame delay) +audio.par.cue = False +audio.par.play = True +audio.par.playmode = 2 # Sequential — plays once through +run("op('/project1/recorder').par.record = True", delayFrames=3) +``` + +**Why each step matters:** +- `rec.par.record = False` first — if a previous recording is active, setting `par.file` may fail silently +- `audio.par.cue = True` + `cuepoint = 0` — guarantees audio starts from the beginning, otherwise the spectrum may be silent for the first few seconds +- `delayFrames=3` on the record start — setting `par.file` and `par.record = True` in the same script can race; the file path needs a frame to register before recording starts +- `playmode = 2` (Sequential) — plays the file once. 
Use `playmode = 0` (Locked to Timeline) if you want TD's timeline to control position + +## TD Python API Patterns + +### 20. COMP extension setup: ext0object format is CRITICAL + +`ext0object` expects a CONSTANT string (NOT expression mode): +```python +comp.par.ext0object = "op('./myExtensionDat').module.MyClassName(me)" +``` +NEVER set as just the DAT name. NEVER use ParMode.EXPRESSION. ALWAYS ensure the DAT has `par.language='python'`. + +### 21. td.Panel is NOT subscriptable — use attribute access + +```python +comp.panel.select # correct (attribute access, returns float) +comp.panel['select'] # WRONG — 'td.Panel' object is not subscriptable +``` + +### 22. ALWAYS use relative paths in script callbacks + +In scriptTOP/CHOP/SOP/DAT callbacks, use paths relative to `scriptOp` or `me`: +```python +root = scriptOp.parent().parent() +dat = root.op('pixel_data') +``` +NEVER hardcode absolute paths like `op('/project1/myComp/child')` — they break when containers are renamed or copied. + +### 23. keyboardinCHOP channel names have 'k' prefix + +Channel names are `kup`, `kdown`, `kleft`, `kright`, `ka`, `kb`, etc. — NOT `up`, `down`, `a`, `b`. Always verify with: +```python +channels = [c.name for c in op('/project1/keyboard1').chans()] +``` + +### 24. expressCHOP cook-only properties — false positive errors + +`me.inputVal`, `me.chanIndex`, `me.sampleIndex` work ONLY in cook-context. Calling `par.expr0expr.eval()` from outside always raises an error — this is NOT a real operator error. Ignore these in error scans. + +### 25. td.Vertex attributes — use index access not named attributes + +In TD 2025.32, `td.Vertex` objects do NOT have `.x`, `.y`, `.z` attributes: +```python +# WRONG — crashes: +vertex.x, vertex.y, vertex.z + +# CORRECT — index-based: +vertex.point.P[0], vertex.point.P[1], vertex.point.P[2] +# Or for SOP point positions: +pt = sop.points()[i] +pos = pt.P # use P[0], P[1], P[2] +``` + +## Audio + +### 26. 
Audio Spectrum CHOP output is weak — boost it + +Raw output is very small (0.001-0.05). Use built-in boost: `spectrum.par.highfrequencyboost = 3.0` + +If still weak, add Math CHOP in Range mode: `fromrangehi=0.05, torangehi=1.0` + +### 27. AudioSpectrum CHOP: timeslice and sample count are the #1 gotcha + +AudioSpectrum at 44100Hz with `timeslice=False` outputs the ENTIRE audio file as samples (~24000+). CHOP-to-TOP then exceeds texture resolution max and warns/fails. + +**Fix:** Keep `timeslice = True` (default) for real-time per-frame FFT. Set `fftsize` to control bin count (it's a STRING enum: `'256'` not `256`). + +If the CHOP-to-TOP still gets too many samples, set `layout = 'rowscropped'` on the choptoTOP. + +```python +spectrum.par.fftsize = '256' # STRING, not int — enum values +spectrum.par.timeslice = True # MUST be True for real-time audio reactivity +spectex.par.layout = 'rowscropped' # handles oversized CHOP inputs +``` + +**resampleCHOP has NO `numsamples` param.** It uses `rate`, `start`, `end`, `method`. Don't guess — always `td_get_par_info('resampleCHOP')` first. + +### 28. CHOP To TOP has NO input connectors — use par.chop reference + +```python +spec_tex = root.create(choptoTOP, 'spectrum_tex') +spec_tex.par.chop = resample # correct: parameter reference +# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0]) # WRONG +``` + +## Workflow + +### 29. Always verify after building — errors are silent + +Node errors and broken connections produce no output. Always check: +```python +for c in list(root.children): + e = c.errors() + w = c.warnings() + if e: print(c.name, 'ERR:', e) + if w: print(c.name, 'WARN:', w) +``` + +### 30. Window COMP param for display target is `winop` + +```python +win = root.create(windowCOMP, 'display') +win.par.winop = '/project1/logo_out' +win.par.winw = 1280; win.par.winh = 720 +win.par.winopen.pulse() +``` + +### 31. 
`sample()` returns frozen pixels in rapid calls + +`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window. + +### 32. Audio-reactive GLSL: dual-layer sync pipeline + +For audio-synced visuals, use BOTH layers for maximum effect: + +**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut. + +**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass. + +Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages. + +**Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds. + +### 33. twozero MCP: benchmark and prefer native tools + +Benchmarked April 2026: twozero MCP with 36 native tools. The old curl/REST method (port 9981) had zero native tools. + +**Always prefer native MCP tools over td_execute_python:** +- `td_create_operator` over `root.create()` scripts (handles viewport positioning) +- `td_set_operator_pars` over `node.par.X = Y` scripts (validates param names) +- `td_get_par_info` over temp-node discovery dance (instant, no cleanup) +- `td_get_errors` over manual `c.errors()` loops +- `td_get_focus` for context awareness (no equivalent in old method) + +Only fall back to `td_execute_python` for multi-step logic (wiring chains, conditional builds, loops). + +### 34. 
twozero td_execute_python response wrapping + +twozero wraps `td_execute_python` responses with status info: `(ok)\n\n[fps 60.0/60] [0 err/0 warn]`. Your Python `result` variable value may not appear verbatim in the response text. If you need to check results programmatically, use `print()` statements in the script — they appear in the response. Don't rely on string-matching the `result` dict. + +### 35. Audio-reactive chain: DO NOT use Lag CHOP or Filter CHOP for spectrum smoothing + +The Derivative docs and tutorials suggest using Lag CHOP (lag1=0.2, lag2=0.5) to smooth raw FFT output before passing to a shader. **This does NOT work with AudioSpectrum → CHOP to TOP → GLSL.** + +What happens: Lag CHOP operates in timeslice mode. A 256-sample spectrum input gets expanded to 1600-2400 samples. The Lag averaging drives all values to near-zero (~1e-06). The CHOP to TOP produces a 2400x2 texture instead of 256x2. The shader receives effectively zero audio data. + +**The correct chain is: Spectrum(outlength=256) → Math(gain=10) → CHOPtoTOP → GLSL.** No CHOP smoothing at all. If you need smoothing, do it in the GLSL shader via temporal lerp with a feedback texture. + +Verified values with audio playing: +- Without Lag CHOP: bass bins = 5.0-5.4, mid bins = 1.0-1.7 (strong, usable) +- With Lag CHOP: ALL bins = 0.000001-0.00004 (dead, zero audio reactivity) + +### 36. AudioSpectrum Output Length: set manually to avoid CHOP to TOP overflow + +AudioSpectrum in Visualization mode with FFT 8192 outputs 22,050 samples by default (1 per Hz, 0–22050). CHOP to TOP cannot handle this — you get "Number of samples exceeded texture resolution max". + +Fix: `spectrum.par.outputmenu = 'setmanually'` and `spectrum.par.outlength = 256`. This gives 256 frequency bins — plenty for visual FFT. + +DO NOT set `timeslice = False` as a workaround — that processes the entire audio file at once and produces even more samples. + +### 37. 
GLSL spectrum texture from CHOP to TOP is 256x2 not 256x1 + +AudioSpectrum outputs 2 channels (stereo: chan1, chan2). CHOP to TOP with `dataformat='r'` creates a 256x2 texture — one row per channel. Sample the first channel at `y=0.25` (center of first row), NOT `y=0.5` (boundary between rows): + +```glsl +float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r; // correct +float bass = texture(sTD2DInputs[1], vec2(0.05, 0.5)).r; // WRONG — samples between rows +``` + +### 38. FPS=0 doesn't mean ops aren't cooking — check play state + +TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still produces valid screenshots. The two most common causes: + +**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS. + +**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking. + +Diagnostic sequence when FPS=0: +1. `td_get_perf` — check if any op has extreme CPU/s +2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate +3. Check for blocking CHOPs (audioout, audiodevin, etc.) +4. Toggle play state (spacebar, or check if absTime.seconds is advancing) + +### 39. Recording while FPS=0 produces empty or near-empty files + +This is the #1 cause of "I recorded for 30 seconds but got a 2-frame video." 
If TD's cook loop is stalled (FPS=0 or very low), MovieFileOut has nothing to record. Unlike `TOP.save()` which captures the last cooked frame regardless, MovieFileOut only writes frames that actually cook.
+
+**Always verify FPS before starting a recording:**
+```python
+# Check via td_get_perf first
+# If FPS < 30, do NOT start recording — fix the performance issue first
+# If FPS=0, the playbar is likely paused — see pitfall #38
+```
+
+Common causes of recording empty video:
+- Playbar paused (FPS=0) — see pitfall #38
+- Audio device CHOP blocking the main thread — see pitfall #38b
+- Recording started before audio was cued — audio is silent, GLSL outputs black, MovieFileOut records black frames that look empty
+- `par.file` set in the same script as `par.record = True` — see pitfall #17
+
+### 40. GLSL shader produces black output — test before committing to a long render
+
+New GLSL shaders can fail silently (see pitfall #7). Before recording a long take, always:
+
+1. **Write a minimal test shader first** that just outputs a solid color or pass-through:
+```glsl
+void main() {
+    vec2 uv = vUV.st;
+    fragColor = TDOutputSwizzle(vec4(uv, 0.0, 1.0));
+}
+```
+
+2. **Verify the test renders correctly** via `td_get_screenshot` on the GLSL TOP's output.
+
+3. **Swap in the real shader** and screenshot again immediately. If black, the shader has a compile error or logic issue.
+
+4. **Only then start recording.** A 90-second ProRes recording is ~5GB. Recording black frames wastes disk and time.
+
+Common causes of black GLSL output:
+- Missing `TDOutputSwizzle()` on macOS (pitfall #8)
+- Time uniform not connected — shader uses default 0.0, fractal stays at origin
+- Spectrum texture not connected — audio values all 0.0, driving everything to black
+- Integer division where float division was expected (`1/2 = 0` not `0.5`)
+- `absTime.seconds % 1000.0` rolled over past 1000 and the modulo produces unexpected values
+
+### 41. 
td_write_dat uses `text` parameter, NOT `content`
+
+The MCP tool `td_write_dat` expects a `text` parameter for full replacement. Passing `content` returns an error: `"Provide either 'text' for full replace, or 'old_text'+'new_text' for patching"`.
+
+If `td_write_dat` fails, fall back to `td_execute_python`:
+```python
+op("/project1/shader_code").text = shader_string
+```
+
+### 42. td_execute_python does NOT return stdout or print() output
+
+Despite what earlier versions of pitfall #34 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
+
+### 43. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()
+
+The response text from `td_get_operator_info` has `[fps 60.0/60]` appended after the JSON object. This causes `json.loads()` to fail with "Extra data" errors. Strip it before parsing:
+```python
+clean = response_text.rsplit('[fps', 1)[0]
+data = json.loads(clean)
+```
+
+### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
+
+Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
+
+### 45. Recording duration is manual — no auto-stop at audio end
+
+MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. 
Trim excess with ffmpeg as a safety net: +```bash +ffmpeg -i raw.mov -t 25 -c copy trimmed.mov +``` \ No newline at end of file diff --git a/optional-skills/creative/touchdesigner-mcp/references/python-api.md b/optional-skills/creative/touchdesigner-mcp/references/python-api.md new file mode 100644 index 0000000000..f2955110b0 --- /dev/null +++ b/optional-skills/creative/touchdesigner-mcp/references/python-api.md @@ -0,0 +1,463 @@ +# TouchDesigner Python API Reference + +## The td Module + +TouchDesigner's Python environment auto-imports the `td` module. All TD-specific classes, functions, and constants live here. Scripts inside TD (Script DATs, CHOP/DAT Execute callbacks, Extensions) have full access. + +When using the MCP `execute_python_script` tool, these globals are pre-loaded: +- `op` — shortcut for `td.op()`, finds operators by path +- `ops` — shortcut for `td.ops()`, finds multiple operators by pattern +- `me` — the operator running the script (via MCP this is the twozero internal executor) +- `parent` — shortcut for `me.parent()` +- `project` — the root project component +- `td` — the full td module + +## Finding Operators: op() and ops() + +### op(path) — Find a single operator + +```python +# Absolute path (always works from MCP) +node = op('/project1/noise1') + +# Relative path (relative to current operator — only in Script DATs) +node = op('noise1') # sibling +node = op('../noise1') # parent's sibling + +# Returns None if not found (does NOT raise) +node = op('/project1/nonexistent') # None +``` + +### ops(pattern) — Find multiple operators + +```python +# Glob patterns +nodes = ops('/project1/noise*') # all nodes starting with "noise" +nodes = ops('/project1/*') # all direct children +nodes = ops('/project1/container1/*') # all children of container1 + +# Returns a tuple of operators (may be empty) +for n in ops('/project1/*'): + print(n.name, n.OPType) +``` + +### Navigation from a node + +```python +node = op('/project1/noise1') + +node.name # 
'noise1' +node.path # '/project1/noise1' +node.OPType # 'noiseTop' +node.type # +node.family # 'TOP' + +# Parent / children +node.parent() # the parent COMP +node.parent().children # all siblings + self +node.parent().findChildren(name='noise*') # filtered + +# Type checking +node.isTOP # True +node.isCHOP # False +node.isSOP # False +node.isDAT # False +node.isMAT # False +node.isCOMP # False +``` + +## Parameters + +Every operator has parameters accessed via the `.par` attribute. + +### Reading parameters + +```python +node = op('/project1/noise1') + +# Direct access +node.par.seed.val # current evaluated value (may be an expression result) +node.par.seed.eval() # same as .val +node.par.seed.default # default value +node.par.monochrome.val # boolean parameters: True/False + +# List all parameters +for p in node.pars(): + print(f"{p.name}: {p.val} (default: {p.default})") + +# Filter by page (parameter group) +for p in node.pars('Noise'): # page name + print(f"{p.name}: {p.val}") +``` + +### Setting parameters + +```python +# Direct value setting +node.par.seed.val = 42 +node.par.monochrome.val = True +node.par.resolutionw.val = 1920 +node.par.resolutionh.val = 1080 + +# String parameters +op('/project1/text1').par.text.val = 'Hello World' + +# File paths +op('/project1/moviefilein1').par.file.val = '/path/to/video.mp4' + +# Reference another operator (for "dat", "chop", "top" type parameters) +op('/project1/glsl1').par.dat.val = '/project1/shader_code' +``` + +### Parameter expressions + +```python +# Python expressions that evaluate dynamically +node.par.seed.expr = "me.time.frame" +node.par.tx.expr = "math.sin(me.time.seconds * 2)" + +# Reference another parameter +node.par.brightness1.expr = "op('/project1/constant1').par.value0.val" + +# Export (one-way binding from CHOP to parameter) +# This makes the parameter follow a CHOP channel value +op('/project1/noise1').par.seed.val # can also be driven by exports +``` + +### Parameter types + +| Type | Python Type 
| Example | +|------|------------|---------| +| Float | `float` | `node.par.brightness1.val = 0.5` | +| Int | `int` | `node.par.seed.val = 42` | +| Toggle | `bool` | `node.par.monochrome.val = True` | +| String | `str` | `node.par.text.val = 'hello'` | +| Menu | `int` (index) or `str` (label) | `node.par.type.val = 'sine'` | +| File | `str` (path) | `node.par.file.val = '/path/to/file'` | +| OP reference | `str` (path) | `node.par.dat.val = '/project1/text1'` | +| Color | separate r/g/b/a floats | `node.par.colorr.val = 1.0` | +| XY/XYZ | separate x/y/z floats | `node.par.tx.val = 0.5` | + +## Creating and Deleting Operators + +```python +# Create via parent component +parent = op('/project1') +new_node = parent.create(noiseTop) # using class reference +new_node = parent.create(noiseTop, 'my_noise') # with custom name + +# The MCP create_td_node tool handles this automatically: +# create_td_node(parentPath="/project1", nodeType="noiseTop", nodeName="my_noise") + +# Delete +node = op('/project1/my_noise') +node.destroy() + +# Copy +original = op('/project1/noise1') +copy = parent.copy(original, name='noise1_copy') +``` + +## Connections (Wiring Operators) + +### Output to Input connections + +```python +# Connect noise1's output to level1's input +op('/project1/noise1').outputConnectors[0].connect(op('/project1/level1')) + +# Connect to specific input index (for multi-input operators like Composite) +op('/project1/noise1').outputConnectors[0].connect(op('/project1/composite1').inputConnectors[0]) +op('/project1/text1').outputConnectors[0].connect(op('/project1/composite1').inputConnectors[1]) + +# Disconnect all outputs +op('/project1/noise1').outputConnectors[0].disconnect() + +# Query connections +node = op('/project1/level1') +inputs = node.inputs # list of connected input operators +outputs = node.outputs # list of connected output operators +``` + +### Connection patterns for common setups + +```python +# Linear chain: A -> B -> C -> D +ops_list = 
[op(f'/project1/{name}') for name in ['noise1', 'level1', 'blur1', 'null1']] +for i in range(len(ops_list) - 1): + ops_list[i].outputConnectors[0].connect(ops_list[i+1]) + +# Fan-out: A -> B, A -> C, A -> D +source = op('/project1/noise1') +for target_name in ['level1', 'composite1', 'transform1']: + source.outputConnectors[0].connect(op(f'/project1/{target_name}')) + +# Merge: A + B + C -> Composite +comp = op('/project1/composite1') +for i, source_name in enumerate(['noise1', 'text1', 'ramp1']): + op(f'/project1/{source_name}').outputConnectors[0].connect(comp.inputConnectors[i]) +``` + +## DAT Content Manipulation + +### Text DATs + +```python +dat = op('/project1/text1') + +# Read +content = dat.text # full text as string + +# Write +dat.text = "new content" +dat.text = '''multi +line +content''' + +# Append +dat.text += "\nnew line" +``` + +### Table DATs + +```python +dat = op('/project1/table1') + +# Read cell +val = dat[0, 0] # row 0, col 0 +val = dat[0, 'name'] # row 0, column named 'name' +val = dat['key', 1] # row named 'key', col 1 + +# Write cell +dat[0, 0] = 'value' + +# Read row/col +row = dat.row(0) # list of Cell objects +col = dat.col('name') # list of Cell objects + +# Dimensions +rows = dat.numRows +cols = dat.numCols + +# Append row +dat.appendRow(['col1_val', 'col2_val', 'col3_val']) + +# Clear +dat.clear() + +# Set entire table +dat.clear() +dat.appendRow(['name', 'value', 'type']) +dat.appendRow(['frequency', '440', 'float']) +dat.appendRow(['amplitude', '0.8', 'float']) +``` + +## Time and Animation + +```python +# Global time +td.absTime.frame # absolute frame number (never resets) +td.absTime.seconds # absolute seconds + +# Timeline time (affected by play/pause/loop) +me.time.frame # current frame on timeline +me.time.seconds # current seconds on timeline +me.time.rate # FPS setting + +# Timeline control (via execute_python_script) +project.play = True +project.play = False +project.frameRange = (1, 300) # set timeline range + +# Cook 
frame (when operator was last computed) +node.cookFrame +node.cookTime +``` + +## Extensions (Custom Python Classes on Components) + +Extensions add custom Python methods and attributes to COMPs. + +```python +# Create extension on a Base COMP +base = op('/project1/myBase') + +# The extension class is defined in a Text DAT inside the COMP +# Typically named 'ExtClass' with the extension code: + +extension_code = ''' +class MyExtension: + def __init__(self, ownerComp): + self.ownerComp = ownerComp + self.counter = 0 + + def Reset(self): + self.counter = 0 + + def Increment(self): + self.counter += 1 + return self.counter + + @property + def Count(self): + return self.counter +''' + +# Write extension code to DAT inside the COMP +op('/project1/myBase/extClass').text = extension_code + +# Configure the extension on the COMP +base.par.extension1 = 'extClass' # name of the DAT +base.par.promoteextension1 = True # promote methods to parent + +# Call extension methods +base.Increment() # calls MyExtension.Increment() +count = base.Count # accesses MyExtension.Count property +base.Reset() +``` + +## Useful Built-in Modules + +### tdu — TouchDesigner Utilities + +```python +import tdu + +# Dependency tracking (reactive values) +dep = tdu.Dependency(initial_value) +dep.val = new_value # triggers dependents to recook + +# File path utilities +tdu.expandPath('$HOME/Desktop/output.mov') + +# Math +tdu.clamp(value, min, max) +tdu.remap(value, from_min, from_max, to_min, to_max) +``` + +### TDFunctions + +```python +from TDFunctions import * + +# Commonly used utilities +clamp(value, low, high) +remap(value, inLow, inHigh, outLow, outHigh) +interp(value1, value2, t) # linear interpolation +``` + +### TDStoreTools — Persistent Storage + +```python +from TDStoreTools import StorageManager + +# Store data that survives project reload +me.store('myKey', 'myValue') +val = me.fetch('myKey', default='fallback') + +# Storage dict +me.storage['key'] = value +``` + +## Common Patterns via 
execute_python_script
+
+### Build a complete chain
+
+```python
+# Create a complete audio-reactive noise chain
+parent = op('/project1')
+
+# Create operators
+audio_in = parent.create(audiofileinCHOP, 'audio_in')
+spectrum = parent.create(audiospectrumCHOP, 'spectrum')
+chop_to_top = parent.create(choptoTOP, 'chop_to_top')
+noise = parent.create(noiseTOP, 'noise1')
+level = parent.create(levelTOP, 'level1')
+null_out = parent.create(nullTOP, 'out')
+
+# Wire the chain
+audio_in.outputConnectors[0].connect(spectrum)
+chop_to_top.par.chop = spectrum # choptoTOP has NO input connectors — reference via par.chop (see pitfalls #28)
+noise.outputConnectors[0].connect(level)
+level.outputConnectors[0].connect(null_out)
+
+# Set parameters
+audio_in.par.file = '/path/to/music.wav'
+audio_in.par.play = True
+spectrum.par.size = 512
+noise.par.type = 1 # Sparse
+noise.par.monochrome = False
+noise.par.resolutionw = 1920
+noise.par.resolutionh = 1080
+level.par.opacity = 0.8
+level.par.gamma1 = 0.7
+```
+
+### Query network state
+
+```python
+# Get all TOPs in the project
+tops = [c for c in op('/project1').findChildren(type=TOP)]
+for t in tops:
+    print(f"{t.path}: {t.OPType} {'ERROR' if t.errors() else 'OK'}")
+
+# Find all operators with errors
+def find_errors(parent_path='/project1'):
+    parent = op(parent_path)
+    errors = []
+    for child in parent.findChildren(depth=-1):
+        if child.errors():
+            errors.append((child.path, child.errors()))
+    return errors
+
+result = find_errors()
+```
+
+### Batch parameter changes
+
+```python
+# Set parameters on multiple nodes at once
+settings = {
+    '/project1/noise1': {'seed': 42, 'monochrome': False, 'resolutionw': 1920},
+    '/project1/level1': {'brightness1': 1.2, 'gamma1': 0.8},
+    '/project1/blur1': {'sizex': 5, 'sizey': 5},
+}
+
+for path, params in settings.items():
+    node = op(path)
+    if node:
+        for key, val in params.items():
+            setattr(node.par, key, val)
+```
+
+## Python Version and Packages
+
+TouchDesigner bundles Python 3.11+ with these pre-installed:
+- **numpy** — array 
operations, fast math
+- **scipy** — signal processing, FFT
+- **OpenCV** (cv2) — computer vision
+- **PIL/Pillow** — image processing
+- **requests** — HTTP client
+- **json**, **re**, **os**, **sys** — standard library
+
+**IMPORTANT:** Parameter names in the examples throughout this reference are illustrative. Always run discovery (SKILL.md Step 0) to get actual names for your TD version. Do NOT copy param names from these examples verbatim.
+
+Custom packages can be installed to TD's Python site-packages directory. See TD documentation for the exact path per platform.
+
+## SOP Vertex/Point Access (TD 2025.32)
+
+In TD 2025.32, `td.Vertex` does NOT have `.x`, `.y`, `.z` attributes. Use index access:
+
+```python
+# WRONG — crashes in TD 2025.32:
+vertex.x, vertex.y, vertex.z
+
+# CORRECT — index/attribute access:
+pt = sop.points()[i]
+pos = pt.P # Position object
+x, y, z = pos[0], pos[1], pos[2]
+
+# Always introspect first:
+dir(sop.points()[0]) # see what attributes actually exist
+dir(sop.points()[0].P) # see Position object interface
+```
diff --git a/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md b/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
new file mode 100644
index 0000000000..b8e201f5c3
--- /dev/null
+++ b/optional-skills/creative/touchdesigner-mcp/references/troubleshooting.md
@@ -0,0 +1,244 @@
+# TouchDesigner Troubleshooting (twozero MCP)
+
+> See `references/pitfalls.md` for the comprehensive lessons-learned list.
+
+## 1. Connection Issues
+
+### Port 40404 not responding
+
+Check these in order:
+
+1. Is TouchDesigner running?
+   ```bash
+   pgrep TouchDesigner
+   ```
+
+1b. 
Quick hub health check (no JSON-RPC needed): + A plain GET to the MCP URL returns instance info: + ``` + curl -s http://localhost:40404/mcp + ``` + Returns: `{"hub": true, "pid": ..., "instances": {"127.0.0.1_PID": {"project": "...", "tdVersion": "...", ...}}}` + If this returns JSON but `instances` is empty, TD is running but twozero hasn't registered yet. + +2. Is twozero installed in TD? + Open TD Palette Browser > twozero should be listed. If not, install it. + +3. Is MCP enabled in twozero settings? + In TD, open twozero preferences and confirm MCP server is toggled ON. + +4. Test the port directly: + ```bash + nc -z 127.0.0.1 40404 + ``` + +5. Test the MCP endpoint: + ```bash + curl -s http://localhost:40404/mcp + ``` + Should return JSON with hub info. If it does, the server is running. + +### Hub responds but no TD instances + +The twozero MCP hub is running but TD hasn't registered. Causes: +- TD project not loaded yet (still on splash screen) +- twozero COMP not initialized in the current project +- twozero version mismatch + +Fix: Open/reload a TD project that contains the twozero COMP. Use td_list_instances +to check which TD instances are registered. + +### Multi-instance setup + +twozero auto-assigns ports for multiple TD instances: +- First instance: 40404 +- Second instance: 40405 +- Third instance: 40406 +- etc. + +Use `td_list_instances` to discover all running instances and their ports. + +## 2. MCP Tool Errors + +### td_execute_python returns error + +The error message from td_execute_python often contains the Python traceback. +If it's unclear, use `td_read_textport` to see the full TD console output — +Python exceptions are always printed there. + +Common causes: +- Syntax error in the script +- Referencing a node that doesn't exist (op() returns None, then you call .par on None) +- Using wrong parameter names (see pitfalls.md) + +### td_set_operator_pars fails + +Parameter name mismatch is the #1 cause. 
The tool validates param names and +returns clear errors, but you must use exact names. + +Fix: ALWAYS call `td_get_par_info` first to discover the real parameter names: +``` +td_get_par_info(op_type='glslTOP') +td_get_par_info(op_type='noiseTOP') +``` + +### td_create_operator type name errors + +Operator type names use camelCase with family suffix: +- CORRECT: noiseTOP, glslTOP, levelTOP, compositeTOP, audiospectrumCHOP +- WRONG: NoiseTOP, noise_top, NOISE TOP, Noise + +### td_get_operator_info for deep inspection + +If unsure about any aspect of an operator (params, inputs, outputs, state): +``` +td_get_operator_info(path='/project1/noise1', detail='full') +``` + +## 3. Parameter Discovery + +CRITICAL: ALWAYS use td_get_par_info to discover parameter names. + +The agent's LLM training data contains WRONG parameter names for TouchDesigner. +Do not trust them. Known wrong names include dat vs pixeldat, colora vs alpha, +sizex vs size, and many more. See pitfalls.md for the full list. + +Workflow: +1. td_get_par_info(op_type='glslTOP') — get all params for a type +2. td_get_operator_info(path='/project1/mynode', detail='full') — get params for a specific instance +3. Use ONLY the names returned by these tools + +## 4. Performance + +### Diagnosing slow performance + +Use `td_get_perf` to see which operators are slow. Look at cook times — +anything over 1ms per frame is worth investigating. + +Common causes: +- Resolution too high (especially on Non-Commercial) +- Complex GLSL shaders +- Too many TOP-to-CHOP or CHOP-to-TOP transfers (GPU-CPU memory copies) +- Feedback loops without decay (values accumulate, memory grows) + +### Non-Commercial license restrictions + +- Resolution cap: 1280x1280. Setting resolutionw=1920 silently clamps to 1280. +- H.264/H.265/AV1 encoding requires Commercial license. Use ProRes or Hap instead. +- No commercial use of output. 
+ +Always check effective resolution after creation: +```python +n.cook(force=True) +actual = str(n.width) + 'x' + str(n.height) +``` + +## 5. Hermes Configuration + +### Config location + +`$HERMES_HOME/config.yaml` (defaults to `~/.hermes/config.yaml` when `HERMES_HOME` is unset) + +### MCP entry format + +The twozero TD entry should look like: +```yaml +mcpServers: + twozero_td: + url: http://localhost:40404/mcp +``` + +### After config changes + +Restart the Hermes session for changes to take effect. The MCP connection is +established at session startup. + +### Verifying MCP tools are available + +After restarting, the session log should show twozero MCP tools registered. +If tools show as registered but aren't callable, check: +- The twozero MCP hub is still running (curl test above) +- TD is still running with a project loaded +- No firewall blocking localhost:40404 + +## 6. Node Creation Issues + +### "Node type not found" error + +Wrong type string. Use camelCase with family suffix: +- Wrong: NoiseTop, noise_top, NOISE TOP +- Right: noiseTOP + +### Node created but not visible + +Check parentPath — use absolute paths like /project1. The default project +root is /project1. System nodes live at /, /ui, /sys, /local, /perform. +Don't create user nodes outside /project1. + +### Cannot create node inside a non-COMP + +Only COMP operators (Container, Base, Geometry, etc.) can contain children. +You cannot create nodes inside a TOP, CHOP, SOP, DAT, or MAT. + +## 7. Wiring Issues + +### Cross-family wiring + +TOPs connect to TOPs, CHOPs to CHOPs, SOPs to SOPs, DATs to DATs. +Use converter operators to bridge: choptoTOP, topToCHOP, soptoDAT, etc. + +Note: choptoTOP has NO input connectors. Use par.chop reference instead: +```python +spec_tex.par.chop = resample_node # correct +# NOT: resample.outputConnectors[0].connect(spec_tex.inputConnectors[0]) +``` + +### Feedback loops + +Never create A -> B -> A directly. 
Use a Feedback TOP: +```python +fb = root.create(feedbackTOP, 'fb') +fb.par.top = comp.path # reference only, no wire to fb input +fb.outputConnectors[0].connect(next_node) +``` +"Cook dependency loop detected" warning on the chain is expected and correct. + +## 8. GLSL Issues + +### Shader compilation errors are silent + +GLSL TOP shows a yellow warning in the UI but node.errors() may return empty. +Check node.warnings() too. Create an Info DAT pointed at the GLSL TOP for +full compiler output. + +### TD GLSL specifics + +- Uses GLSL 4.60 (Vulkan backend). GLSL 3.30 and earlier removed. +- UV coordinates: vUV.st (not gl_FragCoord) +- Input textures: sTD2DInputs[0] +- Output: layout(location = 0) out vec4 fragColor +- macOS CRITICAL: Always wrap output with TDOutputSwizzle(color) +- No built-in time uniform. Pass time via GLSL TOP Values page or Constant TOP. + +## 9. Recording Issues + +### H.264/H.265/AV1 requires Commercial license + +Use Apple ProRes on macOS (hardware accelerated, not license-restricted): +```python +rec.par.videocodec = 'prores' # Preferred on macOS — visually lossless, Non-Commercial OK +# rec.par.videocodec = 'mjpa' # Fallback — lossy, works everywhere +``` + +### MovieFileOut has no .record() method + +Use the toggle parameter: +```python +rec.par.record = True # start +rec.par.record = False # stop +``` + +### All exported frames identical + +TOP.save() captures same frame when called rapidly. Use MovieFileOut for +real-time recording. Set project.realTime = False for frame-accurate output. diff --git a/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh new file mode 100644 index 0000000000..15dc662c1c --- /dev/null +++ b/optional-skills/creative/touchdesigner-mcp/scripts/setup.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +# setup.sh — Automated setup for twozero MCP plugin for TouchDesigner +# Idempotent: safe to run multiple times.
+set -euo pipefail + +GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[1;33m'; CYAN='\033[0;36m'; NC='\033[0m' +OK="${GREEN}✔${NC}"; FAIL="${RED}✘${NC}"; WARN="${YELLOW}⚠${NC}" + +TWOZERO_URL="https://www.404zero.com/pisang/twozero.tox" +TOX_PATH="$HOME/Downloads/twozero.tox" +HERMES_HOME_DIR="${HERMES_HOME:-$HOME/.hermes}" +HERMES_CFG="${HERMES_HOME_DIR}/config.yaml" +MCP_PORT=40404 +MCP_ENDPOINT="http://localhost:${MCP_PORT}/mcp" + +manual_steps=() + +echo -e "\n${CYAN}═══ twozero MCP for TouchDesigner — Setup ═══${NC}\n" + +# ── 1. Check if TouchDesigner is running ── +# Match on process *name* (not full cmdline) to avoid self-matching shells +# that happen to have "TouchDesigner" in their args. macOS and Linux pgrep +# both support -x for exact name match. +if pgrep -x TouchDesigner >/dev/null 2>&1 || pgrep -x TouchDesignerFTE >/dev/null 2>&1; then + echo -e " ${OK} TouchDesigner is running" + td_running=true +else + echo -e " ${WARN} TouchDesigner is not running" + td_running=false +fi + +# ── 2. Ensure twozero.tox exists ── +if [[ -f "$TOX_PATH" ]]; then + echo -e " ${OK} twozero.tox already exists at ${TOX_PATH}" +else + echo -e " ${WARN} twozero.tox not found — downloading..." + if curl -fSL -o "$TOX_PATH" "$TWOZERO_URL" 2>/dev/null; then + echo -e " ${OK} Downloaded twozero.tox to ${TOX_PATH}" + else + echo -e " ${FAIL} Failed to download twozero.tox from ${TWOZERO_URL}" + echo " Please download manually and place at ${TOX_PATH}" + manual_steps+=("Download twozero.tox from ${TWOZERO_URL} to ${TOX_PATH}") + fi +fi + +# ── 3. Ensure Hermes config has twozero_td MCP entry ── +if [[ ! -f "$HERMES_CFG" ]]; then + echo -e " ${FAIL} Hermes config not found at ${HERMES_CFG}" + manual_steps+=("Create ${HERMES_CFG} with twozero_td MCP server entry") +elif grep -q 'twozero_td' "$HERMES_CFG" 2>/dev/null; then + echo -e " ${OK} twozero_td MCP entry exists in Hermes config" +else + echo -e " ${WARN} Adding twozero_td MCP entry to Hermes config..." 
+ python3 -c " +import yaml, sys, copy + +cfg_path = '$HERMES_CFG' +with open(cfg_path, 'r') as f: + cfg = yaml.safe_load(f) or {} + +if 'mcp_servers' not in cfg: + cfg['mcp_servers'] = {} + +if 'twozero_td' not in cfg['mcp_servers']: + cfg['mcp_servers']['twozero_td'] = { + 'url': '${MCP_ENDPOINT}', + 'timeout': 120, + 'connect_timeout': 60 + } + with open(cfg_path, 'w') as f: + yaml.dump(cfg, f, default_flow_style=False, sort_keys=False) +" 2>/dev/null && echo -e " ${OK} twozero_td MCP entry added to config" \ + || { echo -e " ${FAIL} Could not update config (is PyYAML installed?)"; \ + manual_steps+=("Add twozero_td MCP entry to ${HERMES_CFG} manually"); } + manual_steps+=("Restart Hermes session to pick up config change") +fi + +# ── 4. Test if MCP port is responding ── +if nc -z 127.0.0.1 "$MCP_PORT" 2>/dev/null; then + echo -e " ${OK} Port ${MCP_PORT} is open" + + # ── 5. Verify MCP endpoint responds ── + resp=$(curl -s --max-time 3 "$MCP_ENDPOINT" 2>/dev/null || true) + if [[ -n "$resp" ]]; then + echo -e " ${OK} MCP endpoint responded at ${MCP_ENDPOINT}" + else + echo -e " ${WARN} Port open but MCP endpoint returned empty response" + manual_steps+=("Verify MCP is enabled in twozero settings") + fi +else + echo -e " ${WARN} Port ${MCP_PORT} is not open" + if [[ "$td_running" == true ]]; then + manual_steps+=("In TD: drag twozero.tox into network editor → click Install") + manual_steps+=("Enable MCP: twozero icon → Settings → mcp → 'auto start MCP' → Yes") + else + manual_steps+=("Launch TouchDesigner") + manual_steps+=("Drag twozero.tox into the TD network editor and click Install") + manual_steps+=("Enable MCP: twozero icon → Settings → mcp → 'auto start MCP' → Yes") + fi +fi + +# ── Status Report ── +echo -e "\n${CYAN}═══ Status Report ═══${NC}\n" + +if [[ ${#manual_steps[@]} -eq 0 ]]; then + echo -e " ${OK} ${GREEN}Fully configured! 
twozero MCP is ready to use.${NC}\n" + exit 0 +else + echo -e " ${WARN} ${YELLOW}Manual steps remaining:${NC}\n" + for i in "${!manual_steps[@]}"; do + echo -e " $((i+1)). ${manual_steps[$i]}" + done + echo "" + exit 1 +fi diff --git a/skills/mcp/mcporter/SKILL.md b/optional-skills/mcp/mcporter/SKILL.md similarity index 100% rename from skills/mcp/mcporter/SKILL.md rename to optional-skills/mcp/mcporter/SKILL.md diff --git a/skills/mlops/models/clip/SKILL.md b/optional-skills/mlops/clip/SKILL.md similarity index 100% rename from skills/mlops/models/clip/SKILL.md rename to optional-skills/mlops/clip/SKILL.md diff --git a/skills/mlops/models/clip/references/applications.md b/optional-skills/mlops/clip/references/applications.md similarity index 100% rename from skills/mlops/models/clip/references/applications.md rename to optional-skills/mlops/clip/references/applications.md diff --git a/skills/mlops/inference/guidance/SKILL.md b/optional-skills/mlops/guidance/SKILL.md similarity index 100% rename from skills/mlops/inference/guidance/SKILL.md rename to optional-skills/mlops/guidance/SKILL.md diff --git a/skills/mlops/inference/guidance/references/backends.md b/optional-skills/mlops/guidance/references/backends.md similarity index 100% rename from skills/mlops/inference/guidance/references/backends.md rename to optional-skills/mlops/guidance/references/backends.md diff --git a/skills/mlops/inference/guidance/references/constraints.md b/optional-skills/mlops/guidance/references/constraints.md similarity index 100% rename from skills/mlops/inference/guidance/references/constraints.md rename to optional-skills/mlops/guidance/references/constraints.md diff --git a/skills/mlops/inference/guidance/references/examples.md b/optional-skills/mlops/guidance/references/examples.md similarity index 100% rename from skills/mlops/inference/guidance/references/examples.md rename to optional-skills/mlops/guidance/references/examples.md diff --git 
a/optional-skills/mlops/hermes-atropos-environments/SKILL.md b/optional-skills/mlops/hermes-atropos-environments/SKILL.md index 9dff466876..5101886b41 100644 --- a/optional-skills/mlops/hermes-atropos-environments/SKILL.md +++ b/optional-skills/mlops/hermes-atropos-environments/SKILL.md @@ -7,7 +7,7 @@ license: MIT metadata: hermes: tags: [atropos, rl, environments, training, reinforcement-learning, reward-functions] - related_skills: [axolotl, grpo-rl-training, trl-fine-tuning, lm-evaluation-harness] + related_skills: [axolotl, fine-tuning-with-trl, lm-evaluation-harness] --- # Hermes Agent Atropos Environments diff --git a/skills/mlops/cloud/modal/SKILL.md b/optional-skills/mlops/modal/SKILL.md similarity index 100% rename from skills/mlops/cloud/modal/SKILL.md rename to optional-skills/mlops/modal/SKILL.md diff --git a/skills/mlops/cloud/modal/references/advanced-usage.md b/optional-skills/mlops/modal/references/advanced-usage.md similarity index 100% rename from skills/mlops/cloud/modal/references/advanced-usage.md rename to optional-skills/mlops/modal/references/advanced-usage.md diff --git a/skills/mlops/cloud/modal/references/troubleshooting.md b/optional-skills/mlops/modal/references/troubleshooting.md similarity index 100% rename from skills/mlops/cloud/modal/references/troubleshooting.md rename to optional-skills/mlops/modal/references/troubleshooting.md diff --git a/skills/mlops/training/peft/SKILL.md b/optional-skills/mlops/peft/SKILL.md similarity index 100% rename from skills/mlops/training/peft/SKILL.md rename to optional-skills/mlops/peft/SKILL.md diff --git a/skills/mlops/training/peft/references/advanced-usage.md b/optional-skills/mlops/peft/references/advanced-usage.md similarity index 100% rename from skills/mlops/training/peft/references/advanced-usage.md rename to optional-skills/mlops/peft/references/advanced-usage.md diff --git a/skills/mlops/training/peft/references/troubleshooting.md 
b/optional-skills/mlops/peft/references/troubleshooting.md similarity index 100% rename from skills/mlops/training/peft/references/troubleshooting.md rename to optional-skills/mlops/peft/references/troubleshooting.md diff --git a/skills/mlops/training/pytorch-fsdp/SKILL.md b/optional-skills/mlops/pytorch-fsdp/SKILL.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/SKILL.md rename to optional-skills/mlops/pytorch-fsdp/SKILL.md diff --git a/skills/mlops/training/pytorch-fsdp/references/index.md b/optional-skills/mlops/pytorch-fsdp/references/index.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/references/index.md rename to optional-skills/mlops/pytorch-fsdp/references/index.md diff --git a/skills/mlops/training/pytorch-fsdp/references/other.md b/optional-skills/mlops/pytorch-fsdp/references/other.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/references/other.md rename to optional-skills/mlops/pytorch-fsdp/references/other.md diff --git a/skills/mlops/models/stable-diffusion/SKILL.md b/optional-skills/mlops/stable-diffusion/SKILL.md similarity index 100% rename from skills/mlops/models/stable-diffusion/SKILL.md rename to optional-skills/mlops/stable-diffusion/SKILL.md diff --git a/skills/mlops/models/stable-diffusion/references/advanced-usage.md b/optional-skills/mlops/stable-diffusion/references/advanced-usage.md similarity index 100% rename from skills/mlops/models/stable-diffusion/references/advanced-usage.md rename to optional-skills/mlops/stable-diffusion/references/advanced-usage.md diff --git a/skills/mlops/models/stable-diffusion/references/troubleshooting.md b/optional-skills/mlops/stable-diffusion/references/troubleshooting.md similarity index 100% rename from skills/mlops/models/stable-diffusion/references/troubleshooting.md rename to optional-skills/mlops/stable-diffusion/references/troubleshooting.md diff --git a/skills/mlops/models/whisper/SKILL.md 
b/optional-skills/mlops/whisper/SKILL.md similarity index 100% rename from skills/mlops/models/whisper/SKILL.md rename to optional-skills/mlops/whisper/SKILL.md diff --git a/skills/mlops/models/whisper/references/languages.md b/optional-skills/mlops/whisper/references/languages.md similarity index 100% rename from skills/mlops/models/whisper/references/languages.md rename to optional-skills/mlops/whisper/references/languages.md diff --git a/optional-skills/productivity/telephony/SKILL.md b/optional-skills/productivity/telephony/SKILL.md index c74a369209..6c457592a9 100644 --- a/optional-skills/productivity/telephony/SKILL.md +++ b/optional-skills/productivity/telephony/SKILL.md @@ -7,7 +7,7 @@ license: MIT metadata: hermes: tags: [telephony, phone, sms, mms, voice, twilio, bland.ai, vapi, calling, texting] - related_skills: [find-nearby, google-workspace, agentmail] + related_skills: [maps, google-workspace, agentmail] category: productivity --- diff --git a/optional-skills/research/duckduckgo-search/SKILL.md b/optional-skills/research/duckduckgo-search/SKILL.md index ea14e6b30f..c24fc1b956 100644 --- a/optional-skills/research/duckduckgo-search/SKILL.md +++ b/optional-skills/research/duckduckgo-search/SKILL.md @@ -57,32 +57,32 @@ Use the `ddgs` command via `terminal` when it exists. 
This is the preferred path ```bash # Text search -ddgs text -k "python async programming" -m 5 +ddgs text -q "python async programming" -m 5 # News search -ddgs news -k "artificial intelligence" -m 5 +ddgs news -q "artificial intelligence" -m 5 # Image search -ddgs images -k "landscape photography" -m 10 +ddgs images -q "landscape photography" -m 10 # Video search -ddgs videos -k "python tutorial" -m 5 +ddgs videos -q "python tutorial" -m 5 # With region filter -ddgs text -k "best restaurants" -m 5 -r us-en +ddgs text -q "best restaurants" -m 5 -r us-en # Recent results only (d=day, w=week, m=month, y=year) -ddgs text -k "latest AI news" -m 5 -t w +ddgs text -q "latest AI news" -m 5 -t w # JSON output for parsing -ddgs text -k "fastapi tutorial" -m 5 -o json +ddgs text -q "fastapi tutorial" -m 5 -o json ``` ### CLI Flags | Flag | Description | Example | |------|-------------|---------| -| `-k` | Keywords (query) — **required** | `-k "search terms"` | +| `-q` | Query — **required** | `-q "search terms"` | | `-m` | Max results | `-m 5` | | `-r` | Region | `-r us-en` | | `-t` | Time limit | `-t w` (week) | @@ -189,7 +189,7 @@ DuckDuckGo returns titles, URLs, and snippets — not full page content. To get CLI example: ```bash -ddgs text -k "fastapi deployment guide" -m 3 -o json +ddgs text -q "fastapi deployment guide" -m 3 -o json ``` Python example, only after verifying `ddgs` is installed in that runtime: @@ -229,7 +229,7 @@ Then extract the best URL with `web_extract` or another content-retrieval tool. - **Do not assume the CLI exists**: Check `command -v ddgs` before using it. - **Do not assume `execute_code` can import `ddgs`**: `from ddgs import DDGS` may fail with `ModuleNotFoundError` unless that runtime was prepared separately. - **Package name**: The package is `ddgs` (previously `duckduckgo-search`). Install with `pip install ddgs`. -- **Don't confuse `-k` and `-m`** (CLI): `-k` is for keywords, `-m` is for max results count. 
+- **Don't confuse `-q` and `-m`** (CLI): `-q` is for the query, `-m` is for max results count. - **Empty results**: If `ddgs` returns nothing, it may be rate-limited. Wait a few seconds and retry. ## Validated With diff --git a/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh b/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh index b33ac8a60d..1553d45968 100755 --- a/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh +++ b/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh @@ -25,4 +25,4 @@ if ! command -v ddgs &> /dev/null; then exit 1 fi -ddgs text -k "$QUERY" -m "$MAX_RESULTS" +ddgs text -q "$QUERY" -m "$MAX_RESULTS" diff --git a/plans/gemini-oauth-provider.md b/plans/gemini-oauth-provider.md index 9953d0eca5..a466183e80 100644 --- a/plans/gemini-oauth-provider.md +++ b/plans/gemini-oauth-provider.md @@ -4,7 +4,7 @@ Add a first-class `gemini` provider that authenticates via Google OAuth, using the standard Gemini API (not Cloud Code Assist). Users who have a Google AI subscription or Gemini API access can authenticate through the browser without needing to manually copy API keys. 
## Architecture Decision -- **Path A (chosen):** Standard Gemini API at `generativelanguage.googleapis.com/v1beta/openai/` +- **Path A (chosen):** Standard Gemini API at `generativelanguage.googleapis.com/v1beta` - **NOT Path B:** Cloud Code Assist (`cloudcode-pa.googleapis.com`) — rate-limited free tier, internal API, account ban risk - Standard `chat_completions` api_mode via OpenAI SDK — no new api_mode needed - Our own OAuth credentials — NOT sharing tokens with Gemini CLI @@ -32,9 +32,9 @@ Add a first-class `gemini` provider that authenticates via Google OAuth, using t - File locking for concurrent access (multiple agent sessions) ## API Integration -- Base URL: `https://generativelanguage.googleapis.com/v1beta/openai/` -- Auth: `Authorization: Bearer ` (passed as `api_key` to OpenAI SDK) -- api_mode: `chat_completions` (standard) +- Base URL: `https://generativelanguage.googleapis.com/v1beta` +- Auth: native Gemini API authentication handled by the provider adapter +- api_mode: `chat_completions` (standard facade over native transport) - Models: gemini-2.5-pro, gemini-2.5-flash, gemini-2.0-flash, etc. ## Files to Create/Modify diff --git a/plugins/disk-cleanup/README.md b/plugins/disk-cleanup/README.md new file mode 100644 index 0000000000..bc46047325 --- /dev/null +++ b/plugins/disk-cleanup/README.md @@ -0,0 +1,51 @@ +# disk-cleanup + +Auto-tracks and cleans up ephemeral files created during Hermes Agent +sessions — test scripts, temp outputs, cron logs, stale chrome profiles. +Scoped strictly to `$HERMES_HOME` and `/tmp/hermes-*`. + +Originally contributed by [@LVT382009](https://github.com/LVT382009) as a +skill in PR #12212. Ported to the plugin system so the behaviour runs +automatically via `post_tool_call` and `on_session_end` hooks — the agent +never needs to remember to call a tool. 
+ +## How it works + +| Hook | Behaviour | +|---|---| +| `post_tool_call` | When `write_file` / `terminal` / `patch` creates a file matching `test_*`, `tmp_*`, or `*.test.*` inside `HERMES_HOME`, track it silently as `test` / `temp` / `cron-output`. | +| `on_session_end` | If any test files were auto-tracked during this turn, run `quick` cleanup (no prompts). | + +Deletion rules (same as the original PR): + +| Category | Threshold | Confirmation | +|---|---|---| +| `test` | every session end | Never | +| `temp` | >7 days since tracked | Never | +| `cron-output` | >14 days since tracked | Never | +| empty dirs under HERMES_HOME | always | Never | +| `research` | >30 days, beyond 10 newest | Always (deep only) | +| `chrome-profile` | >14 days since tracked | Always (deep only) | +| files >500 MB | never auto | Always (deep only) | + +## Slash command + +``` +/disk-cleanup status # breakdown + top-10 largest +/disk-cleanup dry-run # preview without deleting +/disk-cleanup quick # run safe cleanup now +/disk-cleanup deep # quick + list items needing prompt +/disk-cleanup track <path> <category> # manual tracking +/disk-cleanup forget <path> # stop tracking +``` + +## Safety + +- `is_safe_path()` rejects anything outside `HERMES_HOME` or `/tmp/hermes-*` +- Windows mounts (`/mnt/c` etc.) are rejected +- The state directory `$HERMES_HOME/disk-cleanup/` is itself excluded +- `$HERMES_HOME/logs/`, `memories/`, `sessions/`, `skills/`, `plugins/`, + and config files are never tracked +- Backup/restore is scoped to `tracked.json` — the plugin never touches + agent logs +- Atomic writes: `.tmp` → backup → rename diff --git a/plugins/disk-cleanup/__init__.py b/plugins/disk-cleanup/__init__.py new file mode 100644 index 0000000000..0a4b6c7ae1 --- /dev/null +++ b/plugins/disk-cleanup/__init__.py @@ -0,0 +1,316 @@ +"""disk-cleanup plugin — auto-cleanup of ephemeral Hermes session files. + +Wires three behaviours: + +1.
``post_tool_call`` hook — inspects ``write_file`` and ``terminal`` + tool results for newly-created paths matching test/temp patterns + under ``HERMES_HOME`` and tracks them silently. Zero agent + compliance required. + +2. ``on_session_end`` hook — when any test files were auto-tracked + during the just-finished turn, runs :func:`disk_cleanup.quick` and + logs a single line to ``$HERMES_HOME/disk-cleanup/cleanup.log``. + +3. ``/disk-cleanup`` slash command — manual ``status``, ``dry-run``, + ``quick``, ``deep``, ``track``, ``forget``. + +Replaces PR #12212's skill-plus-script design: the agent no longer +needs to remember to run commands. +""" + +from __future__ import annotations + +import logging +import re +import shlex +import threading +from pathlib import Path +from typing import Any, Dict, Optional, Set + +from . import disk_cleanup as dg + +logger = logging.getLogger(__name__) + + +# Per-task set of "test files newly tracked this turn". Keyed by task_id +# (or session_id as fallback) so on_session_end can decide whether to run +# cleanup. Guarded by a lock — post_tool_call can fire concurrently on +# parallel tool calls. 
+_recent_test_tracks: Dict[str, Set[str]] = {} +_lock = threading.Lock() + + +# Tool-call result shapes we can parse +_WRITE_FILE_PATH_KEY = "path" +_TERMINAL_PATH_REGEX = re.compile(r"(?:^|\s)(/[^\s'\"`]+|\~/[^\s'\"`]+)") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _tracker_key(task_id: str, session_id: str) -> str: + return task_id or session_id or "default" + + +def _record_track(task_id: str, session_id: str, path: Path, category: str) -> None: + """Record that we tracked *path* as *category* during this turn.""" + if category != "test": + return + key = _tracker_key(task_id, session_id) + with _lock: + _recent_test_tracks.setdefault(key, set()).add(str(path)) + + +def _drain(task_id: str, session_id: str) -> Set[str]: + """Pop the set of test paths tracked during this turn.""" + key = _tracker_key(task_id, session_id) + with _lock: + return _recent_test_tracks.pop(key, set()) + + +def _attempt_track(path_str: str, task_id: str, session_id: str) -> None: + """Best-effort auto-track. Never raises.""" + try: + p = Path(path_str).expanduser() + except Exception: + return + if not p.exists(): + return + category = dg.guess_category(p) + if category is None: + return + newly = dg.track(str(p), category, silent=True) + if newly: + _record_track(task_id, session_id, p, category) + + +def _extract_paths_from_write_file(args: Dict[str, Any]) -> Set[str]: + path = args.get(_WRITE_FILE_PATH_KEY) + return {path} if isinstance(path, str) and path else set() + + +def _extract_paths_from_patch(args: Dict[str, Any]) -> Set[str]: + # The patch tool creates new files via the `mode="patch"` path too, but + # most of its use is editing existing files — we only care about new + # ephemeral creations, so treat patch conservatively and only pick up + # the single-file `path` arg. 
Track-then-cleanup is idempotent, so + # re-tracking an already-tracked file is a no-op (dedup in track()). + path = args.get("path") + return {path} if isinstance(path, str) and path else set() + + +def _extract_paths_from_terminal(args: Dict[str, Any], result: str) -> Set[str]: + """Best-effort: pull candidate filesystem paths from a terminal command + and its output, then let ``guess_category`` / ``is_safe_path`` filter. + """ + paths: Set[str] = set() + cmd = args.get("command") or "" + if isinstance(cmd, str) and cmd: + # Tokenise the command — catches `touch /tmp/hermes-x/test_foo.py` + try: + for tok in shlex.split(cmd, posix=True): + if tok.startswith(("/", "~")): + paths.add(tok) + except ValueError: + pass + # Only scan the result text if it's a reasonable size (avoid 50KB dumps). + if isinstance(result, str) and len(result) < 4096: + for match in _TERMINAL_PATH_REGEX.findall(result): + paths.add(match) + return paths + + +# --------------------------------------------------------------------------- +# Hooks +# --------------------------------------------------------------------------- + +def _on_post_tool_call( + tool_name: str = "", + args: Optional[Dict[str, Any]] = None, + result: Any = None, + task_id: str = "", + session_id: str = "", + tool_call_id: str = "", + **_: Any, +) -> None: + """Auto-track ephemeral files created by recent tool calls.""" + if not isinstance(args, dict): + return + + candidates: Set[str] = set() + if tool_name == "write_file": + candidates = _extract_paths_from_write_file(args) + elif tool_name == "patch": + candidates = _extract_paths_from_patch(args) + elif tool_name == "terminal": + candidates = _extract_paths_from_terminal(args, result if isinstance(result, str) else "") + else: + return + + for path_str in candidates: + _attempt_track(path_str, task_id, session_id) + + +def _on_session_end( + session_id: str = "", + completed: bool = True, + interrupted: bool = False, + **_: Any, +) -> None: + """Run quick cleanup if 
any test files were tracked during this turn.""" + # Drain both task-level and session-level buckets. In practice only one + # is populated per turn; the other is empty. + drained_session = _drain("", session_id) + # Also drain any task-scoped buckets that happen to exist. This is a + # cheap sweep: if an agent spawned subagents (each with their own + # task_id) they'll have recorded into separate buckets; we want to + # cleanup them all at session end. + with _lock: + task_buckets = list(_recent_test_tracks.keys()) + for key in task_buckets: + if key and key != session_id: + _recent_test_tracks.pop(key, None) + + if not drained_session and not task_buckets: + return + + try: + summary = dg.quick() + except Exception as exc: + logger.debug("disk-cleanup quick cleanup failed: %s", exc) + return + + if summary["deleted"] or summary["empty_dirs"]: + dg._log( + f"AUTO_QUICK (session_end): deleted={summary['deleted']} " + f"dirs={summary['empty_dirs']} freed={dg.fmt_size(summary['freed'])}" + ) + + +# --------------------------------------------------------------------------- +# Slash command +# --------------------------------------------------------------------------- + +_HELP_TEXT = """\ +/disk-cleanup — ephemeral-file cleanup + +Subcommands: + status Per-category breakdown + top-10 largest + dry-run Preview what quick/deep would delete + quick Run safe cleanup now (no prompts) + deep Run quick, then list items that need prompts + track Manually add a path to tracking + forget Stop tracking a path (does not delete) + +Categories: temp | test | research | download | chrome-profile | cron-output | other + +All operations are scoped to HERMES_HOME and /tmp/hermes-*. +Test files are auto-tracked on write_file / terminal and auto-cleaned at session end. +""" + + +def _fmt_summary(summary: Dict[str, Any]) -> str: + base = ( + f"[disk-cleanup] Cleaned {summary['deleted']} files + " + f"{summary['empty_dirs']} empty dirs, freed {dg.fmt_size(summary['freed'])}." 
+ ) + if summary.get("errors"): + base += f"\n {len(summary['errors'])} error(s); see cleanup.log." + return base + + +def _handle_slash(raw_args: str) -> Optional[str]: + argv = raw_args.strip().split() + if not argv or argv[0] in ("help", "-h", "--help"): + return _HELP_TEXT + + sub = argv[0] + + if sub == "status": + return dg.format_status(dg.status()) + + if sub == "dry-run": + auto, prompt = dg.dry_run() + auto_size = sum(i["size"] for i in auto) + prompt_size = sum(i["size"] for i in prompt) + lines = [ + "Dry-run preview (nothing deleted):", + f" Auto-delete : {len(auto)} files ({dg.fmt_size(auto_size)})", + ] + for item in auto: + lines.append(f" [{item['category']}] {item['path']}") + lines.append( + f" Needs prompt: {len(prompt)} files ({dg.fmt_size(prompt_size)})" + ) + for item in prompt: + lines.append(f" [{item['category']}] {item['path']}") + lines.append( + f"\n Total potential: {dg.fmt_size(auto_size + prompt_size)}" + ) + return "\n".join(lines) + + if sub == "quick": + return _fmt_summary(dg.quick()) + + if sub == "deep": + # In-session deep can't prompt the user interactively — show what + # quick cleaned plus the items that WOULD need confirmation. + quick_summary = dg.quick() + _auto, prompt_items = dg.dry_run() + lines = [_fmt_summary(quick_summary)] + if prompt_items: + size = sum(i["size"] for i in prompt_items) + lines.append( + f"\n{len(prompt_items)} item(s) need confirmation " + f"({dg.fmt_size(size)}):" + ) + for item in prompt_items: + lines.append(f" [{item['category']}] {item['path']}") + lines.append( + "\nRun `/disk-cleanup forget ` to skip, or delete " + "manually via terminal." + ) + return "\n".join(lines) + + if sub == "track": + if len(argv) < 3: + return "Usage: /disk-cleanup track " + path_arg = argv[1] + category = argv[2] + if category not in dg.ALLOWED_CATEGORIES: + return ( + f"Unknown category '{category}'. 
" + f"Allowed: {sorted(dg.ALLOWED_CATEGORIES)}" + ) + if dg.track(path_arg, category, silent=True): + return f"Tracked {path_arg} as '{category}'." + return ( + f"Not tracked (already present, missing, or outside HERMES_HOME): " + f"{path_arg}" + ) + + if sub == "forget": + if len(argv) < 2: + return "Usage: /disk-cleanup forget " + n = dg.forget(argv[1]) + return ( + f"Removed {n} tracking entr{'y' if n == 1 else 'ies'} for {argv[1]}." + if n else f"Not found in tracking: {argv[1]}" + ) + + return f"Unknown subcommand: {sub}\n\n{_HELP_TEXT}" + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + +def register(ctx) -> None: + ctx.register_hook("post_tool_call", _on_post_tool_call) + ctx.register_hook("on_session_end", _on_session_end) + ctx.register_command( + "disk-cleanup", + handler=_handle_slash, + description="Track and clean up ephemeral Hermes session files.", + ) diff --git a/plugins/disk-cleanup/disk_cleanup.py b/plugins/disk-cleanup/disk_cleanup.py new file mode 100755 index 0000000000..cef2698316 --- /dev/null +++ b/plugins/disk-cleanup/disk_cleanup.py @@ -0,0 +1,496 @@ +"""disk_cleanup — ephemeral file cleanup for Hermes Agent. + +Library module wrapping the deterministic cleanup rules written by +@LVT382009 in PR #12212. The plugin ``__init__.py`` wires these +functions into ``post_tool_call`` and ``on_session_end`` hooks so +tracking and cleanup happen automatically — the agent never needs to +call a tool or remember a skill. 
+ +Rules: + - test files → delete immediately at task end (age >= 0) + - temp files → delete after 7 days + - cron-output → delete after 14 days + - empty dirs → always delete (under HERMES_HOME) + - research → keep 10 newest, prompt for older (deep only) + - chrome-profile→ prompt after 14 days (deep only) + - >500 MB files → prompt always (deep only) + +Scope: strictly HERMES_HOME and /tmp/hermes-* +Never touches: ~/.hermes/logs/ or any system directory. +""" + +from __future__ import annotations + +import json +import logging +import shutil +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +try: + from hermes_constants import get_hermes_home +except Exception: # pragma: no cover — plugin may load before constants resolves + import os + + def get_hermes_home() -> Path: # type: ignore[no-redef] + val = (os.environ.get("HERMES_HOME") or "").strip() + return Path(val).resolve() if val else (Path.home() / ".hermes").resolve() + + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +def get_state_dir() -> Path: + """State dir — separate from ``$HERMES_HOME/logs/``.""" + return get_hermes_home() / "disk-cleanup" + + +def get_tracked_file() -> Path: + return get_state_dir() / "tracked.json" + + +def get_log_file() -> Path: + """Audit log — intentionally NOT under ``$HERMES_HOME/logs/``.""" + return get_state_dir() / "cleanup.log" + + +# --------------------------------------------------------------------------- +# Path safety +# --------------------------------------------------------------------------- + +def is_safe_path(path: Path) -> bool: + """Accept only paths under HERMES_HOME or ``/tmp/hermes-*``. + + Rejects Windows mounts (``/mnt/c`` etc.) and any system directory. 
+ """ + hermes_home = get_hermes_home() + try: + path.resolve().relative_to(hermes_home) + return True + except (ValueError, OSError): + pass + # Allow /tmp/hermes-* explicitly + parts = path.parts + if len(parts) >= 3 and parts[1] == "tmp" and parts[2].startswith("hermes-"): + return True + return False + + +# --------------------------------------------------------------------------- +# Audit log +# --------------------------------------------------------------------------- + +def _log(message: str) -> None: + try: + log_file = get_log_file() + log_file.parent.mkdir(parents=True, exist_ok=True) + ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + with open(log_file, "a") as f: + f.write(f"[{ts}] {message}\n") + except OSError: + # Never let the audit log break the agent loop. + pass + + +# --------------------------------------------------------------------------- +# tracked.json — atomic read/write, backup scoped to tracked.json only +# --------------------------------------------------------------------------- + +def load_tracked() -> List[Dict[str, Any]]: + """Load tracked.json. 
Restores from ``.bak`` on corruption.""" + tf = get_tracked_file() + tf.parent.mkdir(parents=True, exist_ok=True) + + if not tf.exists(): + return [] + + try: + return json.loads(tf.read_text()) + except (json.JSONDecodeError, ValueError): + bak = tf.with_suffix(".json.bak") + if bak.exists(): + try: + data = json.loads(bak.read_text()) + _log("WARN: tracked.json corrupted — restored from .bak") + return data + except Exception: + pass + _log("WARN: tracked.json corrupted, no backup — starting fresh") + return [] + + +def save_tracked(tracked: List[Dict[str, Any]]) -> None: + """Atomic write: ``.tmp`` → backup old → rename.""" + tf = get_tracked_file() + tf.parent.mkdir(parents=True, exist_ok=True) + tmp = tf.with_suffix(".json.tmp") + tmp.write_text(json.dumps(tracked, indent=2)) + if tf.exists(): + shutil.copy2(tf, tf.with_suffix(".json.bak")) + tmp.replace(tf) + + +# --------------------------------------------------------------------------- +# Categories +# --------------------------------------------------------------------------- + +ALLOWED_CATEGORIES = { + "temp", "test", "research", "download", + "chrome-profile", "cron-output", "other", +} + + +def fmt_size(n: float) -> str: + for unit in ("B", "KB", "MB", "GB", "TB"): + if n < 1024: + return f"{n:.1f} {unit}" + n /= 1024 + return f"{n:.1f} PB" + + +# --------------------------------------------------------------------------- +# Track / forget +# --------------------------------------------------------------------------- + +def track(path_str: str, category: str, silent: bool = False) -> bool: + """Register a file for tracking. 
Returns True if newly tracked.""" + if category not in ALLOWED_CATEGORIES: + _log(f"WARN: unknown category '{category}', using 'other'") + category = "other" + + path = Path(path_str).resolve() + + if not path.exists(): + _log(f"SKIP: {path} (does not exist)") + return False + + if not is_safe_path(path): + _log(f"REJECT: {path} (outside HERMES_HOME)") + return False + + size = path.stat().st_size if path.is_file() else 0 + tracked = load_tracked() + + # Deduplicate + if any(item["path"] == str(path) for item in tracked): + return False + + tracked.append({ + "path": str(path), + "timestamp": datetime.now(timezone.utc).isoformat(), + "category": category, + "size": size, + }) + save_tracked(tracked) + _log(f"TRACKED: {path} ({category}, {fmt_size(size)})") + if not silent: + print(f"Tracked: {path} ({category}, {fmt_size(size)})") + return True + + +def forget(path_str: str) -> int: + """Remove a path from tracking without deleting the file.""" + p = Path(path_str).resolve() + tracked = load_tracked() + before = len(tracked) + tracked = [i for i in tracked if Path(i["path"]).resolve() != p] + removed = before - len(tracked) + if removed: + save_tracked(tracked) + _log(f"FORGOT: {p} ({removed} entries)") + return removed + + +# --------------------------------------------------------------------------- +# Dry run +# --------------------------------------------------------------------------- + +def dry_run() -> Tuple[List[Dict], List[Dict]]: + """Return (auto_delete_list, needs_prompt_list) without touching files.""" + tracked = load_tracked() + now = datetime.now(timezone.utc) + + auto: List[Dict] = [] + prompt: List[Dict] = [] + + for item in tracked: + p = Path(item["path"]) + if not p.exists(): + continue + age = (now - datetime.fromisoformat(item["timestamp"])).days + cat = item["category"] + size = item["size"] + + if cat == "test": + auto.append(item) + elif cat == "temp" and age > 7: + auto.append(item) + elif cat == "cron-output" and age > 14: + 
auto.append(item) + elif cat == "research" and age > 30: + prompt.append(item) + elif cat == "chrome-profile" and age > 14: + prompt.append(item) + elif size > 500 * 1024 * 1024: + prompt.append(item) + + return auto, prompt + + +# --------------------------------------------------------------------------- +# Quick cleanup +# --------------------------------------------------------------------------- + +def quick() -> Dict[str, Any]: + """Safe deterministic cleanup — no prompts. + + Returns: ``{"deleted": N, "empty_dirs": N, "freed": bytes, + "errors": [str, ...]}``. + """ + tracked = load_tracked() + now = datetime.now(timezone.utc) + deleted = 0 + freed = 0 + new_tracked: List[Dict] = [] + errors: List[str] = [] + + for item in tracked: + p = Path(item["path"]) + cat = item["category"] + + if not p.exists(): + _log(f"STALE: {p} (removed from tracking)") + continue + + age = (now - datetime.fromisoformat(item["timestamp"])).days + + should_delete = ( + cat == "test" + or (cat == "temp" and age > 7) + or (cat == "cron-output" and age > 14) + ) + + if should_delete: + try: + if p.is_file(): + p.unlink() + elif p.is_dir(): + shutil.rmtree(p) + freed += item["size"] + deleted += 1 + _log(f"DELETED: {p} ({cat}, {fmt_size(item['size'])})") + except OSError as e: + _log(f"ERROR deleting {p}: {e}") + errors.append(f"{p}: {e}") + new_tracked.append(item) + else: + new_tracked.append(item) + + # Remove empty dirs under HERMES_HOME (but leave HERMES_HOME itself and + # a short list of well-known top-level state dirs alone — a fresh install + # has these empty, and deleting them would surprise the user). 
+ hermes_home = get_hermes_home() + _PROTECTED_TOP_LEVEL = { + "logs", "memories", "sessions", "cron", "cronjobs", + "cache", "skills", "plugins", "disk-cleanup", "optional-skills", + "hermes-agent", "backups", "profiles", ".worktrees", + } + empty_removed = 0 + try: + for dirpath in sorted(hermes_home.rglob("*"), reverse=True): + if not dirpath.is_dir() or dirpath == hermes_home: + continue + try: + rel_parts = dirpath.relative_to(hermes_home).parts + except ValueError: + continue + # Skip the well-known top-level state dirs themselves. + if len(rel_parts) == 1 and rel_parts[0] in _PROTECTED_TOP_LEVEL: + continue + try: + if not any(dirpath.iterdir()): + dirpath.rmdir() + empty_removed += 1 + _log(f"DELETED: {dirpath} (empty dir)") + except OSError: + pass + except OSError: + pass + + save_tracked(new_tracked) + _log( + f"QUICK_SUMMARY: {deleted} files, {empty_removed} dirs, " + f"{fmt_size(freed)}" + ) + return { + "deleted": deleted, + "empty_dirs": empty_removed, + "freed": freed, + "errors": errors, + } + + +# --------------------------------------------------------------------------- +# Deep cleanup (interactive — not called from plugin hooks) +# --------------------------------------------------------------------------- + +def deep( + confirm: Optional[callable] = None, +) -> Dict[str, Any]: + """Deep cleanup. + + Runs :func:`quick` first, then asks the *confirm* callable for each + risky item (research > 30d beyond 10 newest, chrome-profile > 14d, + any file > 500 MB). *confirm(item)* must return True to delete. + + Returns: ``{"quick": {...}, "deep_deleted": N, "deep_freed": bytes}``. + """ + quick_result = quick() + + if confirm is None: + # No interactive confirmer — deep stops after the quick pass. 
+ return {"quick": quick_result, "deep_deleted": 0, "deep_freed": 0} + + tracked = load_tracked() + now = datetime.now(timezone.utc) + research, chrome, large = [], [], [] + + for item in tracked: + p = Path(item["path"]) + if not p.exists(): + continue + age = (now - datetime.fromisoformat(item["timestamp"])).days + cat = item["category"] + + if cat == "research" and age > 30: + research.append(item) + elif cat == "chrome-profile" and age > 14: + chrome.append(item) + elif item["size"] > 500 * 1024 * 1024: + large.append(item) + + research.sort(key=lambda x: x["timestamp"], reverse=True) + old_research = research[10:] + + freed, count = 0, 0 + to_remove: List[Dict] = [] + + for group in (old_research, chrome, large): + for item in group: + if confirm(item): + try: + p = Path(item["path"]) + if p.is_file(): + p.unlink() + elif p.is_dir(): + shutil.rmtree(p) + to_remove.append(item) + freed += item["size"] + count += 1 + _log( + f"DELETED: {p} ({item['category']}, " + f"{fmt_size(item['size'])})" + ) + except OSError as e: + _log(f"ERROR deleting {item['path']}: {e}") + + if to_remove: + remove_paths = {i["path"] for i in to_remove} + save_tracked([i for i in tracked if i["path"] not in remove_paths]) + + return {"quick": quick_result, "deep_deleted": count, "deep_freed": freed} + + +# --------------------------------------------------------------------------- +# Status +# --------------------------------------------------------------------------- + +def status() -> Dict[str, Any]: + """Return per-category breakdown and top 10 largest tracked files.""" + tracked = load_tracked() + cats: Dict[str, Dict] = {} + for item in tracked: + c = item["category"] + cats.setdefault(c, {"count": 0, "size": 0}) + cats[c]["count"] += 1 + cats[c]["size"] += item["size"] + + existing = [ + (i["path"], i["size"], i["category"]) + for i in tracked if Path(i["path"]).exists() + ] + existing.sort(key=lambda x: x[1], reverse=True) + + return { + "categories": cats, + "top10": 
existing[:10], + "total_tracked": len(tracked), + } + + +def format_status(s: Dict[str, Any]) -> str: + """Human-readable status string (for slash command output).""" + lines = [f"{'Category':<20} {'Files':>6} {'Size':>10}", "-" * 40] + cats = s["categories"] + for cat, d in sorted(cats.items(), key=lambda x: x[1]["size"], reverse=True): + lines.append(f"{cat:<20} {d['count']:>6} {fmt_size(d['size']):>10}") + + if not cats: + lines.append("(nothing tracked yet)") + + lines.append("") + lines.append("Top 10 largest tracked files:") + if not s["top10"]: + lines.append(" (none)") + else: + for rank, (path, size, cat) in enumerate(s["top10"], 1): + lines.append(f" {rank:>2}. {fmt_size(size):>8} [{cat}] {path}") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Auto-categorisation from tool-call inspection +# --------------------------------------------------------------------------- + +_TEST_PATTERNS = ("test_", "tmp_") +_TEST_SUFFIXES = (".test.py", ".test.js", ".test.ts", ".test.md") + + +def guess_category(path: Path) -> Optional[str]: + """Return a category label for *path*, or None if we shouldn't track it. + + Used by the ``post_tool_call`` hook to auto-track ephemeral files. + """ + if not is_safe_path(path): + return None + + # Skip the state dir itself, logs, memory files, sessions, config. + hermes_home = get_hermes_home() + try: + rel = path.resolve().relative_to(hermes_home) + top = rel.parts[0] if rel.parts else "" + if top in { + "disk-cleanup", "logs", "memories", "sessions", "config.yaml", + "skills", "plugins", ".env", "USER.md", "MEMORY.md", "SOUL.md", + "auth.json", "hermes-agent", + }: + return None + if top == "cron" or top == "cronjobs": + return "cron-output" + if top == "cache": + return "temp" + except ValueError: + # Path isn't under HERMES_HOME (e.g. /tmp/hermes-*) — fall through. 
+ pass + + name = path.name + if name.startswith(_TEST_PATTERNS): + return "test" + if any(name.endswith(sfx) for sfx in _TEST_SUFFIXES): + return "test" + return None diff --git a/plugins/disk-cleanup/plugin.yaml b/plugins/disk-cleanup/plugin.yaml new file mode 100644 index 0000000000..fe005c8849 --- /dev/null +++ b/plugins/disk-cleanup/plugin.yaml @@ -0,0 +1,7 @@ +name: disk-cleanup +version: 2.0.0 +description: "Auto-track and clean up ephemeral files (test scripts, temp outputs, cron logs) created during Hermes sessions. Runs via plugin hooks — no agent action required." +author: "@LVT382009 (original), NousResearch (plugin port)" +hooks: + - post_tool_call + - on_session_end diff --git a/plugins/example-dashboard/dashboard/dist/index.js b/plugins/example-dashboard/dashboard/dist/index.js new file mode 100644 index 0000000000..a54916be41 --- /dev/null +++ b/plugins/example-dashboard/dashboard/dist/index.js @@ -0,0 +1,94 @@ +/** + * Example Dashboard Plugin + * + * Demonstrates how to build a dashboard plugin using the Hermes Plugin SDK. + * No build step needed — this is a plain IIFE that uses globals from the SDK. 
+ */ +(function () { + "use strict"; + + const SDK = window.__HERMES_PLUGIN_SDK__; + const { React } = SDK; + const { Card, CardHeader, CardTitle, CardContent, Badge, Button } = SDK.components; + const { useState, useEffect } = SDK.hooks; + const { cn } = SDK.utils; + + function ExamplePage() { + const [greeting, setGreeting] = useState(null); + const [loading, setLoading] = useState(false); + + function fetchGreeting() { + setLoading(true); + SDK.fetchJSON("/api/plugins/example/hello") + .then(function (data) { setGreeting(data.message); }) + .catch(function () { setGreeting("(backend not available)"); }) + .finally(function () { setLoading(false); }); + } + + return React.createElement("div", { className: "flex flex-col gap-6" }, + // Header card + React.createElement(Card, null, + React.createElement(CardHeader, null, + React.createElement("div", { className: "flex items-center gap-3" }, + React.createElement(CardTitle, { className: "text-lg" }, "Example Plugin"), + React.createElement(Badge, { variant: "outline" }, "v1.0.0"), + ), + ), + React.createElement(CardContent, { className: "flex flex-col gap-4" }, + React.createElement("p", { className: "text-sm text-muted-foreground" }, + "This is an example dashboard plugin. It demonstrates using the Plugin SDK to build ", + "custom tabs with React components, connect to backend API routes, and integrate with ", + "the existing Hermes UI system.", + ), + React.createElement("div", { className: "flex items-center gap-3" }, + React.createElement(Button, { + onClick: fetchGreeting, + disabled: loading, + className: cn( + "inline-flex items-center gap-2 border border-border bg-background/40 px-4 py-2", + "text-sm font-courier transition-colors hover:bg-foreground/10 cursor-pointer", + ), + }, loading ? "Loading..." 
: "Call Backend API"), + greeting && React.createElement("span", { + className: "text-sm font-courier text-muted-foreground", + }, greeting), + ), + ), + ), + + // Info card about the SDK + React.createElement(Card, null, + React.createElement(CardHeader, null, + React.createElement(CardTitle, { className: "text-base" }, "Plugin SDK Reference"), + ), + React.createElement(CardContent, null, + React.createElement("div", { className: "grid gap-3 text-sm" }, + React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" }, + React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.React"), + React.createElement("span", { className: "text-muted-foreground text-xs" }, "React instance — use instead of importing react"), + ), + React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" }, + React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.hooks"), + React.createElement("span", { className: "text-muted-foreground text-xs" }, "useState, useEffect, useCallback, useMemo, useRef, useContext, createContext"), + ), + React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" }, + React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.components"), + React.createElement("span", { className: "text-muted-foreground text-xs" }, "Card, Badge, Button, Input, Label, Select, Separator, Tabs, etc."), + ), + React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" }, + React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.api"), + React.createElement("span", { className: "text-muted-foreground text-xs" }, "Hermes API client — getStatus(), getSessions(), etc."), + ), + React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" }, + React.createElement("span", { className: "font-medium" }, 
"window.__HERMES_PLUGIN_SDK__.utils"), + React.createElement("span", { className: "text-muted-foreground text-xs" }, "cn(), timeAgo(), isoTimeAgo()"), + ), + ), + ), + ), + ); + } + + // Register this plugin — the dashboard picks it up automatically. + window.__HERMES_PLUGINS__.register("example", ExamplePage); +})(); diff --git a/plugins/example-dashboard/dashboard/manifest.json b/plugins/example-dashboard/dashboard/manifest.json new file mode 100644 index 0000000000..2111bff5e7 --- /dev/null +++ b/plugins/example-dashboard/dashboard/manifest.json @@ -0,0 +1,13 @@ +{ + "name": "example", + "label": "Example", + "description": "Example dashboard plugin — demonstrates the plugin SDK", + "icon": "Sparkles", + "version": "1.0.0", + "tab": { + "path": "/example", + "position": "after:skills" + }, + "entry": "dist/index.js", + "api": "plugin_api.py" +} diff --git a/plugins/example-dashboard/dashboard/plugin_api.py b/plugins/example-dashboard/dashboard/plugin_api.py new file mode 100644 index 0000000000..20aed76e26 --- /dev/null +++ b/plugins/example-dashboard/dashboard/plugin_api.py @@ -0,0 +1,14 @@ +"""Example dashboard plugin — backend API routes. + +Mounted at /api/plugins/example/ by the dashboard plugin system. +""" + +from fastapi import APIRouter + +router = APIRouter() + + +@router.get("/hello") +async def hello(): + """Simple greeting endpoint to demonstrate plugin API routes.""" + return {"message": "Hello from the example plugin!", "plugin": "example", "version": "1.0.0"} diff --git a/plugins/memory/__init__.py b/plugins/memory/__init__.py index cd583e6d8d..0ae65a25d5 100644 --- a/plugins/memory/__init__.py +++ b/plugins/memory/__init__.py @@ -1,18 +1,22 @@ """Memory provider plugin discovery. -Scans ``plugins/memory//`` directories for memory provider plugins. -Each subdirectory must contain ``__init__.py`` with a class implementing -the MemoryProvider ABC. 
+Scans two directories for memory provider plugins: -Memory providers are separate from the general plugin system — they live -in the repo and are always available without user installation. Only ONE -can be active at a time, selected via ``memory.provider`` in config.yaml. +1. Bundled providers: ``plugins/memory//`` (shipped with hermes-agent) +2. User-installed providers: ``$HERMES_HOME/plugins//`` + +Each subdirectory must contain ``__init__.py`` with a class implementing +the MemoryProvider ABC. On name collisions, bundled providers take +precedence. + +Only ONE provider can be active at a time, selected via +``memory.provider`` in config.yaml. Usage: from plugins.memory import discover_memory_providers, load_memory_provider available = discover_memory_providers() # [(name, desc, available), ...] - provider = load_memory_provider("openviking") # MemoryProvider instance + provider = load_memory_provider("mnemosyne") # MemoryProvider instance """ from __future__ import annotations @@ -29,24 +33,101 @@ logger = logging.getLogger(__name__) _MEMORY_PLUGINS_DIR = Path(__file__).parent +# --------------------------------------------------------------------------- +# Directory helpers +# --------------------------------------------------------------------------- + +def _get_user_plugins_dir() -> Optional[Path]: + """Return ``$HERMES_HOME/plugins/`` or None if unavailable.""" + try: + from hermes_constants import get_hermes_home + d = get_hermes_home() / "plugins" + return d if d.is_dir() else None + except Exception: + return None + + +def _is_memory_provider_dir(path: Path) -> bool: + """Heuristic: does *path* look like a memory provider plugin? + + Checks for ``register_memory_provider`` or ``MemoryProvider`` in the + ``__init__.py`` source. Cheap text scan — no import needed. 
+ """ + init_file = path / "__init__.py" + if not init_file.exists(): + return False + try: + source = init_file.read_text(errors="replace")[:8192] + return "register_memory_provider" in source or "MemoryProvider" in source + except Exception: + return False + + +def _iter_provider_dirs() -> List[Tuple[str, Path]]: + """Yield ``(name, path)`` for all discovered provider directories. + + Scans bundled first, then user-installed. Bundled takes precedence + on name collisions (first-seen wins via ``seen`` set). + """ + seen: set = set() + dirs: List[Tuple[str, Path]] = [] + + # 1. Bundled providers (plugins/memory//) + if _MEMORY_PLUGINS_DIR.is_dir(): + for child in sorted(_MEMORY_PLUGINS_DIR.iterdir()): + if not child.is_dir() or child.name.startswith(("_", ".")): + continue + if not (child / "__init__.py").exists(): + continue + seen.add(child.name) + dirs.append((child.name, child)) + + # 2. User-installed providers ($HERMES_HOME/plugins//) + user_dir = _get_user_plugins_dir() + if user_dir: + for child in sorted(user_dir.iterdir()): + if not child.is_dir() or child.name.startswith(("_", ".")): + continue + if child.name in seen: + continue # bundled takes precedence + if not _is_memory_provider_dir(child): + continue # skip non-memory plugins + dirs.append((child.name, child)) + + return dirs + + +def find_provider_dir(name: str) -> Optional[Path]: + """Resolve a provider name to its directory. + + Checks bundled first, then user-installed. 
+ """ + # Bundled + bundled = _MEMORY_PLUGINS_DIR / name + if bundled.is_dir() and (bundled / "__init__.py").exists(): + return bundled + # User-installed + user_dir = _get_user_plugins_dir() + if user_dir: + user = user_dir / name + if user.is_dir() and _is_memory_provider_dir(user): + return user + return None + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + def discover_memory_providers() -> List[Tuple[str, str, bool]]: - """Scan plugins/memory/ for available providers. + """Scan bundled and user-installed directories for available providers. Returns list of (name, description, is_available) tuples. - Does NOT import the providers — just reads plugin.yaml for metadata - and does a lightweight availability check. + Bundled providers take precedence on name collisions. """ results = [] - if not _MEMORY_PLUGINS_DIR.is_dir(): - return results - - for child in sorted(_MEMORY_PLUGINS_DIR.iterdir()): - if not child.is_dir() or child.name.startswith(("_", ".")): - continue - init_file = child / "__init__.py" - if not init_file.exists(): - continue + for name, child in _iter_provider_dirs(): # Read description from plugin.yaml if available desc = "" yaml_file = child / "plugin.yaml" @@ -70,7 +151,7 @@ def discover_memory_providers() -> List[Tuple[str, str, bool]]: except Exception: available = False - results.append((child.name, desc, available)) + results.append((name, desc, available)) return results @@ -78,11 +159,15 @@ def discover_memory_providers() -> List[Tuple[str, str, bool]]: def load_memory_provider(name: str) -> Optional["MemoryProvider"]: """Load and return a MemoryProvider instance by name. + Checks both bundled (``plugins/memory//``) and user-installed + (``$HERMES_HOME/plugins//``) directories. Bundled takes + precedence on name collisions. + Returns None if the provider is not found or fails to load. 
""" - provider_dir = _MEMORY_PLUGINS_DIR / name - if not provider_dir.is_dir(): - logger.debug("Memory provider '%s' not found in %s", name, _MEMORY_PLUGINS_DIR) + provider_dir = find_provider_dir(name) + if not provider_dir: + logger.debug("Memory provider '%s' not found in bundled or user plugins", name) return None try: @@ -104,7 +189,10 @@ def _load_provider_from_dir(provider_dir: Path) -> Optional["MemoryProvider"]: - A top-level class that extends MemoryProvider — we instantiate it """ name = provider_dir.name - module_name = f"plugins.memory.{name}" + # Use a separate namespace for user-installed plugins so they don't + # collide with bundled providers in sys.modules. + _is_bundled = _MEMORY_PLUGINS_DIR in provider_dir.parents or provider_dir.parent == _MEMORY_PLUGINS_DIR + module_name = f"plugins.memory.{name}" if _is_bundled else f"_hermes_user_memory.{name}" init_file = provider_dir / "__init__.py" if not init_file.exists(): @@ -257,15 +345,16 @@ def discover_plugin_cli_commands() -> List[dict]: return results # Only look at the active provider's directory - plugin_dir = _MEMORY_PLUGINS_DIR / active_provider - if not plugin_dir.is_dir(): + plugin_dir = find_provider_dir(active_provider) + if not plugin_dir: return results cli_file = plugin_dir / "cli.py" if not cli_file.exists(): return results - module_name = f"plugins.memory.{active_provider}.cli" + _is_bundled = _MEMORY_PLUGINS_DIR in plugin_dir.parents or plugin_dir.parent == _MEMORY_PLUGINS_DIR + module_name = f"plugins.memory.{active_provider}.cli" if _is_bundled else f"_hermes_user_memory.{active_provider}.cli" try: # Import the CLI module (lightweight — no SDK needed) if module_name in sys.modules: diff --git a/plugins/memory/honcho/README.md b/plugins/memory/honcho/README.md index 80cc5a70aa..4f8d10ea9e 100644 --- a/plugins/memory/honcho/README.md +++ b/plugins/memory/honcho/README.md @@ -1,6 +1,6 @@ # Honcho Memory Provider -AI-native cross-session user modeling with dialectic Q&A, semantic 
search, peer cards, and persistent conclusions. +AI-native cross-session user modeling with multi-pass dialectic reasoning, session summaries, bidirectional peer tools, and persistent conclusions. > **Honcho docs:** @@ -19,9 +19,86 @@ hermes memory setup # generic picker, also works Or manually: ```bash hermes config set memory.provider honcho -echo "HONCHO_API_KEY=your-key" >> ~/.hermes/.env +echo "HONCHO_API_KEY=***" >> ~/.hermes/.env ``` +## Architecture Overview + +### Two-Layer Context Injection + +Context is injected into the **user message** at API-call time (not the system prompt) to preserve prompt caching. Only a static mode header goes in the system prompt. The injected block is wrapped in `` fences with a system note clarifying it's background data, not new user input. + +Two independent layers, each on its own cadence: + +**Layer 1 — Base context** (refreshed every `contextCadence` turns): +1. **SESSION SUMMARY** — from `session.context(summary=True)`, placed first +2. **User Representation** — Honcho's evolving model of the user +3. **User Peer Card** — key facts snapshot +4. **AI Self-Representation** — Honcho's model of the AI peer +5. **AI Identity Card** — AI peer facts + +**Layer 2 — Dialectic supplement** (fired every `dialecticCadence` turns): +Multi-pass `.chat()` reasoning about the user, appended after base context. + +Both layers are joined, then truncated to fit `contextTokens` budget via `_truncate_to_budget` (tokens × 4 chars, word-boundary safe). + +### Cold Start vs Warm Session Prompts + +Dialectic pass 0 automatically selects its prompt based on session state: + +- **Cold** (no base context cached): "Who is this person? What are their preferences, goals, and working style? Focus on facts that would help an AI assistant be immediately useful." +- **Warm** (base context exists): "Given what's been discussed in this session so far, what context about this user is most relevant to the current conversation? 
Prioritize active context over biographical facts." + +Not configurable — determined automatically. + +### Dialectic Depth (Multi-Pass Reasoning) + +`dialecticDepth` (1–3, clamped) controls how many `.chat()` calls fire per dialectic cycle: + +| Depth | Passes | Behavior | +|-------|--------|----------| +| 1 | single `.chat()` | Base query only (cold or warm prompt) | +| 2 | audit + synthesis | Pass 0 result is self-audited; pass 1 does targeted synthesis. Conditional bail-out if pass 0 returns strong signal (>300 chars or structured with bullets/sections >100 chars) | +| 3 | audit + synthesis + reconciliation | Pass 2 reconciles contradictions across prior passes into a final synthesis | + +### Proportional Reasoning Levels + +When `dialecticDepthLevels` is not set, each pass uses a proportional level relative to `dialecticReasoningLevel` (the "base"): + +| Depth | Pass levels | +|-------|-------------| +| 1 | [base] | +| 2 | [minimal, base] | +| 3 | [minimal, base, low] | + +Override with `dialecticDepthLevels`: an explicit array of reasoning level strings per pass. + +### Three Orthogonal Dialectic Knobs + +| Knob | Controls | Type | +|------|----------|------| +| `dialecticCadence` | How often — minimum turns between dialectic firings | int | +| `dialecticDepth` | How many — passes per firing (1–3) | int | +| `dialecticReasoningLevel` | How hard — reasoning ceiling per `.chat()` call | string | + +### Input Sanitization + +`run_conversation` strips leaked `` blocks from user input before processing. When `saveMessages` persists a turn that included injected context, the block can reappear in subsequent turns via message history. The sanitizer removes `` blocks plus associated system notes. + +## Tools + +Five bidirectional tools. All accept an optional `peer` parameter (`"user"` or `"ai"`, default `"user"`). + +| Tool | LLM call? 
| Description | +|------|-----------|-------------| +| `honcho_profile` | No | Peer card — key facts snapshot | +| `honcho_search` | No | Semantic search over stored context (800 tok default, 2000 max) | +| `honcho_context` | No | Full session context: summary, representation, card, messages | +| `honcho_reasoning` | Yes | LLM-synthesized answer via dialectic `.chat()` | +| `honcho_conclude` | No | Write a persistent fact/conclusion about the user | + +Tool visibility depends on `recallMode`: hidden in `context` mode, always present in `tools` and `hybrid`. + ## Config Resolution Config is read from the first file that exists: @@ -34,42 +111,128 @@ Config is read from the first file that exists: Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.`. -## Tools - -| Tool | LLM call? | Description | -|------|-----------|-------------| -| `honcho_profile` | No | User's peer card -- key facts snapshot | -| `honcho_search` | No | Semantic search over stored context (800 tok default, 2000 max) | -| `honcho_context` | Yes | LLM-synthesized answer via dialectic reasoning | -| `honcho_conclude` | No | Write a persistent fact about the user | - -Tool availability depends on `recallMode`: hidden in `context` mode, always present in `tools` and `hybrid`. +For every key, resolution order is: **host block > root > env var > default**. ## Full Configuration Reference ### Identity & Connection -| Key | Type | Default | Scope | Description | -|-----|------|---------|-------|-------------| -| `apiKey` | string | -- | root / host | API key. Falls back to `HONCHO_API_KEY` env var | -| `baseUrl` | string | -- | root | Base URL for self-hosted Honcho. Local URLs (`localhost`, `127.0.0.1`, `::1`) auto-skip API key auth | -| `environment` | string | `"production"` | root / host | SDK environment mapping | -| `enabled` | bool | auto | root / host | Master toggle. 
Auto-enables when `apiKey` or `baseUrl` present | -| `workspace` | string | host key | root / host | Honcho workspace ID | -| `peerName` | string | -- | root / host | User peer identity | -| `aiPeer` | string | host key | root / host | AI peer identity | +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `apiKey` | string | — | API key. Falls back to `HONCHO_API_KEY` env var | +| `baseUrl` | string | — | Base URL for self-hosted Honcho. Local URLs auto-skip API key auth | +| `environment` | string | `"production"` | SDK environment mapping | +| `enabled` | bool | auto | Master toggle. Auto-enables when `apiKey` or `baseUrl` present | +| `workspace` | string | host key | Honcho workspace ID. Shared environment — all profiles in the same workspace can see the same user identity and related memories | +| `peerName` | string | — | User peer identity | +| `aiPeer` | string | host key | AI peer identity | ### Memory & Recall -| Key | Type | Default | Scope | Description | -|-----|------|---------|-------|-------------| -| `recallMode` | string | `"hybrid"` | root / host | `"hybrid"` (auto-inject + tools), `"context"` (auto-inject only, tools hidden), `"tools"` (tools only, no injection). Legacy `"auto"` normalizes to `"hybrid"` | -| `observationMode` | string | `"directional"` | root / host | Shorthand preset: `"directional"` (all on) or `"unified"` (shared pool). Use `observation` object for granular control | -| `observation` | object | -- | root / host | Per-peer observation config (see below) | +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `recallMode` | string | `"hybrid"` | `"hybrid"` (auto-inject + tools), `"context"` (auto-inject only, tools hidden), `"tools"` (tools only, no injection). Legacy `"auto"` → `"hybrid"` | +| `observationMode` | string | `"directional"` | Preset: `"directional"` (all on) or `"unified"` (shared pool). 
Use `observation` object for granular control | +| `observation` | object | — | Per-peer observation config (see Observation section) | -#### Observation (granular) +### Write Behavior -Maps 1:1 to Honcho's per-peer `SessionPeerConfig`. Set at root or per host block -- each profile can have different observation settings. When present, overrides `observationMode` preset. +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `writeFrequency` | string/int | `"async"` | `"async"` (background), `"turn"` (sync per turn), `"session"` (batch on end), or integer N (every N turns) | +| `saveMessages` | bool | `true` | Persist messages to Honcho API | + +### Session Resolution + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `sessionStrategy` | string | `"per-directory"` | `"per-directory"`, `"per-session"`, `"per-repo"` (git root), `"global"` | +| `sessionPeerPrefix` | bool | `false` | Prepend peer name to session keys | +| `sessions` | object | `{}` | Manual directory-to-session-name mappings | + +#### Session Name Resolution + +The Honcho session name determines which conversation bucket memory lands in. Resolution follows a priority chain — first match wins: + +| Priority | Source | Example session name | +|----------|--------|---------------------| +| 1 | Manual map (`sessions` config) | `"myproject-main"` | +| 2 | `/title` command (mid-session rename) | `"refactor-auth"` | +| 3 | Gateway session key (Telegram, Discord, etc.) | `"agent-main-telegram-dm-8439114563"` | +| 4 | `per-session` strategy | Hermes session ID (`20260415_a3f2b1`) | +| 5 | `per-repo` strategy | Git root directory name (`hermes-agent`) | +| 6 | `per-directory` strategy | Current directory basename (`src`) | +| 7 | `global` strategy | Workspace name (`hermes`) | + +Gateway platforms always resolve via priority 3 (per-chat isolation) regardless of `sessionStrategy`. The strategy setting only affects CLI sessions. 
+ +If `sessionPeerPrefix` is `true`, the peer name is prepended: `eri-hermes-agent`. + +#### What each strategy produces + +- **`per-directory`** — basename of `$PWD`. Opening hermes in `~/code/myapp` and `~/code/other` gives two separate sessions. Same directory = same session across runs. +- **`per-repo`** — git root directory name. All subdirectories within a repo share one session. Falls back to `per-directory` if not inside a git repo. +- **`per-session`** — Hermes session ID (timestamp + hex). Every `hermes` invocation starts a fresh Honcho session. Falls back to `per-directory` if no session ID is available. +- **`global`** — workspace name. One session for everything. Memory accumulates across all directories and runs. + +### Multi-Profile Pattern + +Multiple Hermes profiles can share one workspace while maintaining separate AI identities. Config resolution is **host block > root > env var > default** — host blocks inherit from root, so shared settings only need to be declared once: + +```json +{ + "apiKey": "***", + "workspace": "hermes", + "peerName": "yourname", + "hosts": { + "hermes": { + "aiPeer": "hermes", + "recallMode": "hybrid", + "sessionStrategy": "per-directory" + }, + "hermes.coder": { + "aiPeer": "coder", + "recallMode": "tools", + "sessionStrategy": "per-repo" + } + } +} +``` + +Both profiles see the same user (`yourname`) in the same shared environment (`hermes`), but each AI peer builds its own observations, conclusions, and behavior patterns. The coder's memory stays code-oriented; the main agent's stays broad. + +Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.` (e.g. `hermes -p coder` → host key `hermes.coder`). + +### Dialectic & Reasoning + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `dialecticDepth` | int | `1` | Passes per dialectic cycle (1–3, clamped). 
1=single query, 2=audit+synthesis, 3=audit+synthesis+reconciliation | +| `dialecticDepthLevels` | array | — | Optional array of reasoning level strings per pass. Overrides proportional defaults. Example: `["minimal", "low", "medium"]` | +| `dialecticReasoningLevel` | string | `"low"` | Base reasoning level for `.chat()`: `"minimal"`, `"low"`, `"medium"`, `"high"`, `"max"` | +| `dialecticDynamic` | bool | `true` | When `true`, model can override reasoning level per-call via `honcho_reasoning` tool. When `false`, always uses `dialecticReasoningLevel` | +| `dialecticMaxChars` | int | `600` | Max chars of dialectic result injected into system prompt | +| `dialecticMaxInputChars` | int | `10000` | Max chars for dialectic query input to `.chat()`. Honcho cloud limit: 10k | + +### Token Budgets + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `contextTokens` | int | SDK default | Token budget for `context()` API calls. Also gates prefetch truncation (tokens × 4 chars) | +| `messageMaxChars` | int | `25000` | Max chars per message sent via `add_messages()`. Exceeding this triggers chunking with `[continued]` markers. Honcho cloud limit: 25k | + +### Cadence (Cost Control) + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `contextCadence` | int | `1` | Minimum turns between base context refreshes (session summary + representation + card) | +| `dialecticCadence` | int | `1` | Minimum turns between dialectic `.chat()` firings | +| `injectionFrequency` | string | `"every-turn"` | `"every-turn"` or `"first-turn"` (inject context on the first user message only, skip from turn 2 onward) | +| `reasoningLevelCap` | string | — | Hard cap on reasoning level: `"minimal"`, `"low"`, `"medium"`, `"high"` | + +### Observation (Granular) + +Maps 1:1 to Honcho's per-peer `SessionPeerConfig`. When present, overrides `observationMode` preset. 
```json "observation": { @@ -85,74 +248,16 @@ Maps 1:1 to Honcho's per-peer `SessionPeerConfig`. Set at root or per host block | `ai.observeMe` | `true` | AI peer self-observation (Honcho builds AI representation) | | `ai.observeOthers` | `true` | AI peer observes user messages (enables cross-peer dialectic) | -Presets for `observationMode`: -- `"directional"` (default): all four booleans `true` +Presets: +- `"directional"` (default): all four `true` - `"unified"`: user `observeMe=true`, AI `observeOthers=true`, rest `false` -Per-profile example -- coder profile observes the user but user doesn't observe coder: +### Hardcoded Limits -```json -"hosts": { - "hermes.coder": { - "observation": { - "user": { "observeMe": true, "observeOthers": false }, - "ai": { "observeMe": true, "observeOthers": true } - } - } -} -``` - -Settings changed in the [Honcho dashboard](https://app.honcho.dev) are synced back on session init. - -### Write Behavior - -| Key | Type | Default | Scope | Description | -|-----|------|---------|-------|-------------| -| `writeFrequency` | string or int | `"async"` | root / host | `"async"` (background thread), `"turn"` (sync per turn), `"session"` (batch on end), or integer N (every N turns) | -| `saveMessages` | bool | `true` | root / host | Whether to persist messages to Honcho API | - -### Session Resolution - -| Key | Type | Default | Scope | Description | -|-----|------|---------|-------|-------------| -| `sessionStrategy` | string | `"per-directory"` | root / host | `"per-directory"`, `"per-session"` (new each run), `"per-repo"` (git root name), `"global"` (single session) | -| `sessionPeerPrefix` | bool | `false` | root / host | Prepend peer name to session keys | -| `sessions` | object | `{}` | root | Manual directory-to-session-name mappings: `{"/path/to/project": "my-session"}` | - -### Token Budgets & Dialectic - -| Key | Type | Default | Scope | Description | -|-----|------|---------|-------|-------------| -| `contextTokens` | int | SDK 
default | root / host | Token budget for `context()` API calls. Also gates prefetch truncation (tokens x 4 chars) | -| `dialecticReasoningLevel` | string | `"low"` | root / host | Base reasoning level for `peer.chat()`: `"minimal"`, `"low"`, `"medium"`, `"high"`, `"max"` | -| `dialecticDynamic` | bool | `true` | root / host | Auto-bump reasoning based on query length: `<120` chars = base level, `120-400` = +1, `>400` = +2 (capped at `"high"`). Set `false` to always use `dialecticReasoningLevel` as-is | -| `dialecticMaxChars` | int | `600` | root / host | Max chars of dialectic result injected into system prompt | -| `dialecticMaxInputChars` | int | `10000` | root / host | Max chars for dialectic query input to `peer.chat()`. Honcho cloud limit: 10k | -| `messageMaxChars` | int | `25000` | root / host | Max chars per message sent via `add_messages()`. Messages exceeding this are chunked with `[continued]` markers. Honcho cloud limit: 25k | - -### Cost Awareness (Advanced) - -These are read from the root config object, not the host block. Must be set manually in `honcho.json`. - -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| `injectionFrequency` | string | `"every-turn"` | `"every-turn"` or `"first-turn"` (inject context only on turn 0) | -| `contextCadence` | int | `1` | Minimum turns between `context()` API calls | -| `dialecticCadence` | int | `1` | Minimum turns between `peer.chat()` API calls | -| `reasoningLevelCap` | string | -- | Hard cap on auto-bumped reasoning: `"minimal"`, `"low"`, `"mid"`, `"high"` | - -### Hardcoded Limits (Not Configurable) - -| Limit | Value | Location | -|-------|-------|----------| -| Search tool max tokens | 2000 (hard cap), 800 (default) | `__init__.py` handle_tool_call | -| Peer card fetch tokens | 200 | `session.py` get_peer_card | - -## Config Precedence - -For every key, resolution order is: **host block > root > env var > default**. 
- -Host key derivation: `HERMES_HONCHO_HOST` env > active profile (`hermes.`) > `"hermes"`. +| Limit | Value | +|-------|-------| +| Search tool max tokens | 2000 (hard cap), 800 (default) | +| Peer card fetch tokens | 200 | ## Environment Variables @@ -182,15 +287,16 @@ Host key derivation: `HERMES_HONCHO_HOST` env > active profile (`hermes. active profile (`hermes. None: + try: + r = self._run_dialectic_depth(_prewarm_query) + except Exception as exc: + logger.debug("Honcho dialectic prewarm failed: %s", exc) + self._dialectic_empty_streak += 1 + return + if r and r.strip(): + with self._prefetch_lock: + self._prefetch_result = r + self._prefetch_result_fired_at = 0 + # Treat prewarm as turn 0 so cadence gating starts clean. + self._last_dialectic_turn = 0 + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + + self._prefetch_thread_started_at = time.monotonic() + self._prefetch_thread = threading.Thread( + target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic" + ) + self._prefetch_thread.start() + logger.debug("Honcho pre-warm started for session: %s", self._session_key) def _ensure_session(self) -> bool: """Lazily initialize the Honcho session (for tools-only mode). @@ -347,6 +465,11 @@ class HonchoMemoryProvider(MemoryProvider): """Format the prefetch context dict into a readable system prompt block.""" parts = [] + # Session summary — session-scoped context, placed first for relevance + summary = ctx.get("summary", "") + if summary: + parts.append(f"## Session Summary\n{summary}") + rep = ctx.get("representation", "") if rep: parts.append(f"## User Representation\n{rep}") @@ -370,9 +493,9 @@ class HonchoMemoryProvider(MemoryProvider): def system_prompt_block(self) -> str: """Return system prompt text, adapted by recall_mode. - B4: On the FIRST call, fetch and bake the full Honcho context - (user representation, peer card, AI representation, continuity synthesis). 
- Subsequent calls return the cached block for prompt caching stability. + Returns only the mode header and tool instructions — static text + that doesn't change between turns (prompt-cache friendly). + Live context (representation, card) is injected via prefetch(). """ if self._cron_skipped: return "" @@ -382,24 +505,10 @@ class HonchoMemoryProvider(MemoryProvider): return ( "# Honcho Memory\n" "Active (tools-only mode). Use honcho_profile, honcho_search, " - "honcho_context, and honcho_conclude tools to access user memory." + "honcho_reasoning, honcho_context, and honcho_conclude tools to access user memory." ) return "" - # ----- B4: First-turn context baking ----- - first_turn_block = "" - if self._recall_mode in ("context", "hybrid"): - with self._first_turn_lock: - if self._first_turn_context is None: - # First call — fetch and cache - try: - ctx = self._manager.get_prefetch_context(self._session_key) - self._first_turn_context = self._format_first_turn_context(ctx) if ctx else "" - except Exception as e: - logger.debug("Honcho first-turn context fetch failed: %s", e) - self._first_turn_context = "" - first_turn_block = self._first_turn_context - # ----- B1: adapt text based on recall_mode ----- if self._recall_mode == "context": header = ( @@ -412,7 +521,9 @@ class HonchoMemoryProvider(MemoryProvider): header = ( "# Honcho Memory\n" "Active (tools-only mode). Use honcho_profile for a quick factual snapshot, " - "honcho_search for raw excerpts, honcho_context for synthesized answers, " + "honcho_search for raw excerpts, honcho_context for raw peer context, " + "honcho_reasoning for synthesized answers (pass reasoning_level " + "minimal/low/medium/high/max — you pick the depth per call), " "honcho_conclude to save facts about the user. " "No automatic context injection — you must use tools to access memory." ) @@ -421,16 +532,20 @@ class HonchoMemoryProvider(MemoryProvider): "# Honcho Memory\n" "Active (hybrid mode). 
Relevant context is auto-injected AND memory tools are available. " "Use honcho_profile for a quick factual snapshot, " - "honcho_search for raw excerpts, honcho_context for synthesized answers, " + "honcho_search for raw excerpts, honcho_context for raw peer context, " + "honcho_reasoning for synthesized answers (pass reasoning_level " + "minimal/low/medium/high/max — you pick the depth per call), " "honcho_conclude to save facts about the user." ) - if first_turn_block: - return f"{header}\n\n{first_turn_block}" return header def prefetch(self, query: str, *, session_id: str = "") -> str: - """Return prefetched dialectic context from background thread. + """Return base context (representation + card) plus dialectic supplement. + + Assembles two layers: + 1. Base context from peer.context() — cached, refreshed on context_cadence + 2. Dialectic supplement — cached, refreshed on dialectic_cadence B1: Returns empty when recall_mode is "tools" (no injection). B5: Respects injection_frequency — "first-turn" returns cached/empty after turn 0. @@ -443,22 +558,128 @@ class HonchoMemoryProvider(MemoryProvider): if self._recall_mode == "tools": return "" - # B5: injection_frequency — if "first-turn" and past first turn, return empty - if self._injection_frequency == "first-turn" and self._turn_count > 0: + # B5: injection_frequency — if "first-turn" and past first turn, return empty. + # _turn_count is 1-indexed (first user message = 1), so > 1 means "past first". + if self._injection_frequency == "first-turn" and self._turn_count > 1: return "" + # Trivial prompts ("ok", "yes", slash commands) carry no semantic signal. + if self._is_trivial_prompt(query): + return "" + + parts = [] + + # ----- Layer 1: Base context (representation + card) ----- + # On first call, fetch synchronously so turn 1 isn't empty. + # After that, serve from cache and refresh in background on cadence. 
+ with self._base_context_lock: + if self._base_context_cache is None: + # First call — synchronous fetch + try: + ctx = self._manager.get_prefetch_context(self._session_key) + self._base_context_cache = self._format_first_turn_context(ctx) if ctx else "" + self._last_context_turn = self._turn_count + except Exception as e: + logger.debug("Honcho base context fetch failed: %s", e) + self._base_context_cache = "" + base_context = self._base_context_cache + + # Check if background context prefetch has a fresher result + if self._manager: + fresh_ctx = self._manager.pop_context_result(self._session_key) + if fresh_ctx: + formatted = self._format_first_turn_context(fresh_ctx) + if formatted: + with self._base_context_lock: + self._base_context_cache = formatted + base_context = formatted + + if base_context: + parts.append(base_context) + + # ----- Layer 2: Dialectic supplement ----- + # On the very first turn, no queue_prefetch() has run yet so the + # dialectic result is empty. Run with a bounded timeout so a slow + # Honcho connection doesn't block the first response indefinitely. + # On timeout we let the thread keep running and write its result into + # _prefetch_result under the lock, so the next turn picks it up. + # + # Skip if the session-start prewarm already filled _prefetch_result — + # firing another .chat() would be duplicate work. 
+ with self._prefetch_lock: + _prewarm_landed = bool(self._prefetch_result) + if _prewarm_landed and self._last_dialectic_turn == -999: + self._last_dialectic_turn = self._turn_count + + if self._last_dialectic_turn == -999 and query: + _first_turn_timeout = ( + self._config.timeout if self._config and self._config.timeout else 8.0 + ) + _fired_at = self._turn_count + + def _run_first_turn() -> None: + try: + r = self._run_dialectic_depth(query) + except Exception as exc: + logger.debug("Honcho first-turn dialectic failed: %s", exc) + self._dialectic_empty_streak += 1 + return + if r and r.strip(): + with self._prefetch_lock: + self._prefetch_result = r + self._prefetch_result_fired_at = _fired_at + # Advance cadence only on a non-empty result so the next + # turn retries when the call returned nothing. + self._last_dialectic_turn = _fired_at + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + + self._prefetch_thread_started_at = time.monotonic() + self._prefetch_thread = threading.Thread( + target=_run_first_turn, daemon=True, name="honcho-prefetch-first" + ) + self._prefetch_thread.start() + self._prefetch_thread.join(timeout=_first_turn_timeout) + if self._prefetch_thread.is_alive(): + logger.debug( + "Honcho first-turn dialectic still running after %.1fs — " + "will surface on next turn", + _first_turn_timeout, + ) + if self._prefetch_thread and self._prefetch_thread.is_alive(): self._prefetch_thread.join(timeout=3.0) with self._prefetch_lock: - result = self._prefetch_result + dialectic_result = self._prefetch_result + fired_at = self._prefetch_result_fired_at self._prefetch_result = "" - if not result: + self._prefetch_result_fired_at = -999 + + # Discard stale pending results: if the fire happened more than + # cadence × multiplier turns ago (e.g. a run of trivial-prompt turns + # passed without consumption), the content likely no longer tracks + # the current conversational pivot. 
+ stale_limit = self._dialectic_cadence * self._STALE_RESULT_MULTIPLIER + if dialectic_result and fired_at >= 0 and (self._turn_count - fired_at) > stale_limit: + logger.debug( + "Honcho pending dialectic discarded as stale: fired_at=%d, " + "turn=%d, limit=%d", fired_at, self._turn_count, stale_limit, + ) + dialectic_result = "" + + if dialectic_result and dialectic_result.strip(): + parts.append(dialectic_result) + + if not parts: return "" + result = "\n\n".join(parts) + # ----- Port #3265: token budget enforcement ----- result = self._truncate_to_budget(result) - return f"## Honcho Context\n{result}" + return result def _truncate_to_budget(self, text: str) -> str: """Truncate text to fit within context_tokens budget if set.""" @@ -475,9 +696,11 @@ class HonchoMemoryProvider(MemoryProvider): return truncated + " …" def queue_prefetch(self, query: str, *, session_id: str = "") -> None: - """Fire a background dialectic query for the upcoming turn. + """Fire background prefetch threads for the upcoming turn. - B5: Checks cadence before firing background threads. + B5: Checks cadence independently for dialectic and context refresh. + Context refresh updates the base layer (representation + card). + Dialectic fires the LLM reasoning supplement. """ if self._cron_skipped: return @@ -488,32 +711,11 @@ class HonchoMemoryProvider(MemoryProvider): if self._recall_mode == "tools": return - # B5: cadence check — skip if too soon since last dialectic call - if self._dialectic_cadence > 1: - if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: - logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", - self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) - return + # Trivial prompts don't warrant either a context refresh or a dialectic call. 
+ if self._is_trivial_prompt(query): + return - self._last_dialectic_turn = self._turn_count - - def _run(): - try: - result = self._manager.dialectic_query( - self._session_key, query, peer="user" - ) - if result and result.strip(): - with self._prefetch_lock: - self._prefetch_result = result - except Exception as e: - logger.debug("Honcho prefetch failed: %s", e) - - self._prefetch_thread = threading.Thread( - target=_run, daemon=True, name="honcho-prefetch" - ) - self._prefetch_thread.start() - - # Also fire context prefetch if cadence allows + # ----- Context refresh (base layer) — independent cadence ----- if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence: self._last_context_turn = self._turn_count try: @@ -521,6 +723,290 @@ class HonchoMemoryProvider(MemoryProvider): except Exception as e: logger.debug("Honcho context prefetch failed: %s", e) + # ----- Dialectic prefetch (supplement layer) ----- + # Thread-alive guard with stale-thread recovery: a hung Honcho call + # older than timeout × multiplier is treated as dead so it can't + # block subsequent fires. + if self._thread_is_live(): + logger.debug("Honcho dialectic prefetch skipped: prior thread still running") + return + + # Cadence gate, widened by the empty-streak backoff so a persistently + # silent backend doesn't retry every turn forever. + effective = self._effective_cadence() + if (self._turn_count - self._last_dialectic_turn) < effective: + logger.debug( + "Honcho dialectic prefetch skipped: effective cadence %d " + "(base %d, empty streak %d), turns since last: %d", + effective, self._dialectic_cadence, self._dialectic_empty_streak, + self._turn_count - self._last_dialectic_turn, + ) + return + + # Cadence advances only on a non-empty result so empty returns + # (transient API error, sparse representation) retry next turn. 
+ _fired_at = self._turn_count + + def _run(): + try: + result = self._run_dialectic_depth(query) + except Exception as e: + logger.debug("Honcho prefetch failed: %s", e) + self._dialectic_empty_streak += 1 + return + if result and result.strip(): + with self._prefetch_lock: + self._prefetch_result = result + self._prefetch_result_fired_at = _fired_at + self._last_dialectic_turn = _fired_at + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + + self._prefetch_thread_started_at = time.monotonic() + self._prefetch_thread = threading.Thread( + target=_run, daemon=True, name="honcho-prefetch" + ) + self._prefetch_thread.start() + + # ----- Dialectic depth: multi-pass .chat() with cold/warm prompts ----- + + # Proportional reasoning levels per depth/pass when dialecticDepthLevels + # is not configured. The base level is dialecticReasoningLevel. + # Index: (depth, pass) → level relative to base. + _PROPORTIONAL_LEVELS: dict[tuple[int, int], str] = { + # depth 1: single pass at base level + (1, 0): "base", + # depth 2: pass 0 lighter, pass 1 at base + (2, 0): "minimal", + (2, 1): "base", + # depth 3: pass 0 lighter, pass 1 at base, pass 2 one above minimal + (3, 0): "minimal", + (3, 1): "base", + (3, 2): "low", + } + + _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max") + + # Char-count thresholds for the query-length reasoning heuristic. + _HEURISTIC_LENGTH_MEDIUM = 120 + _HEURISTIC_LENGTH_HIGH = 400 + + # Liveness constants. A thread older than timeout × multiplier is treated + # as dead so a hung Honcho call can't block future retries indefinitely. + _STALE_THREAD_MULTIPLIER = 2.0 + # Pending result whose fire-turn is older than cadence × multiplier is + # discarded on read so we don't inject context for a stale conversational + # pivot after a gap of trivial-prompt turns. 
+ _STALE_RESULT_MULTIPLIER = 2 + # Cap on the empty-streak backoff so a persistently silent backend + # eventually settles on a ceiling instead of unbounded widening. + _BACKOFF_MAX = 8 + + def _thread_is_live(self) -> bool: + """Thread-alive guard that treats threads older than the stale + threshold as dead, so a hung Honcho request can't block new fires.""" + if not self._prefetch_thread or not self._prefetch_thread.is_alive(): + return False + timeout = (self._config.timeout if self._config and self._config.timeout else 8.0) + age = time.monotonic() - self._prefetch_thread_started_at + if age > timeout * self._STALE_THREAD_MULTIPLIER: + logger.debug( + "Honcho prefetch thread age %.1fs exceeds stale threshold " + "%.1fs — treating as dead", age, timeout * self._STALE_THREAD_MULTIPLIER, + ) + return False + return True + + def _effective_cadence(self) -> int: + """Cadence plus empty-streak backoff, capped at _BACKOFF_MAX × base.""" + if self._dialectic_empty_streak <= 0: + return self._dialectic_cadence + widened = self._dialectic_cadence + self._dialectic_empty_streak + ceiling = self._dialectic_cadence * self._BACKOFF_MAX + return min(widened, ceiling) + + def liveness_snapshot(self) -> dict: + """In-process snapshot of dialectic liveness state for diagnostics. + + Returns current turn, last successful dialectic turn, pending-result + fire turn, empty streak, effective cadence, and thread status. 
+ """ + thread_age = None + if self._prefetch_thread and self._prefetch_thread.is_alive(): + thread_age = time.monotonic() - self._prefetch_thread_started_at + return { + "turn_count": self._turn_count, + "last_dialectic_turn": self._last_dialectic_turn, + "pending_result_fired_at": self._prefetch_result_fired_at, + "empty_streak": self._dialectic_empty_streak, + "effective_cadence": self._effective_cadence(), + "thread_alive": thread_age is not None, + "thread_age_seconds": thread_age, + } + + def _apply_reasoning_heuristic(self, base: str, query: str) -> str: + """Scale `base` up by query length, clamped at reasoning_level_cap. + + Char-count heuristic: +1 at >=120 chars, +2 at >=400. + """ + if not self._reasoning_heuristic or not query: + return base + if base not in self._LEVEL_ORDER: + return base + n = len(query) + if n < self._HEURISTIC_LENGTH_MEDIUM: + bump = 0 + elif n < self._HEURISTIC_LENGTH_HIGH: + bump = 1 + else: + bump = 2 + base_idx = self._LEVEL_ORDER.index(base) + cap_idx = self._LEVEL_ORDER.index(self._reasoning_level_cap) + return self._LEVEL_ORDER[min(base_idx + bump, cap_idx)] + + def _resolve_pass_level(self, pass_idx: int, query: str = "") -> str: + """Resolve reasoning level for a given pass index. + + Precedence: + 1. dialecticDepthLevels (explicit per-pass) — wins absolutely + 2. _PROPORTIONAL_LEVELS table (depth>1 lighter-early passes) + 3. 
Base level = dialecticReasoningLevel, optionally scaled by the + reasoning heuristic when the mapping falls through to 'base' + """ + if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels): + return self._dialectic_depth_levels[pass_idx] + + base = (self._config.dialectic_reasoning_level if self._config else "low") + mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx)) + if mapping is None or mapping == "base": + return self._apply_reasoning_heuristic(base, query) + return mapping + + def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str: + """Build the prompt for a given dialectic pass. + + Pass 0: cold start (general user query) or warm (session-scoped). + Pass 1: self-audit / targeted synthesis against gaps from pass 0. + Pass 2: reconciliation / contradiction check across prior passes. + """ + if pass_idx == 0: + if is_cold: + return ( + "Who is this person? What are their preferences, goals, " + "and working style? Focus on facts that would help an AI " + "assistant be immediately useful." + ) + return ( + "Given what's been discussed in this session so far, what " + "context about this user is most relevant to the current " + "conversation? Prioritize active context over biographical facts." + ) + elif pass_idx == 1: + prior = prior_results[-1] if prior_results else "" + return ( + f"Given this initial assessment:\n\n{prior}\n\n" + "What gaps remain in your understanding that would help " + "going forward? Synthesize what you actually know about " + "the user's current state and immediate needs, grounded " + "in evidence from recent sessions." + ) + else: + # pass 2: reconciliation + return ( + f"Prior passes produced:\n\n" + f"Pass 1:\n{prior_results[0] if len(prior_results) > 0 else '(empty)'}\n\n" + f"Pass 2:\n{prior_results[1] if len(prior_results) > 1 else '(empty)'}\n\n" + "Do these assessments cohere? 
Reconcile any contradictions " + "and produce a final, concise synthesis of what matters most " + "for the current conversation." + ) + + @staticmethod + def _signal_sufficient(result: str) -> bool: + """Check if a dialectic pass returned enough signal to skip further passes. + + Heuristic: a response longer than 100 chars with some structure + (section headers, bullets, or an ordered list) is considered sufficient. + """ + if not result or len(result.strip()) < 100: + return False + # Structured output with sections/bullets is strong signal + if "\n" in result and ( + "##" in result + or "•" in result + or re.search(r"^[*-] ", result, re.MULTILINE) + or re.search(r"^\s*\d+\. ", result, re.MULTILINE) + ): + return True + # Long enough even without structure + return len(result.strip()) > 300 + + def _run_dialectic_depth(self, query: str) -> str: + """Execute up to dialecticDepth .chat() calls with conditional bail-out. + + Cold start (no base context): general user-oriented query. + Warm session (base context exists): session-scoped query. + Each pass is conditional — bails early if prior pass returned strong signal. + Returns the best (usually last) result. 
+ """ + if not self._manager or not self._session_key: + return "" + + is_cold = not self._base_context_cache + results: list[str] = [] + + for i in range(self._dialectic_depth): + if i == 0: + prompt = self._build_dialectic_prompt(0, results, is_cold) + else: + # Skip further passes if prior pass delivered strong signal + if results and self._signal_sufficient(results[-1]): + logger.debug("Honcho dialectic depth %d: pass %d skipped, prior signal sufficient", + self._dialectic_depth, i) + break + prompt = self._build_dialectic_prompt(i, results, is_cold) + + level = self._resolve_pass_level(i, query=query) + logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s", + self._dialectic_depth, i, level, is_cold) + + result = self._manager.dialectic_query( + self._session_key, prompt, + reasoning_level=level, + peer="user", + ) + results.append(result or "") + + # Return the last non-empty result (deepest pass that ran) + for r in reversed(results): + if r and r.strip(): + return r + return "" + + # Prompts that carry no semantic signal — trivial acknowledgements, slash + # commands, empty input. Skipping injection here saves tokens and prevents + # stale user-model context from derailing one-word replies. 
+ _TRIVIAL_PROMPT_RE = re.compile( + r'^(yes|no|ok|okay|sure|thanks|thank you|y|n|yep|nope|yeah|nah|' + r'continue|go ahead|do it|proceed|got it|cool|nice|great|done|next|lgtm|k)$', + re.IGNORECASE, + ) + + @classmethod + def _is_trivial_prompt(cls, text: str) -> bool: + """Return True if the prompt is too trivial to warrant context injection.""" + if not text: + return True + stripped = text.strip() + if not stripped: + return True + if stripped.startswith("/"): + return True + if cls._TRIVIAL_PROMPT_RE.match(stripped): + return True + return False + def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None: """Track turn count for cadence and injection_frequency logic.""" self._turn_count = turn_number @@ -659,7 +1145,14 @@ class HonchoMemoryProvider(MemoryProvider): try: if tool_name == "honcho_profile": - card = self._manager.get_peer_card(self._session_key) + peer = args.get("peer", "user") + card_update = args.get("card") + if card_update: + result = self._manager.set_peer_card(self._session_key, card_update, peer=peer) + if result is None: + return tool_error("Failed to update peer card.") + return json.dumps({"result": f"Peer card updated ({len(result)} facts).", "card": result}) + card = self._manager.get_peer_card(self._session_key, peer=peer) if not card: return json.dumps({"result": "No profile facts available yet."}) return json.dumps({"result": card}) @@ -669,30 +1162,68 @@ class HonchoMemoryProvider(MemoryProvider): if not query: return tool_error("Missing required parameter: query") max_tokens = min(int(args.get("max_tokens", 800)), 2000) + peer = args.get("peer", "user") result = self._manager.search_context( - self._session_key, query, max_tokens=max_tokens + self._session_key, query, max_tokens=max_tokens, peer=peer ) if not result: return json.dumps({"result": "No relevant context found."}) return json.dumps({"result": result}) - elif tool_name == "honcho_context": + elif tool_name == "honcho_reasoning": query = args.get("query", 
"") if not query: return tool_error("Missing required parameter: query") peer = args.get("peer", "user") + reasoning_level = args.get("reasoning_level") result = self._manager.dialectic_query( - self._session_key, query, peer=peer + self._session_key, query, + reasoning_level=reasoning_level, + peer=peer, ) + # Update cadence tracker so auto-injection respects the gap after an explicit call + self._last_dialectic_turn = self._turn_count return json.dumps({"result": result or "No result from Honcho."}) + elif tool_name == "honcho_context": + peer = args.get("peer", "user") + ctx = self._manager.get_session_context(self._session_key, peer=peer) + if not ctx: + return json.dumps({"result": "No context available yet."}) + parts = [] + if ctx.get("summary"): + parts.append(f"## Summary\n{ctx['summary']}") + if ctx.get("representation"): + parts.append(f"## Representation\n{ctx['representation']}") + if ctx.get("card"): + parts.append(f"## Card\n{ctx['card']}") + if ctx.get("recent_messages"): + msgs = ctx["recent_messages"] + msg_str = "\n".join( + f" [{m['role']}] {m['content'][:200]}" + for m in msgs[-5:] # last 5 for brevity + ) + parts.append(f"## Recent messages\n{msg_str}") + return json.dumps({"result": "\n\n".join(parts) or "No context available."}) + elif tool_name == "honcho_conclude": - conclusion = args.get("conclusion", "") - if not conclusion: - return tool_error("Missing required parameter: conclusion") - ok = self._manager.create_conclusion(self._session_key, conclusion) + delete_id = (args.get("delete_id") or "").strip() + conclusion = args.get("conclusion", "").strip() + peer = args.get("peer", "user") + + has_delete_id = bool(delete_id) + has_conclusion = bool(conclusion) + if has_delete_id == has_conclusion: + return tool_error("Exactly one of conclusion or delete_id must be provided.") + + if has_delete_id: + ok = self._manager.delete_conclusion(self._session_key, delete_id, peer=peer) + if ok: + return json.dumps({"result": f"Conclusion {delete_id} 
deleted."}) + return tool_error(f"Failed to delete conclusion {delete_id}.") + ok = self._manager.create_conclusion(self._session_key, conclusion, peer=peer) if ok: - return json.dumps({"result": f"Conclusion saved: {conclusion}"}) + return json.dumps({"result": f"Conclusion saved for {peer}: {conclusion}"}) return tool_error("Failed to save conclusion.") return tool_error(f"Unknown tool: {tool_name}") diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index dff4b386a5..5c829a4c98 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -440,11 +440,63 @@ def cmd_setup(args) -> None: if new_recall in ("hybrid", "context", "tools"): hermes_host["recallMode"] = new_recall - # --- 7. Session strategy --- - current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-directory") + # --- 7. Context token budget --- + current_ctx_tokens = hermes_host.get("contextTokens") or cfg.get("contextTokens") + current_display = str(current_ctx_tokens) if current_ctx_tokens else "uncapped" + print("\n Context injection per turn (hybrid/context recall modes only):") + print(" uncapped -- no limit (default)") + print(" N -- token limit per turn (e.g. 1200)") + new_ctx_tokens = _prompt("Context tokens", default=current_display) + if new_ctx_tokens.strip().lower() in ("none", "uncapped", "no limit"): + hermes_host.pop("contextTokens", None) + elif new_ctx_tokens.strip() == "": + pass # keep current + else: + try: + val = int(new_ctx_tokens) + if val >= 0: + hermes_host["contextTokens"] = val + except (ValueError, TypeError): + pass # keep current + + # --- 7b. 
Dialectic cadence --- + current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2") + print("\n Dialectic cadence:") + print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).") + print(" 1 = every turn, 2 = every other turn, 3+ = sparser.") + print(" Recommended: 1-5.") + new_dialectic = _prompt("Dialectic cadence", default=current_dialectic) + try: + val = int(new_dialectic) + if val >= 1: + hermes_host["dialecticCadence"] = val + except (ValueError, TypeError): + hermes_host["dialecticCadence"] = 2 + + # --- 7c. Dialectic reasoning level --- + current_reasoning = ( + hermes_host.get("dialecticReasoningLevel") + or cfg.get("dialecticReasoningLevel") + or "low" + ) + print("\n Dialectic reasoning level:") + print(" Depth Honcho uses when synthesizing user context on auto-injected calls.") + print(" minimal -- quick factual lookups") + print(" low -- straightforward questions (default)") + print(" medium -- multi-aspect synthesis") + print(" high -- complex behavioral patterns") + print(" max -- thorough audit-level analysis") + new_reasoning = _prompt("Reasoning level", default=current_reasoning) + if new_reasoning in ("minimal", "low", "medium", "high", "max"): + hermes_host["dialecticReasoningLevel"] = new_reasoning + else: + hermes_host["dialecticReasoningLevel"] = "low" + + # --- 8. 
Session strategy --- + current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") print("\n Session strategy:") - print(" per-directory -- one session per working directory (default)") - print(" per-session -- new Honcho session each run") + print(" per-session -- each run starts clean, Honcho injects context automatically") + print(" per-directory -- reuses session per dir, prior context auto-injected each run") print(" per-repo -- one session per git repository") print(" global -- single session across all directories") new_strat = _prompt("Session strategy", default=current_strat) @@ -490,10 +542,11 @@ def cmd_setup(args) -> None: print(f" Recall: {hcfg.recall_mode}") print(f" Sessions: {hcfg.session_strategy}") print("\n Honcho tools available in chat:") - print(" honcho_context -- ask Honcho about the user (LLM-synthesized)") - print(" honcho_search -- semantic search over history (no LLM)") - print(" honcho_profile -- peer card, key facts (no LLM)") - print(" honcho_conclude -- persist a user fact to memory (no LLM)") + print(" honcho_context -- session context: summary, representation, card, messages") + print(" honcho_search -- semantic search over history") + print(" honcho_profile -- peer card, key facts") + print(" honcho_reasoning -- ask Honcho a question, synthesized answer") + print(" honcho_conclude -- persist a user fact to memory") print("\n Other commands:") print(" hermes honcho status -- show full config") print(" hermes honcho mode -- change recall/observation mode") @@ -585,13 +638,29 @@ def cmd_status(args) -> None: print(f" Enabled: {hcfg.enabled}") print(f" API key: {masked}") print(f" Workspace: {hcfg.workspace_id}") - print(f" Config path: {active_path}") + + # Config paths — show where config was read from and where writes go + global_path = Path.home() / ".honcho" / "config.json" + print(f" Config: {active_path}") if write_path != active_path: - print(f" Write path: {write_path} (instance-local)") + 
print(f" Write to: {write_path} (profile-local)") + if active_path == global_path: + print(f" Fallback: (none — using global ~/.honcho/config.json)") + elif global_path.exists(): + print(f" Fallback: {global_path} (exists, cross-app interop)") + print(f" AI peer: {hcfg.ai_peer}") print(f" User peer: {hcfg.peer_name or 'not set'}") print(f" Session key: {hcfg.resolve_session_name()}") + print(f" Session strat: {hcfg.session_strategy}") print(f" Recall mode: {hcfg.recall_mode}") + print(f" Context budget: {hcfg.context_tokens or '(uncapped)'} tokens") + raw = getattr(hcfg, "raw", None) or {} + dialectic_cadence = raw.get("dialecticCadence") or 1 + print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}") + reasoning_cap = raw.get("reasoningLevelCap") or hcfg.reasoning_level_cap + heuristic_on = "on" if hcfg.reasoning_heuristic else "off" + print(f" Reasoning: base={hcfg.dialectic_reasoning_level}, cap={reasoning_cap}, heuristic={heuristic_on}") print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})") print(f" Write freq: {hcfg.write_frequency}") @@ -599,8 +668,8 @@ def cmd_status(args) -> None: print("\n Connection... 
", end="", flush=True) try: client = get_honcho_client(hcfg) - print("OK") _show_peer_cards(hcfg, client) + print("OK") except Exception as e: print(f"FAILED ({e})\n") else: @@ -824,6 +893,41 @@ def cmd_mode(args) -> None: print(f" {label}Recall mode -> {mode_arg} ({MODES[mode_arg]})\n") +def cmd_strategy(args) -> None: + """Show or set the session strategy.""" + STRATEGIES = { + "per-session": "each run starts clean, Honcho injects context automatically", + "per-directory": "reuses session per dir, prior context auto-injected each run", + "per-repo": "one session per git repository", + "global": "single session across all directories", + } + cfg = _read_config() + strat_arg = getattr(args, "strategy", None) + + if strat_arg is None: + current = ( + (cfg.get("hosts") or {}).get(_host_key(), {}).get("sessionStrategy") + or cfg.get("sessionStrategy") + or "per-session" + ) + print("\nHoncho session strategy\n" + "─" * 40) + for s, desc in STRATEGIES.items(): + marker = " <-" if s == current else "" + print(f" {s:<15} {desc}{marker}") + print(f"\n Set with: hermes honcho strategy [per-session|per-directory|per-repo|global]\n") + return + + if strat_arg not in STRATEGIES: + print(f" Invalid strategy '{strat_arg}'. Options: {', '.join(STRATEGIES)}\n") + return + + host = _host_key() + label = f"[{host}] " if host != "hermes" else "" + cfg.setdefault("hosts", {}).setdefault(host, {})["sessionStrategy"] = strat_arg + _write_config(cfg) + print(f" {label}Session strategy -> {strat_arg} ({STRATEGIES[strat_arg]})\n") + + def cmd_tokens(args) -> None: """Show or set token budget settings.""" cfg = _read_config() @@ -1143,10 +1247,11 @@ def cmd_migrate(args) -> None: print(" automatically. 
Files become the seed, not the live store.") print() print(" Honcho tools (available to the agent during conversation)") - print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)") - print(" honcho_search — semantic search over stored context (no LLM)") - print(" honcho_profile — fast peer card snapshot (no LLM)") - print(" honcho_conclude — write a conclusion/fact back to memory (no LLM)") + print(" honcho_context — session context: summary, representation, card, messages") + print(" honcho_search — semantic search over stored context") + print(" honcho_profile — fast peer card snapshot") + print(" honcho_reasoning — ask Honcho a question, synthesized answer") + print(" honcho_conclude — write a conclusion/fact back to memory") print() print(" Session naming") print(" OpenClaw: no persistent session concept — files are global.") @@ -1197,6 +1302,8 @@ def honcho_command(args) -> None: cmd_peer(args) elif sub == "mode": cmd_mode(args) + elif sub == "strategy": + cmd_strategy(args) elif sub == "tokens": cmd_tokens(args) elif sub == "identity": @@ -1211,7 +1318,7 @@ def honcho_command(args) -> None: cmd_sync(args) else: print(f" Unknown honcho command: {sub}") - print(" Available: status, sessions, map, peer, mode, tokens, identity, migrate, enable, disable, sync\n") + print(" Available: status, sessions, map, peer, mode, strategy, tokens, identity, migrate, enable, disable, sync\n") def register_cli(subparser) -> None: @@ -1270,6 +1377,15 @@ def register_cli(subparser) -> None: help="Recall mode to set (hybrid/context/tools). Omit to show current.", ) + strategy_parser = subs.add_parser( + "strategy", help="Show or set session strategy (per-session/per-directory/per-repo/global)", + ) + strategy_parser.add_argument( + "strategy", nargs="?", metavar="STRATEGY", + choices=("per-session", "per-directory", "per-repo", "global"), + help="Session strategy to set. 
Omit to show current.", + ) + tokens_parser = subs.add_parser( "tokens", help="Show or set token budget for context and dialectic", ) diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py index 3c779f64fe..fef2e2d58f 100644 --- a/plugins/memory/honcho/client.py +++ b/plugins/memory/honcho/client.py @@ -94,6 +94,68 @@ def _resolve_bool(host_val, root_val, *, default: bool) -> bool: return default +def _parse_context_tokens(host_val, root_val) -> int | None: + """Parse contextTokens: host wins, then root, then None (uncapped).""" + for val in (host_val, root_val): + if val is not None: + try: + return int(val) + except (ValueError, TypeError): + pass + return None + + +def _parse_dialectic_depth(host_val, root_val) -> int: + """Parse dialecticDepth: host wins, then root, then 1. Clamped to 1-3.""" + for val in (host_val, root_val): + if val is not None: + try: + return max(1, min(int(val), 3)) + except (ValueError, TypeError): + pass + return 1 + + +_VALID_REASONING_LEVELS = ("minimal", "low", "medium", "high", "max") + + +def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] | None: + """Parse dialecticDepthLevels: optional array of reasoning levels per pass. + + Returns None when not configured (use proportional defaults). + When configured, validates each level and truncates/pads to match depth. 
+ """ + for val in (host_val, root_val): + if val is not None and isinstance(val, list): + levels = [ + lvl if lvl in _VALID_REASONING_LEVELS else "low" + for lvl in val[:depth] + ] + # Pad with "low" if array is shorter than depth + while len(levels) < depth: + levels.append("low") + return levels + return None + + +def _resolve_optional_float(*values: Any) -> float | None: + """Return the first non-empty value coerced to a positive float.""" + for value in values: + if value is None: + continue + if isinstance(value, str): + value = value.strip() + if not value: + continue + try: + parsed = float(value) + except (TypeError, ValueError): + continue + if parsed > 0: + return parsed + return None + + _VALID_OBSERVATION_MODES = {"unified", "directional"} _OBSERVATION_MODE_ALIASES = {"shared": "unified", "separate": "directional", "cross": "directional"} @@ -159,6 +221,8 @@ class HonchoClientConfig: environment: str = "production" # Optional base URL for self-hosted Honcho (overrides environment mapping) base_url: str | None = None + # Optional request timeout in seconds for Honcho SDK HTTP calls + timeout: float | None = None # Identity peer_name: str | None = None ai_peer: str = "hermes" @@ -168,17 +232,30 @@ class HonchoClientConfig: # Write frequency: "async" (background thread), "turn" (sync per turn), # "session" (flush on session end), or int (every N turns) write_frequency: str | int = "async" - # Prefetch budget + # Prefetch budget (None = no cap; set to an integer to bound auto-injected context) context_tokens: int | None = None # Dialectic (peer.chat) settings # reasoning_level: "minimal" | "low" | "medium" | "high" | "max" dialectic_reasoning_level: str = "low" - # dynamic: auto-bump reasoning level based on query length - # true — low->medium (120+ chars), low->high (400+ chars), capped at "high" - # false — always use dialecticReasoningLevel as-is + # When true, the model can override reasoning_level per-call via the + # honcho_reasoning tool param 
(agentic). When false, always uses + # dialecticReasoningLevel and ignores model-provided overrides. dialectic_dynamic: bool = True # Max chars of dialectic result to inject into Hermes system prompt dialectic_max_chars: int = 600 + # Dialectic depth: how many .chat() calls per dialectic cycle (1-3). + # Depth 1: single call. Depth 2: self-audit + targeted synthesis. + # Depth 3: self-audit + synthesis + reconciliation. + dialectic_depth: int = 1 + # Optional per-pass reasoning level override. Array of reasoning levels + # matching dialectic_depth length. When None, uses proportional defaults + # derived from dialectic_reasoning_level. + dialectic_depth_levels: list[str] | None = None + # When true, the auto-injected dialectic scales reasoning level up on + # longer queries. See HonchoMemoryProvider for thresholds. + reasoning_heuristic: bool = True + # Ceiling for the heuristic-selected reasoning level. + reasoning_level_cap: str = "high" # Honcho API limits — configurable for self-hosted instances # Max chars per message sent via add_messages() (Honcho cloud: 25000) message_max_chars: int = 25000 @@ -189,10 +266,8 @@ class HonchoClientConfig: # "context" — auto-injected context only, Honcho tools removed # "tools" — Honcho tools only, no auto-injected context recall_mode: str = "hybrid" - # When True and recallMode is "tools", create the Honcho session eagerly - # during initialize() instead of deferring to the first tool call. - # This ensures sync_turn() can write from the very first turn. - # Does NOT enable automatic context injection — only changes init timing. + # Eager init in tools mode — when true, initializes session during + # initialize() instead of deferring to first tool call init_on_session_start: bool = False # Observation mode: legacy string shorthand ("directional" or "unified"). # Kept for backward compat; granular per-peer booleans below are preferred. 
@@ -224,12 +299,14 @@ class HonchoClientConfig: resolved_host = host or resolve_active_host() api_key = os.environ.get("HONCHO_API_KEY") base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None + timeout = _resolve_optional_float(os.environ.get("HONCHO_TIMEOUT")) return cls( host=resolved_host, workspace_id=workspace_id, api_key=api_key, environment=os.environ.get("HONCHO_ENVIRONMENT", "production"), base_url=base_url, + timeout=timeout, ai_peer=resolved_host, enabled=bool(api_key or base_url), ) @@ -290,6 +367,11 @@ class HonchoClientConfig: or os.environ.get("HONCHO_BASE_URL", "").strip() or None ) + timeout = _resolve_optional_float( + raw.get("timeout"), + raw.get("requestTimeout"), + os.environ.get("HONCHO_TIMEOUT"), + ) # Auto-enable when API key or base_url is present (unless explicitly disabled) # Host-level enabled wins, then root-level, then auto-enable if key/url exists. @@ -335,12 +417,16 @@ class HonchoClientConfig: api_key=api_key, environment=environment, base_url=base_url, + timeout=timeout, peer_name=host_block.get("peerName") or raw.get("peerName"), ai_peer=ai_peer, enabled=enabled, save_messages=save_messages, write_frequency=write_frequency, - context_tokens=host_block.get("contextTokens") or raw.get("contextTokens"), + context_tokens=_parse_context_tokens( + host_block.get("contextTokens"), + raw.get("contextTokens"), + ), dialectic_reasoning_level=( host_block.get("dialecticReasoningLevel") or raw.get("dialecticReasoningLevel") @@ -356,6 +442,25 @@ class HonchoClientConfig: or raw.get("dialecticMaxChars") or 600 ), + dialectic_depth=_parse_dialectic_depth( + host_block.get("dialecticDepth"), + raw.get("dialecticDepth"), + ), + dialectic_depth_levels=_parse_dialectic_depth_levels( + host_block.get("dialecticDepthLevels"), + raw.get("dialecticDepthLevels"), + depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")), + ), + reasoning_heuristic=_resolve_bool( + host_block.get("reasoningHeuristic"), + 
raw.get("reasoningHeuristic"), + default=True, + ), + reasoning_level_cap=( + host_block.get("reasoningLevelCap") + or raw.get("reasoningLevelCap") + or "high" + ), message_max_chars=int( host_block.get("messageMaxChars") or raw.get("messageMaxChars") @@ -422,16 +527,18 @@ class HonchoClientConfig: cwd: str | None = None, session_title: str | None = None, session_id: str | None = None, + gateway_session_key: str | None = None, ) -> str | None: """Resolve Honcho session name. Resolution order: 1. Manual directory override from sessions map 2. Hermes session title (from /title command) - 3. per-session strategy — Hermes session_id ({timestamp}_{hex}) - 4. per-repo strategy — git repo root directory name - 5. per-directory strategy — directory basename - 6. global strategy — workspace name + 3. Gateway session key (stable per-chat identifier from gateway platforms) + 4. per-session strategy — Hermes session_id ({timestamp}_{hex}) + 5. per-repo strategy — git repo root directory name + 6. per-directory strategy — directory basename + 7. global strategy — workspace name """ import re @@ -445,12 +552,22 @@ class HonchoClientConfig: # /title mid-session remap if session_title: - sanitized = re.sub(r'[^a-zA-Z0-9_-]', '-', session_title).strip('-') + sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', session_title).strip('-') if sanitized: if self.session_peer_prefix and self.peer_name: return f"{self.peer_name}-{sanitized}" return sanitized + # Gateway session key: stable per-chat identifier passed by the gateway + # (e.g. "agent:main:telegram:dm:8439114563"). Sanitize colons to hyphens + # for Honcho session ID compatibility. This takes priority over strategy- + # based resolution because gateway platforms need per-chat isolation that + # cwd-based strategies cannot provide. 
+ if gateway_session_key: + sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-') + if sanitized: + return sanitized + # per-session: inherit Hermes session_id (new Honcho session each run) if self.session_strategy == "per-session" and session_id: if self.session_peer_prefix and self.peer_name: @@ -512,13 +629,20 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho: # mapping, enabling remote self-hosted Honcho deployments without # requiring the server to live on localhost. resolved_base_url = config.base_url - if not resolved_base_url: + resolved_timeout = config.timeout + if not resolved_base_url or resolved_timeout is None: try: from hermes_cli.config import load_config hermes_cfg = load_config() honcho_cfg = hermes_cfg.get("honcho", {}) if isinstance(honcho_cfg, dict): - resolved_base_url = honcho_cfg.get("base_url", "").strip() or None + if not resolved_base_url: + resolved_base_url = honcho_cfg.get("base_url", "").strip() or None + if resolved_timeout is None: + resolved_timeout = _resolve_optional_float( + honcho_cfg.get("timeout"), + honcho_cfg.get("request_timeout"), + ) except Exception: pass @@ -553,6 +677,8 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho: } if resolved_base_url: kwargs["base_url"] = resolved_base_url + if resolved_timeout is not None: + kwargs["timeout"] = resolved_timeout _honcho_client = Honcho(**kwargs) diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py index 2cd4c5bd2f..79625b5cd5 100644 --- a/plugins/memory/honcho/session.py +++ b/plugins/memory/honcho/session.py @@ -78,6 +78,7 @@ class HonchoSessionManager: honcho: Honcho | None = None, context_tokens: int | None = None, config: Any | None = None, + runtime_user_peer_name: str | None = None, ): """ Initialize the session manager. @@ -87,10 +88,12 @@ class HonchoSessionManager: context_tokens: Max tokens for context() calls (None = Honcho default). 
config: HonchoClientConfig from global config (provides peer_name, ai_peer, write_frequency, observation, etc.). + runtime_user_peer_name: Gateway user identity for per-user memory scoping. """ self._honcho = honcho self._context_tokens = context_tokens self._config = config + self._runtime_user_peer_name = runtime_user_peer_name self._cache: dict[str, HonchoSession] = {} self._peers_cache: dict[str, Any] = {} self._sessions_cache: dict[str, Any] = {} @@ -100,9 +103,11 @@ class HonchoSessionManager: self._write_frequency = write_frequency self._turn_counter: int = 0 - # Prefetch caches: session_key → last result (consumed once per turn) + # Prefetch cache: session_key → last context result (consumed once per turn). + # Dialectic results are cached on the plugin side (HonchoMemoryProvider + # ._prefetch_result) so session-start prewarm and turn-driven fires share + # one source of truth; see __init__.py _do_session_init for the prewarm. self._context_cache: dict[str, dict] = {} - self._dialectic_cache: dict[str, str] = {} self._prefetch_cache_lock = threading.Lock() self._dialectic_reasoning_level: str = ( config.dialectic_reasoning_level if config else "low" @@ -272,8 +277,10 @@ class HonchoSessionManager: logger.debug("Local session cache hit: %s", key) return self._cache[key] - # Use peer names from global config when available - if self._config and self._config.peer_name: + # Gateway sessions should use the runtime user identity when available. + if self._runtime_user_peer_name: + user_peer_id = self._sanitize_id(self._runtime_user_peer_name) + elif self._config and self._config.peer_name: user_peer_id = self._sanitize_id(self._config.peer_name) else: # Fallback: derive from session key @@ -486,36 +493,9 @@ class HonchoSessionManager: _REASONING_LEVELS = ("minimal", "low", "medium", "high", "max") - def _dynamic_reasoning_level(self, query: str) -> str: - """ - Pick a reasoning level for a dialectic query. 
- - When dialecticDynamic is true (default), auto-bumps based on query - length so Honcho applies more inference where it matters: - - < 120 chars -> configured default (typically "low") - 120-400 chars -> +1 level above default (cap at "high") - > 400 chars -> +2 levels above default (cap at "high") - - "max" is never selected automatically -- reserve it for explicit config. - - When dialecticDynamic is false, always returns the configured level. - """ - if not self._dialectic_dynamic: - return self._dialectic_reasoning_level - - levels = self._REASONING_LEVELS - default_idx = levels.index(self._dialectic_reasoning_level) if self._dialectic_reasoning_level in levels else 1 - n = len(query) - if n < 120: - bump = 0 - elif n < 400: - bump = 1 - else: - bump = 2 - # Cap at "high" (index 3) for auto-selection - idx = min(default_idx + bump, 3) - return levels[idx] + def _default_reasoning_level(self) -> str: + """Return the configured default reasoning level.""" + return self._dialectic_reasoning_level def dialectic_query( self, session_key: str, query: str, @@ -526,14 +506,15 @@ class HonchoSessionManager: Query Honcho's dialectic endpoint about a peer. Runs an LLM on Honcho's backend against the target peer's full - representation. Higher latency than context() — call async via - prefetch_dialectic() to avoid blocking the response. + representation. Higher latency than context() — callers run this in + a background thread (see HonchoMemoryProvider) to avoid blocking. Args: session_key: The session key to query against. query: Natural language question. - reasoning_level: Override the config default. If None, uses - _dynamic_reasoning_level(query). + reasoning_level: Override the configured default (dialecticReasoningLevel). + Only honored when dialecticDynamic is true. + If None or dialecticDynamic is false, uses the configured default. peer: Which peer to query — "user" (default) or "ai". 
Returns: @@ -543,29 +524,34 @@ class HonchoSessionManager: if not session: return "" + target_peer_id = self._resolve_peer_id(session, peer) + if target_peer_id is None: + return "" + # Guard: truncate query to Honcho's dialectic input limit if len(query) > self._dialectic_max_input_chars: query = query[:self._dialectic_max_input_chars].rsplit(" ", 1)[0] - level = reasoning_level or self._dynamic_reasoning_level(query) + if self._dialectic_dynamic and reasoning_level: + level = reasoning_level + else: + level = self._default_reasoning_level() try: if self._ai_observe_others: - # AI peer can observe user — use cross-observation routing - if peer == "ai": - ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id) + # AI peer can observe other peers — use assistant as observer. + ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id) + if target_peer_id == session.assistant_peer_id: result = ai_peer_obj.chat(query, reasoning_level=level) or "" else: - ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id) result = ai_peer_obj.chat( query, - target=session.user_peer_id, + target=target_peer_id, reasoning_level=level, ) or "" else: - # AI can't observe others — each peer queries self - peer_id = session.assistant_peer_id if peer == "ai" else session.user_peer_id - target_peer = self._get_or_create_peer(peer_id) + # Without cross-observation, each peer queries its own context. + target_peer = self._get_or_create_peer(target_peer_id) result = target_peer.chat(query, reasoning_level=level) or "" # Apply Hermes-side char cap before caching @@ -576,42 +562,6 @@ class HonchoSessionManager: logger.warning("Honcho dialectic query failed: %s", e) return "" - def prefetch_dialectic(self, session_key: str, query: str) -> None: - """ - Fire a dialectic_query in a background thread, caching the result. - - Non-blocking. The result is available via pop_dialectic_result() - on the next call (typically the following turn). 
Reasoning level - is selected dynamically based on query complexity. - - Args: - session_key: The session key to query against. - query: The user's current message, used as the query. - """ - def _run(): - result = self.dialectic_query(session_key, query) - if result: - self.set_dialectic_result(session_key, result) - - t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True) - t.start() - - def set_dialectic_result(self, session_key: str, result: str) -> None: - """Store a prefetched dialectic result in a thread-safe way.""" - if not result: - return - with self._prefetch_cache_lock: - self._dialectic_cache[session_key] = result - - def pop_dialectic_result(self, session_key: str) -> str: - """ - Return and clear the cached dialectic result for this session. - - Returns empty string if no result is ready yet. - """ - with self._prefetch_cache_lock: - return self._dialectic_cache.pop(session_key, "") - def prefetch_context(self, session_key: str, user_message: str | None = None) -> None: """ Fire get_prefetch_context in a background thread, caching the result. @@ -647,10 +597,11 @@ class HonchoSessionManager: """ Pre-fetch user and AI peer context from Honcho. - Fetches peer_representation and peer_card for both peers. search_query - is intentionally omitted — it would only affect additional excerpts - that this code does not consume, and passing the raw message exposes - conversation content in server access logs. + Fetches peer_representation and peer_card for both peers, plus the + session summary when available. search_query is intentionally omitted + — it would only affect additional excerpts that this code does not + consume, and passing the raw message exposes conversation content in + server access logs. Args: session_key: The session key to get context for. @@ -658,15 +609,29 @@ class HonchoSessionManager: Returns: Dictionary with 'representation', 'card', 'ai_representation', - and 'ai_card' keys. 
+ 'ai_card', and optionally 'summary' keys. """ session = self._cache.get(session_key) if not session: return {} result: dict[str, str] = {} + + # Session summary — provides session-scoped context. + # Fresh sessions (per-session cold start, or first-ever per-directory) + # return null summary — the guard below handles that gracefully. + # Per-directory returning sessions get their accumulated summary. try: - user_ctx = self._fetch_peer_context(session.user_peer_id) + honcho_session = self._sessions_cache.get(session.honcho_session_id) + if honcho_session: + ctx = honcho_session.context(summary=True) + if ctx.summary and getattr(ctx.summary, "content", None): + result["summary"] = ctx.summary.content + except Exception as e: + logger.debug("Failed to fetch session summary from Honcho: %s", e) + + try: + user_ctx = self._fetch_peer_context(session.user_peer_id, target=session.user_peer_id) result["representation"] = user_ctx["representation"] result["card"] = "\n".join(user_ctx["card"]) except Exception as e: @@ -674,7 +639,7 @@ class HonchoSessionManager: # Also fetch AI peer's own representation so Hermes knows itself. try: - ai_ctx = self._fetch_peer_context(session.assistant_peer_id) + ai_ctx = self._fetch_peer_context(session.assistant_peer_id, target=session.assistant_peer_id) result["ai_representation"] = ai_ctx["representation"] result["ai_card"] = "\n".join(ai_ctx["card"]) except Exception as e: @@ -862,7 +827,7 @@ class HonchoSessionManager: return [str(item) for item in card if item] return [str(card)] - def _fetch_peer_card(self, peer_id: str) -> list[str]: + def _fetch_peer_card(self, peer_id: str, *, target: str | None = None) -> list[str]: """Fetch a peer card directly from the peer object. 
This avoids relying on session.context(), which can return an empty @@ -872,22 +837,33 @@ class HonchoSessionManager: peer = self._get_or_create_peer(peer_id) getter = getattr(peer, "get_card", None) if callable(getter): - return self._normalize_card(getter()) + return self._normalize_card(getter(target=target) if target is not None else getter()) legacy_getter = getattr(peer, "card", None) if callable(legacy_getter): - return self._normalize_card(legacy_getter()) + return self._normalize_card(legacy_getter(target=target) if target is not None else legacy_getter()) return [] - def _fetch_peer_context(self, peer_id: str, search_query: str | None = None) -> dict[str, Any]: + def _fetch_peer_context( + self, + peer_id: str, + search_query: str | None = None, + *, + target: str | None = None, + ) -> dict[str, Any]: """Fetch representation + peer card directly from a peer object.""" peer = self._get_or_create_peer(peer_id) representation = "" card: list[str] = [] try: - ctx = peer.context(search_query=search_query) if search_query else peer.context() + context_kwargs: dict[str, Any] = {} + if target is not None: + context_kwargs["target"] = target + if search_query is not None: + context_kwargs["search_query"] = search_query + ctx = peer.context(**context_kwargs) if context_kwargs else peer.context() representation = ( getattr(ctx, "representation", None) or getattr(ctx, "peer_representation", None) @@ -899,24 +875,111 @@ class HonchoSessionManager: if not representation: try: - representation = peer.representation() or "" + representation = ( + peer.representation(target=target) if target is not None else peer.representation() + ) or "" except Exception as e: logger.debug("Direct peer.representation() failed for '%s': %s", peer_id, e) if not card: try: - card = self._fetch_peer_card(peer_id) + card = self._fetch_peer_card(peer_id, target=target) except Exception as e: logger.debug("Direct peer card fetch failed for '%s': %s", peer_id, e) return {"representation": 
representation, "card": card} - def get_peer_card(self, session_key: str) -> list[str]: + def get_session_context(self, session_key: str, peer: str = "user") -> dict[str, Any]: + """Fetch full session context from Honcho including summary. + + Uses the session-level context() API which returns summary, + peer_representation, peer_card, and messages. """ - Fetch the user peer's card — a curated list of key facts. + session = self._cache.get(session_key) + if not session: + return {} + + honcho_session = self._sessions_cache.get(session.honcho_session_id) + if not honcho_session: + # Fall back to peer-level context, respecting the requested peer + peer_id = self._resolve_peer_id(session, peer) + if peer_id is None: + peer_id = session.user_peer_id + return self._fetch_peer_context(peer_id, target=peer_id) + + try: + peer_id = self._resolve_peer_id(session, peer) + ctx = honcho_session.context( + summary=True, + peer_target=peer_id, + peer_perspective=session.user_peer_id if peer == "user" else session.assistant_peer_id, + ) + + result: dict[str, Any] = {} + + # Summary + if ctx.summary: + result["summary"] = ctx.summary.content + + # Peer representation and card + if ctx.peer_representation: + result["representation"] = ctx.peer_representation + if ctx.peer_card: + result["card"] = "\n".join(ctx.peer_card) + + # Messages (last N for context) + if ctx.messages: + recent = ctx.messages[-10:] # last 10 messages + result["recent_messages"] = [ + {"role": getattr(m, "peer_id", "unknown"), "content": (m.content or "")[:500]} + for m in recent + ] + + return result + except Exception as e: + logger.debug("Session context fetch failed: %s", e) + return {} + + def _resolve_peer_id(self, session: HonchoSession, peer: str | None) -> str: + """Resolve a peer alias or explicit peer ID to a concrete Honcho peer ID. + + Always returns a non-empty string: either a known peer ID or a + sanitized version of the caller-supplied alias/ID. 
+ """ + candidate = (peer or "user").strip() + if not candidate: + return session.user_peer_id + + normalized = self._sanitize_id(candidate) + if normalized == self._sanitize_id("user"): + return session.user_peer_id + if normalized == self._sanitize_id("ai"): + return session.assistant_peer_id + + return normalized + + def _resolve_observer_target( + self, + session: HonchoSession, + peer: str | None, + ) -> tuple[str, str | None]: + """Resolve observer and target peer IDs for context/search/profile queries.""" + target_peer_id = self._resolve_peer_id(session, peer) + + if target_peer_id == session.assistant_peer_id: + return session.assistant_peer_id, session.assistant_peer_id + + if self._ai_observe_others: + return session.assistant_peer_id, target_peer_id + + return target_peer_id, None + + def get_peer_card(self, session_key: str, peer: str = "user") -> list[str]: + """ + Fetch a peer card — a curated list of key facts. Fast, no LLM reasoning. Returns raw structured facts Honcho has - inferred about the user (name, role, preferences, patterns). + inferred about the target peer (name, role, preferences, patterns). Empty list if unavailable. """ session = self._cache.get(session_key) @@ -924,12 +987,19 @@ class HonchoSessionManager: return [] try: - return self._fetch_peer_card(session.user_peer_id) + observer_peer_id, target_peer_id = self._resolve_observer_target(session, peer) + return self._fetch_peer_card(observer_peer_id, target=target_peer_id) except Exception as e: logger.debug("Failed to fetch peer card from Honcho: %s", e) return [] - def search_context(self, session_key: str, query: str, max_tokens: int = 800) -> str: + def search_context( + self, + session_key: str, + query: str, + max_tokens: int = 800, + peer: str = "user", + ) -> str: """ Semantic search over Honcho session context. @@ -941,6 +1011,7 @@ class HonchoSessionManager: session_key: Session to search against. query: Search query for semantic matching. 
max_tokens: Token budget for returned content. + peer: Peer alias or explicit peer ID to search about. Returns: Relevant context excerpts as a string, or empty string if none. @@ -950,7 +1021,13 @@ class HonchoSessionManager: return "" try: - ctx = self._fetch_peer_context(session.user_peer_id, search_query=query) + observer_peer_id, target = self._resolve_observer_target(session, peer) + + ctx = self._fetch_peer_context( + observer_peer_id, + search_query=query, + target=target, + ) parts = [] if ctx["representation"]: parts.append(ctx["representation"]) @@ -962,16 +1039,17 @@ class HonchoSessionManager: logger.debug("Honcho search_context failed: %s", e) return "" - def create_conclusion(self, session_key: str, content: str) -> bool: - """Write a conclusion about the user back to Honcho. + def create_conclusion(self, session_key: str, content: str, peer: str = "user") -> bool: + """Write a conclusion about a target peer back to Honcho. - Conclusions are facts the AI peer observes about the user — - preferences, corrections, clarifications, project context. - They feed into the user's peer card and representation. + Conclusions are facts a peer observes about another peer or itself — + preferences, corrections, clarifications, and project context. + They feed into the target peer's card and representation. Args: session_key: Session to associate the conclusion with. - content: The conclusion text (e.g. "User prefers dark mode"). + content: The conclusion text. + peer: Peer alias or explicit peer ID. "user" is the default alias. Returns: True on success, False on failure. 
@@ -985,25 +1063,90 @@ class HonchoSessionManager: return False try: - if self._ai_observe_others: - # AI peer creates conclusion about user (cross-observation) + target_peer_id = self._resolve_peer_id(session, peer) + if target_peer_id is None: + logger.warning("Could not resolve conclusion peer '%s' for session '%s'", peer, session_key) + return False + + if target_peer_id == session.assistant_peer_id: assistant_peer = self._get_or_create_peer(session.assistant_peer_id) - conclusions_scope = assistant_peer.conclusions_of(session.user_peer_id) + conclusions_scope = assistant_peer.conclusions_of(session.assistant_peer_id) + elif self._ai_observe_others: + assistant_peer = self._get_or_create_peer(session.assistant_peer_id) + conclusions_scope = assistant_peer.conclusions_of(target_peer_id) else: - # AI can't observe others — user peer creates self-conclusion - user_peer = self._get_or_create_peer(session.user_peer_id) - conclusions_scope = user_peer.conclusions_of(session.user_peer_id) + target_peer = self._get_or_create_peer(target_peer_id) + conclusions_scope = target_peer.conclusions_of(target_peer_id) conclusions_scope.create([{ "content": content.strip(), "session_id": session.honcho_session_id, }]) - logger.info("Created conclusion for %s: %s", session_key, content[:80]) + logger.info("Created conclusion about %s for %s: %s", target_peer_id, session_key, content[:80]) return True except Exception as e: logger.error("Failed to create conclusion: %s", e) return False + def delete_conclusion(self, session_key: str, conclusion_id: str, peer: str = "user") -> bool: + """Delete a conclusion by ID. Use only for PII removal. + + Args: + session_key: Session key for peer resolution. + conclusion_id: The conclusion ID to delete. + peer: Peer alias or explicit peer ID. + + Returns: + True on success, False on failure. 
+ """ + session = self._cache.get(session_key) + if not session: + return False + try: + target_peer_id = self._resolve_peer_id(session, peer) + if target_peer_id == session.assistant_peer_id: + observer = self._get_or_create_peer(session.assistant_peer_id) + scope = observer.conclusions_of(session.assistant_peer_id) + elif self._ai_observe_others: + observer = self._get_or_create_peer(session.assistant_peer_id) + scope = observer.conclusions_of(target_peer_id) + else: + target_peer = self._get_or_create_peer(target_peer_id) + scope = target_peer.conclusions_of(target_peer_id) + scope.delete(conclusion_id) + logger.info("Deleted conclusion %s for %s", conclusion_id, session_key) + return True + except Exception as e: + logger.error("Failed to delete conclusion %s: %s", conclusion_id, e) + return False + + def set_peer_card(self, session_key: str, card: list[str], peer: str = "user") -> list[str] | None: + """Update a peer's card. + + Args: + session_key: Session key for peer resolution. + card: New peer card as list of fact strings. + peer: Peer alias or explicit peer ID. + + Returns: + Updated card on success, None on failure. + """ + session = self._cache.get(session_key) + if not session: + return None + try: + peer_id = self._resolve_peer_id(session, peer) + if peer_id is None: + logger.warning("Could not resolve peer '%s' for set_peer_card in session '%s'", peer, session_key) + return None + peer_obj = self._get_or_create_peer(peer_id) + result = peer_obj.set_card(card) + logger.info("Updated peer card for %s (%d facts)", peer_id, len(card)) + return result + except Exception as e: + logger.error("Failed to set peer card: %s", e) + return None + def seed_ai_identity(self, session_key: str, content: str, source: str = "manual") -> bool: """ Seed the AI peer's Honcho representation from text content. 
@@ -1061,7 +1204,7 @@ class HonchoSessionManager: return {"representation": "", "card": ""} try: - ctx = self._fetch_peer_context(session.assistant_peer_id) + ctx = self._fetch_peer_context(session.assistant_peer_id, target=session.assistant_peer_id) return { "representation": ctx["representation"] or "", "card": "\n".join(ctx["card"]), diff --git a/plugins/memory/openviking/__init__.py b/plugins/memory/openviking/__init__.py index 1777d423bd..86d7ad5efb 100644 --- a/plugins/memory/openviking/__init__.py +++ b/plugins/memory/openviking/__init__.py @@ -10,8 +10,9 @@ lifecycle instead of read-only search endpoints. Config via environment variables (profile-scoped via each profile's .env): OPENVIKING_ENDPOINT — Server URL (default: http://127.0.0.1:1933) OPENVIKING_API_KEY — API key (required for authenticated servers) - OPENVIKING_ACCOUNT — Tenant account (default: root) + OPENVIKING_ACCOUNT — Tenant account (default: default) OPENVIKING_USER — Tenant user (default: default) + OPENVIKING_AGENT — Tenant agent (default: hermes) Capabilities: - Automatic memory extraction on session commit (6 categories) @@ -80,11 +81,12 @@ class _VikingClient: """Thin HTTP client for the OpenViking REST API.""" def __init__(self, endpoint: str, api_key: str = "", - account: str = "", user: str = ""): + account: str = "", user: str = "", agent: str = ""): self._endpoint = endpoint.rstrip("/") self._api_key = api_key - self._account = account or os.environ.get("OPENVIKING_ACCOUNT", "root") + self._account = account or os.environ.get("OPENVIKING_ACCOUNT", "default") self._user = user or os.environ.get("OPENVIKING_USER", "default") + self._agent = agent or os.environ.get("OPENVIKING_AGENT", "hermes") self._httpx = _get_httpx() if self._httpx is None: raise ImportError("httpx is required for OpenViking: pip install httpx") @@ -94,6 +96,7 @@ class _VikingClient: "Content-Type": "application/json", "X-OpenViking-Account": self._account, "X-OpenViking-User": self._user, + "X-OpenViking-Agent": 
self._agent, } if self._api_key: h["X-API-Key"] = self._api_key @@ -282,20 +285,44 @@ class OpenVikingMemoryProvider(MemoryProvider): }, { "key": "api_key", - "description": "OpenViking API key", + "description": "OpenViking API key (leave blank for local dev mode)", "secret": True, "env_var": "OPENVIKING_API_KEY", }, + { + "key": "account", + "description": "OpenViking tenant account ID ([default], used when local mode, OPENVIKING_API_KEY is empty)", + "default": "default", + "env_var": "OPENVIKING_ACCOUNT", + }, + { + "key": "user", + "description": "OpenViking user ID within the account ([default], used when local mode, OPENVIKING_API_KEY is empty)", + "default": "default", + "env_var": "OPENVIKING_USER", + }, + { + "key": "agent", + "description": "OpenViking agent ID within the account ([hermes], useful in multi-agent mode)", + "default": "hermes", + "env_var": "OPENVIKING_AGENT", + }, ] def initialize(self, session_id: str, **kwargs) -> None: self._endpoint = os.environ.get("OPENVIKING_ENDPOINT", _DEFAULT_ENDPOINT) self._api_key = os.environ.get("OPENVIKING_API_KEY", "") + self._account = os.environ.get("OPENVIKING_ACCOUNT", "default") + self._user = os.environ.get("OPENVIKING_USER", "default") + self._agent = os.environ.get("OPENVIKING_AGENT", "hermes") self._session_id = session_id self._turn_count = 0 try: - self._client = _VikingClient(self._endpoint, self._api_key) + self._client = _VikingClient( + self._endpoint, self._api_key, + account=self._account, user=self._user, agent=self._agent, + ) if not self._client.health(): logger.warning("OpenViking server at %s is not reachable", self._endpoint) self._client = None @@ -325,7 +352,8 @@ class OpenVikingMemoryProvider(MemoryProvider): "(abstract/overview/full), viking_browse to explore.\n" "Use viking_remember to store facts, viking_add_resource to index URLs/docs." 
) - except Exception: + except Exception as e: + logger.warning("OpenViking system_prompt_block failed: %s", e) return ( "# OpenViking Knowledge Base\n" f"Active. Endpoint: {self._endpoint}\n" @@ -351,7 +379,10 @@ class OpenVikingMemoryProvider(MemoryProvider): def _run(): try: - client = _VikingClient(self._endpoint, self._api_key) + client = _VikingClient( + self._endpoint, self._api_key, + account=self._account, user=self._user, agent=self._agent, + ) resp = client.post("/api/v1/search/find", { "query": query, "top_k": 5, @@ -386,7 +417,10 @@ class OpenVikingMemoryProvider(MemoryProvider): def _sync(): try: - client = _VikingClient(self._endpoint, self._api_key) + client = _VikingClient( + self._endpoint, self._api_key, + account=self._account, user=self._user, agent=self._agent, + ) sid = self._session_id # Add user message @@ -442,7 +476,10 @@ class OpenVikingMemoryProvider(MemoryProvider): def _write(): try: - client = _VikingClient(self._endpoint, self._api_key) + client = _VikingClient( + self._endpoint, self._api_key, + account=self._account, user=self._user, agent=self._agent, + ) # Add as a user message with memory context so the commit # picks it up as an explicit memory during extraction client.post(f"/api/v1/sessions/{self._session_id}/messages", { diff --git a/pyproject.toml b/pyproject.toml index fa3fd48227..bd83673651 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "hermes-agent" -version = "0.9.0" +version = "0.10.0" description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere" readme = "README.md" requires-python = ">=3.11" @@ -40,7 +40,7 @@ dependencies = [ modal = ["modal>=1.0.0,<2"] daytona = ["daytona>=0.148.0,<1"] dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"] -messaging = ["python-telegram-bot[webhooks]>=22.6,<23", 
"discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"] +messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"] cron = ["croniter>=6.0.0,<7"] slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"] matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29"] @@ -63,10 +63,12 @@ homeassistant = ["aiohttp>=3.9.0,<4"] sms = ["aiohttp>=3.9.0,<4"] acp = ["agent-client-protocol>=0.9.0,<1.0"] mistral = ["mistralai>=2.3.0,<3"] +bedrock = ["boto3>=1.35.0,<2"] termux = [ # Tested Android / Termux path: keeps the core CLI feature-rich while # avoiding extras that currently depend on non-Android wheels (notably # faster-whisper -> ctranslate2 via the voice extra). + "python-telegram-bot[webhooks]>=22.6,<23", "hermes-agent[cron]", "hermes-agent[cli]", "hermes-agent[pty]", @@ -74,8 +76,8 @@ termux = [ "hermes-agent[honcho]", "hermes-agent[acp]", ] -dingtalk = ["dingtalk-stream>=0.1.0,<1"] -feishu = ["lark-oapi>=1.5.3,<2"] +dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"] +feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"] web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"] rl = [ "atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30", @@ -108,6 +110,7 @@ all = [ "hermes-agent[dingtalk]", "hermes-agent[feishu]", "hermes-agent[mistral]", + "hermes-agent[bedrock]", "hermes-agent[web]", ] @@ -123,7 +126,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector hermes_cli = ["web_dist/**/*"] [tool.setuptools.packages.find] -include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"] +include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", 
"cron", "acp_adapter", "plugins", "plugins.*"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/run_agent.py b/run_agent.py index 626951b276..a1e3e3038b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -48,6 +48,10 @@ from hermes_constants import get_hermes_home # Load .env from ~/.hermes/.env first, then project root as dev fallback. # User-managed env files should override stale shell exports on restart. from hermes_cli.env_loader import load_hermes_dotenv +from hermes_cli.timeouts import ( + get_provider_request_timeout, + get_provider_stale_timeout, +) _hermes_home = get_hermes_home() _project_env = Path(__file__).parent / '.env' @@ -75,7 +79,7 @@ from tools.browser_tool import cleanup_browser from hermes_constants import OPENROUTER_BASE_URL # Agent internals extracted to agent/ package for modularity -from agent.memory_manager import build_memory_context_block +from agent.memory_manager import build_memory_context_block, sanitize_context from agent.retry_utils import jittered_backoff from agent.error_classifier import classify_api_error, FailoverReason from agent.prompt_builder import ( @@ -159,6 +163,20 @@ class _SafeWriter: return getattr(self._inner, name) +def _get_proxy_from_env() -> Optional[str]: + """Read proxy URL from environment variables. + + Checks HTTPS_PROXY, HTTP_PROXY, ALL_PROXY (and lowercase variants) in order. + Returns the first valid proxy URL found, or None if no proxy is configured. 
+ """ + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy"): + value = os.environ.get(key, "").strip() + if value: + return value + return None + + def _install_safe_stdio() -> None: """Wrap stdout/stderr so best-effort console output cannot crash the agent.""" for stream_name in ("stdout", "stderr"): @@ -353,12 +371,133 @@ def _sanitize_surrogates(text: str) -> str: return text +def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]: + """Convert chat-style multimodal content to Responses API input parts. + + Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format) + Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format) + + Returns an empty list when ``content`` is not a list or contains no + recognized parts — callers fall back to the string path. + """ + if not isinstance(content, list): + return [] + converted: List[Dict[str, Any]] = [] + for part in content: + if isinstance(part, str): + if part: + converted.append({"type": "input_text", "text": part}) + continue + if not isinstance(part, dict): + continue + ptype = str(part.get("type") or "").strip().lower() + if ptype in {"text", "input_text", "output_text"}: + text = part.get("text") + if isinstance(text, str) and text: + converted.append({"type": "input_text", "text": text}) + continue + if ptype in {"image_url", "input_image"}: + image_ref = part.get("image_url") + detail = part.get("detail") + if isinstance(image_ref, dict): + url = image_ref.get("url") + detail = image_ref.get("detail", detail) + else: + url = image_ref + if not isinstance(url, str) or not url: + continue + image_part: Dict[str, Any] = {"type": "input_image", "image_url": url} + if isinstance(detail, str) and detail.strip(): + image_part["detail"] = detail.strip() + converted.append(image_part) + return converted + + +def _summarize_user_message_for_log(content: Any) -> str: + """Return a short text summary of a user message for 
logging/trajectory. + + Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}`` + parts from the API server. Logging, spinner previews, and trajectory + files all want a plain string — this helper extracts the first chunk of + text and notes any attached images. Returns an empty string for empty + lists and ``str(content)`` for unexpected scalar types. + """ + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + text_bits: List[str] = [] + image_count = 0 + for part in content: + if isinstance(part, str): + if part: + text_bits.append(part) + continue + if not isinstance(part, dict): + continue + ptype = str(part.get("type") or "").strip().lower() + if ptype in {"text", "input_text", "output_text"}: + text = part.get("text") + if isinstance(text, str) and text: + text_bits.append(text) + elif ptype in {"image_url", "input_image"}: + image_count += 1 + summary = " ".join(text_bits).strip() + if image_count: + note = f"[{image_count} image{'s' if image_count != 1 else ''}]" + summary = f"{note} {summary}" if summary else note + return summary + try: + return str(content) + except Exception: + return "" + + +def _sanitize_structure_surrogates(payload: Any) -> bool: + """Replace surrogate code points in nested dict/list payloads in-place. + + Mirror of ``_sanitize_structure_non_ascii`` but for surrogate recovery. + Used to scrub nested structured fields (e.g. ``reasoning_details`` — an + array of dicts with ``summary``/``text`` strings) that flat per-field + checks don't reach. Returns True if any surrogates were replaced. 
+ """ + found = False + + def _walk(node): + nonlocal found + if isinstance(node, dict): + for key, value in node.items(): + if isinstance(value, str): + if _SURROGATE_RE.search(value): + node[key] = _SURROGATE_RE.sub('\ufffd', value) + found = True + elif isinstance(value, (dict, list)): + _walk(value) + elif isinstance(node, list): + for idx, value in enumerate(node): + if isinstance(value, str): + if _SURROGATE_RE.search(value): + node[idx] = _SURROGATE_RE.sub('\ufffd', value) + found = True + elif isinstance(value, (dict, list)): + _walk(value) + + _walk(payload) + return found + + def _sanitize_messages_surrogates(messages: list) -> bool: """Sanitize surrogate characters from all string content in a messages list. Walks message dicts in-place. Returns True if any surrogates were found - and replaced, False otherwise. Covers content/text, name, and tool call - metadata/arguments so retries don't fail on a non-content field. + and replaced, False otherwise. Covers content/text, name, tool call + metadata/arguments, AND any additional string or nested structured fields + (``reasoning``, ``reasoning_content``, ``reasoning_details``, etc.) so + retries don't fail on a non-content field. Byte-level reasoning models + (xiaomi/mimo, kimi, glm) can emit lone surrogates in reasoning output + that flow through to ``api_messages["reasoning_content"]`` on the next + turn and crash json.dumps inside the OpenAI SDK. """ found = False for msg in messages: @@ -398,9 +537,89 @@ def _sanitize_messages_surrogates(messages: list) -> bool: if isinstance(fn_args, str) and _SURROGATE_RE.search(fn_args): fn["arguments"] = _SURROGATE_RE.sub('\ufffd', fn_args) found = True + # Walk any additional string / nested fields (reasoning, + # reasoning_content, reasoning_details, etc.) — surrogates from + # byte-level reasoning models (xiaomi/mimo, kimi, glm) can lurk + # in these fields and aren't covered by the per-field checks above. 
+ # Matches _sanitize_messages_non_ascii's coverage (PR #10537). + for key, value in msg.items(): + if key in {"content", "name", "tool_calls", "role"}: + continue + if isinstance(value, str): + if _SURROGATE_RE.search(value): + msg[key] = _SURROGATE_RE.sub('\ufffd', value) + found = True + elif isinstance(value, (dict, list)): + if _sanitize_structure_surrogates(value): + found = True return found +def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str: + """Attempt to repair malformed tool_call argument JSON. + + Models like GLM-5.1 via Ollama can produce truncated JSON, trailing + commas, Python ``None``, etc. The API proxy rejects these with HTTP 400 + "invalid tool call arguments". This function applies common repairs; + if all fail it returns ``"{}"`` so the request succeeds (better than + crashing the session). All repairs are logged at WARNING level. + """ + raw_stripped = raw_args.strip() if isinstance(raw_args, str) else "" + + # Fast-path: empty / whitespace-only -> empty object + if not raw_stripped: + logger.warning("Sanitized empty tool_call arguments for %s", tool_name) + return "{}" + + # Python-literal None -> normalise to {} + if raw_stripped == "None": + logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name) + return "{}" + + # Attempt common JSON repairs + fixed = raw_stripped + # 1. Strip trailing commas before } or ] + fixed = re.sub(r',\s*([}\]])', r'\1', fixed) + # 2. Close unclosed structures + open_curly = fixed.count('{') - fixed.count('}') + open_bracket = fixed.count('[') - fixed.count(']') + if open_curly > 0: + fixed += '}' * open_curly + if open_bracket > 0: + fixed += ']' * open_bracket + # 3. 
Remove excess closing braces/brackets (bounded to 50 iterations) + for _ in range(50): + try: + json.loads(fixed) + break + except json.JSONDecodeError: + if fixed.endswith('}') and fixed.count('}') > fixed.count('{'): + fixed = fixed[:-1] + elif fixed.endswith(']') and fixed.count(']') > fixed.count('['): + fixed = fixed[:-1] + else: + break + + try: + json.loads(fixed) + logger.warning( + "Repaired malformed tool_call arguments for %s: %s → %s", + tool_name, raw_stripped[:80], fixed[:80], + ) + return fixed + except json.JSONDecodeError: + pass + + # Last resort: replace with empty object so the API request doesn't + # crash the entire session. + logger.warning( + "Unrepairable tool_call arguments for %s — " + "replaced with empty object (was: %s)", + tool_name, raw_stripped[:80], + ) + return "{}" + + def _strip_non_ascii(text: str) -> str: """Remove non-ASCII characters, replacing with closest ASCII equivalent or removing. @@ -457,6 +676,15 @@ def _sanitize_messages_non_ascii(messages: list) -> bool: if sanitized != fn_args: fn["arguments"] = sanitized found = True + # Sanitize any additional top-level string fields (e.g. reasoning_content) + for key, value in msg.items(): + if key in {"content", "name", "tool_calls", "role"}: + continue + if isinstance(value, str): + sanitized = _strip_non_ascii(value) + if sanitized != value: + msg[key] = sanitized + found = True return found @@ -531,13 +759,6 @@ class AIAgent: for AI models that support function calling. """ - # ── Class-level context pressure dedup (survives across instances) ── - # The gateway creates a new AIAgent per message, so instance-level flags - # reset every time. This dict tracks {session_id: (warn_level, timestamp)} - # to suppress duplicate warnings within a cooldown window. 
- _context_pressure_last_warned: dict = {} - _CONTEXT_PRESSURE_COOLDOWN = 300 # seconds between re-warning same session - @property def base_url(self) -> str: return self._base_url @@ -593,6 +814,7 @@ class AIAgent: prefill_messages: List[Dict[str, Any]] = None, platform: str = None, user_id: str = None, + gateway_session_key: str = None, skip_context_files: bool = False, skip_memory: bool = False, session_db=None, @@ -638,6 +860,9 @@ class AIAgent: prefill_messages (List[Dict]): Messages to prepend to conversation history as prefilled context. Useful for injecting a few-shot example or priming the model's response style. Example: [{"role": "user", "content": "Hi!"}, {"role": "assistant", "content": "Hello!"}] + NOTE: Anthropic Sonnet 4.6+ and Opus 4.6+ reject a conversation that ends on an + assistant-role message (400 error). For those models use structured outputs or + output_config.format instead of a trailing-assistant prefill. platform (str): The interface platform the user is on (e.g. "cli", "telegram", "discord", "whatsapp"). Used to inject platform-specific formatting hints into the system prompt. skip_context_files (bool): If True, skip auto-injection of SOUL.md, AGENTS.md, and .cursorrules @@ -658,6 +883,7 @@ class AIAgent: self.ephemeral_system_prompt = ephemeral_system_prompt self.platform = platform # "cli", "telegram", "discord", "whatsapp", etc. self._user_id = user_id # Platform user identifier (gateway sessions) + self._gateway_session_key = gateway_session_key # Stable per-chat key (e.g. 
agent:main:telegram:dm:123) # Pluggable print function — CLI replaces this with _cprint so that # raw ANSI status lines are routed through prompt_toolkit's renderer # instead of going directly to stdout where patch_stdout's StdoutProxy @@ -676,13 +902,18 @@ class AIAgent: self.provider = provider_name or "" self.acp_command = acp_command or command self.acp_args = list(acp_args or args or []) - if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}: + if api_mode in {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}: self.api_mode = api_mode elif self.provider == "openai-codex": self.api_mode = "codex_responses" + elif self.provider == "xai": + self.api_mode = "codex_responses" elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower: self.api_mode = "codex_responses" self.provider = "openai-codex" + elif (provider_name is None) and "api.x.ai" in self._base_url_lower: + self.api_mode = "codex_responses" + self.provider = "xai" elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower): self.api_mode = "anthropic_messages" self.provider = "anthropic" @@ -691,6 +922,9 @@ class AIAgent: # use a URL convention ending in /anthropic. Auto-detect these so the # Anthropic Messages API adapter is used instead of chat completions. self.api_mode = "anthropic_messages" + elif self.provider == "bedrock" or "bedrock-runtime" in self._base_url_lower: + # AWS Bedrock — auto-detect from provider name or base URL. + self.api_mode = "bedrock_converse" else: self.api_mode = "chat_completions" @@ -705,20 +939,27 @@ class AIAgent: except Exception: pass - # GPT-5.x models require the Responses API path — they are rejected - # on /v1/chat/completions by both OpenAI and OpenRouter. Also - # auto-upgrade for direct OpenAI URLs (api.openai.com) since all - # newer tool-calling models prefer Responses there. 
- # ACP runtimes are excluded: CopilotACPClient handles its own - # routing and does not implement the Responses API surface. + # GPT-5.x models usually require the Responses API path, but some + # providers have exceptions (for example Copilot's gpt-5-mini still + # uses chat completions). Also auto-upgrade for direct OpenAI URLs + # (api.openai.com) since all newer tool-calling models prefer + # Responses there. ACP runtimes are excluded: CopilotACPClient + # handles its own routing and does not implement the Responses API + # surface. + # When api_mode was explicitly provided, respect it — the user + # knows what their endpoint supports (#10473). if ( - self.api_mode == "chat_completions" + api_mode is None + and self.api_mode == "chat_completions" and self.provider != "copilot-acp" and not str(self.base_url or "").lower().startswith("acp://copilot") and not str(self.base_url or "").lower().startswith("acp+tcp://") and ( self._is_direct_openai_url() - or self._model_requires_responses_api(self.model) + or self._provider_model_requires_responses_api( + self.model, + provider=self.provider, + ) ) ): self.api_mode = "codex_responses" @@ -754,7 +995,28 @@ class AIAgent: self._interrupt_requested = False self._interrupt_message = None # Optional message that triggered interrupt self._execution_thread_id: int | None = None # Set at run_conversation() start + self._interrupt_thread_signal_pending = False self._client_lock = threading.RLock() + + # /steer mechanism — inject a user note into the next tool result + # without interrupting the agent. Unlike interrupt(), steer() does + # NOT set _interrupt_requested; it waits for the current tool batch + # to finish naturally, then the drain hook appends the text to the + # last tool result's content so the model sees it on its next + # iteration. Message-role alternation is preserved (we modify an + # existing tool message rather than inserting a new user turn). 
+ self._pending_steer: Optional[str] = None + self._pending_steer_lock = threading.Lock() + + # Concurrent-tool worker thread tracking. `_execute_tool_calls_concurrent` + # runs each tool on its own ThreadPoolExecutor worker — those worker + # threads have tids distinct from `_execution_thread_id`, so + # `_set_interrupt(True, _execution_thread_id)` alone does NOT cause + # `is_interrupted()` inside the worker to return True. Track the + # workers here so `interrupt()` / `clear_interrupt()` can fan out to + # their tids explicitly. + self._tool_worker_threads: set[int] = set() + self._tool_worker_threads_lock = threading.Lock() # Subagent delegation state self._delegate_depth = 0 # 0 = top-level agent, incremented for children @@ -781,13 +1043,15 @@ class AIAgent: self.prefill_messages = prefill_messages or [] # Prefilled conversation turns self._force_ascii_payload = False - # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter. - # Reduces input costs by ~75% on multi-turn conversations by caching the - # conversation prefix. Uses system_and_3 strategy (4 breakpoints). - is_openrouter = self._is_openrouter_url() - is_claude = "claude" in self.model.lower() - is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic" - self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic + # Anthropic prompt caching: auto-enabled for Claude models on native + # Anthropic, OpenRouter, and third-party gateways that speak the + # Anthropic protocol (``api_mode == 'anthropic_messages'``). Reduces + # input costs by ~75% on multi-turn conversations. Uses system_and_3 + # strategy (4 breakpoints). See ``_anthropic_prompt_cache_policy`` + # for the layout-vs-transport decision. 
+ self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy() + ) self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) # Iteration budget: the LLM is only notified when it actually exhausts @@ -799,12 +1063,6 @@ class AIAgent: self._budget_exhausted_injected = False self._budget_grace_call = False - # Context pressure warnings: notify the USER (not the LLM) as context - # fills up. Purely informational — displayed in CLI output and sent via - # status_callback for gateway platforms. Does NOT inject into messages. - # Tiered: fires at 85% and again at 95% of compaction threshold. - self._context_pressure_warned_at = 0.0 # highest tier already shown - # Activity tracking — updated on each API call, tool execution, and # stream chunk. Used by the gateway timeout handler to report what the # agent was doing when it was killed, and by the "still working" @@ -873,31 +1131,92 @@ class AIAgent: self._anthropic_client = None self._is_anthropic_oauth = False + # Resolve per-provider / per-model request timeout once up front so + # every client construction path below (Anthropic native, OpenAI-wire, + # router-based implicit auth) can apply it consistently. Bedrock + # Claude uses its own timeout path and is not covered here. + _provider_timeout = get_provider_request_timeout(self.provider, self.model) + if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token - # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic. - # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key. - # Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401). 
- _is_native_anthropic = self.provider == "anthropic" - effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "") - self.api_key = effective_key - self._anthropic_api_key = effective_key - self._anthropic_base_url = base_url - from agent.anthropic_adapter import _is_oauth_token as _is_oat - self._is_anthropic_oauth = _is_oat(effective_key) - self._anthropic_client = build_anthropic_client(effective_key, base_url) - # No OpenAI client needed for Anthropic mode + # Bedrock + Claude → use AnthropicBedrock SDK for full feature parity + # (prompt caching, thinking budgets, adaptive thinking). + _is_bedrock_anthropic = self.provider == "bedrock" + if _is_bedrock_anthropic: + from agent.anthropic_adapter import build_anthropic_bedrock_client + import re as _re + _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _br_region = _region_match.group(1) if _region_match else "us-east-1" + self._bedrock_region = _br_region + self._anthropic_client = build_anthropic_bedrock_client(_br_region) + self._anthropic_api_key = "aws-sdk" + self._anthropic_base_url = base_url + self._is_anthropic_oauth = False + self.api_key = "aws-sdk" + self.client = None + self._client_kwargs = {} + if not self.quiet_mode: + print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock + AnthropicBedrock SDK, {_br_region})") + else: + # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic. + # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own API key. + # Falling back would send Anthropic credentials to third-party endpoints (Fixes #1739, #minimax-401). 
+ _is_native_anthropic = self.provider == "anthropic" + effective_key = (api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or "") + self.api_key = effective_key + self._anthropic_api_key = effective_key + self._anthropic_base_url = base_url + # Only mark the session as OAuth-authenticated when the token + # genuinely belongs to native Anthropic. Third-party providers + # (MiniMax, Kimi, GLM, LiteLLM proxies) that accept the + # Anthropic protocol must never trip OAuth code paths — doing + # so injects Claude-Code identity headers and system prompts + # that cause 401/403 on their endpoints. Guards #1739 and + # the third-party identity-injection bug. + from agent.anthropic_adapter import _is_oauth_token as _is_oat + self._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False + self._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout) + # No OpenAI client needed for Anthropic mode + self.client = None + self._client_kwargs = {} + if not self.quiet_mode: + print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)") + if effective_key and len(effective_key) > 12: + print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}") + elif self.api_mode == "bedrock_converse": + # AWS Bedrock — uses boto3 directly, no OpenAI client needed. + # Region is extracted from the base_url or defaults to us-east-1. + import re as _re + _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + self._bedrock_region = _region_match.group(1) if _region_match else "us-east-1" + # Guardrail config — read from config.yaml at init time. 
+ self._bedrock_guardrail_config = None + try: + from hermes_cli.config import load_config as _load_br_cfg + _gr = _load_br_cfg().get("bedrock", {}).get("guardrail", {}) + if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"): + self._bedrock_guardrail_config = { + "guardrailIdentifier": _gr["guardrail_identifier"], + "guardrailVersion": _gr["guardrail_version"], + } + if _gr.get("stream_processing_mode"): + self._bedrock_guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"] + if _gr.get("trace"): + self._bedrock_guardrail_config["trace"] = _gr["trace"] + except Exception: + pass self.client = None self._client_kwargs = {} if not self.quiet_mode: - print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)") - if effective_key and len(effective_key) > 12: - print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}") + _gr_label = " + Guardrails" if self._bedrock_guardrail_config else "" + print(f"🤖 AI Agent initialized with model: {self.model} (AWS Bedrock, {self._bedrock_region}{_gr_label})") else: if api_key and base_url: # Explicit credentials from CLI/gateway — construct directly. # The runtime provider resolver already handled auth for us. 
client_kwargs = {"api_key": api_key, "base_url": base_url} + if _provider_timeout is not None: + client_kwargs["timeout"] = _provider_timeout if self.provider == "copilot-acp": client_kwargs["command"] = self.acp_command client_kwargs["args"] = self.acp_args @@ -918,6 +1237,9 @@ class AIAgent: } elif "portal.qwen.ai" in effective_base.lower(): client_kwargs["default_headers"] = _qwen_portal_headers() + elif "chatgpt.com" in effective_base.lower(): + from agent.auxiliary_client import _codex_cloudflare_headers + client_kwargs["default_headers"] = _codex_cloudflare_headers(api_key) else: # No explicit creds — use the centralized provider router from agent.auxiliary_client import resolve_provider_client @@ -928,6 +1250,8 @@ class AIAgent: "api_key": _routed_client.api_key, "base_url": str(_routed_client.base_url), } + if _provider_timeout is not None: + client_kwargs["timeout"] = _provider_timeout # Preserve any default_headers the router set if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers: client_kwargs["default_headers"] = dict(_routed_client._default_headers) @@ -937,21 +1261,28 @@ class AIAgent: # message instead of silently routing through OpenRouter. _explicit = (self.provider or "").strip().lower() if _explicit and _explicit not in ("auto", "openrouter", "custom"): + # Look up the actual env var name from the provider + # config — some providers use non-standard names + # (e.g. alibaba → DASHSCOPE_API_KEY, not ALIBABA_API_KEY). + _env_hint = f"{_explicit.upper()}_API_KEY" + try: + from hermes_cli.auth import PROVIDER_REGISTRY + _pcfg = PROVIDER_REGISTRY.get(_explicit) + if _pcfg and _pcfg.api_key_env_vars: + _env_hint = _pcfg.api_key_env_vars[0] + except Exception: + pass raise RuntimeError( f"Provider '{_explicit}' is set in config.yaml but no API key " - f"was found. Set the {_explicit.upper()}_API_KEY environment " + f"was found. 
Set the {_env_hint} environment " f"variable, or switch to a different provider with `hermes model`." ) - # Final fallback: try raw OpenRouter key - client_kwargs = { - "api_key": os.getenv("OPENROUTER_API_KEY", ""), - "base_url": OPENROUTER_BASE_URL, - "default_headers": { - "HTTP-Referer": "https://hermes-agent.nousresearch.com", - "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "productivity,cli-agent", - }, - } + # No provider configured — reject with a clear message. + raise RuntimeError( + "No LLM provider configured. Run `hermes model` to " + "select a provider, or run `hermes setup` for first-time " + "configuration." + ) self._client_kwargs = client_kwargs # stored for rebuilding after interrupt @@ -1056,7 +1387,12 @@ class AIAgent: # Show prompt caching status if self._use_prompt_caching and not self.quiet_mode: - source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter" + if self._use_native_cache_layout and self.provider == "anthropic": + source = "native Anthropic" + elif self._use_native_cache_layout: + source = "Anthropic-compatible endpoint" + else: + source = "Claude via OpenRouter" print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)") # Session logging setup - auto-save conversation trajectories for debugging @@ -1128,6 +1464,10 @@ class AIAgent: _agent_cfg = _load_agent_config() except Exception: _agent_cfg = {} + # Cache only the derived auxiliary compression context override that is + # needed later by the startup feasibility check. Avoid exposing a + # broad pseudo-public config object on the agent instance. 
+ self._aux_compression_context_length_config = None # Persistent memory (MEMORY.md + USER.md) -- loaded from disk self._memory_store = None @@ -1163,31 +1503,6 @@ class AIAgent: try: _mem_provider_name = mem_config.get("provider", "") if mem_config else "" - # Auto-migrate: if Honcho was actively configured (enabled + - # credentials) but memory.provider is not set, activate the - # honcho plugin automatically. Just having the config file - # is not enough — the user may have disabled Honcho or the - # file may be from a different tool. - if not _mem_provider_name: - try: - from plugins.memory.honcho.client import HonchoClientConfig as _HCC - _hcfg = _HCC.from_global_config() - if _hcfg.enabled and (_hcfg.api_key or _hcfg.base_url): - _mem_provider_name = "honcho" - # Persist so this only auto-migrates once - try: - from hermes_cli.config import load_config as _lc, save_config as _sc - _cfg = _lc() - _cfg.setdefault("memory", {})["provider"] = "honcho" - _sc(_cfg) - except Exception: - pass - if not self.quiet_mode: - print(" ✓ Auto-migrated Honcho to memory provider plugin.") - print(" Your config and data are preserved.\n") - except Exception: - pass - if _mem_provider_name: from agent.memory_manager import MemoryManager as _MemoryManager from plugins.memory import load_memory_provider as _load_mem @@ -1203,9 +1518,21 @@ class AIAgent: "hermes_home": str(_ghh()), "agent_context": "primary", } + # Thread session title for memory provider scoping + # (e.g. 
honcho uses this to derive chat-scoped session keys) + if self._session_db: + try: + _st = self._session_db.get_session_title(self.session_id) + if _st: + _init_kwargs["session_title"] = _st + except Exception: + pass # Thread gateway user identity for per-user memory scoping if self._user_id: _init_kwargs["user_id"] = self._user_id + # Thread gateway session key for stable per-chat Honcho session isolation + if self._gateway_session_key: + _init_kwargs["gateway_session_key"] = self._gateway_session_key # Profile identity for per-profile provider scoping try: from hermes_cli.profiles import get_active_profile_name @@ -1223,14 +1550,27 @@ class AIAgent: logger.warning("Memory provider plugin init failed: %s", _mpe) self._memory_manager = None - # Inject memory provider tool schemas into the tool surface + # Inject memory provider tool schemas into the tool surface. + # Skip tools whose names already exist (plugins may register the + # same tools via ctx.register_tool(), which lands in self.tools + # through get_tool_definitions()). Duplicate function names cause + # 400 errors on providers that enforce unique names (e.g. Xiaomi + # MiMo via Nous Portal). 
if self._memory_manager and self.tools is not None: + _existing_tool_names = { + t.get("function", {}).get("name") + for t in self.tools + if isinstance(t, dict) + } for _schema in self._memory_manager.get_all_tool_schemas(): + _tname = _schema.get("name", "") + if _tname and _tname in _existing_tool_names: + continue # already registered via plugin path _wrapped = {"type": "function", "function": _schema} self.tools.append(_wrapped) - _tname = _schema.get("name", "") if _tname: self.valid_tool_names.add(_tname) + _existing_tool_names.add(_tname) # Skills config: nudge interval for skill creation reminders self._skill_nudge_interval = 10 @@ -1258,6 +1598,24 @@ class AIAgent: compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20)) compression_protect_last = int(_compression_cfg.get("protect_last_n", 20)) + # Read optional explicit context_length override for the auxiliary + # compression model. Custom endpoints often cannot report this via + # /models, so the startup feasibility check needs the config hint. + try: + _aux_cfg = _agent_cfg.get("auxiliary", {}).get("compression", {}) + except Exception: + _aux_cfg = {} + if isinstance(_aux_cfg, dict): + _aux_context_config = _aux_cfg.get("context_length") + else: + _aux_context_config = None + if _aux_context_config is not None: + try: + _aux_context_config = int(_aux_context_config) + except (TypeError, ValueError): + _aux_context_config = None + self._aux_compression_context_length_config = _aux_context_config + # Read explicit context_length override from model config _model_cfg = _agent_cfg.get("model", {}) if isinstance(_model_cfg, dict): @@ -1268,6 +1626,19 @@ class AIAgent: try: _config_context_length = int(_config_context_length) except (TypeError, ValueError): + logger.warning( + "Invalid model.context_length in config.yaml: %r — " + "must be a plain integer (e.g. 256000, not '256K'). 
" + "Falling back to auto-detection.", + _config_context_length, + ) + import sys + print( + f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n" + f" Must be a plain integer (e.g. 256000, not '256K').\n" + f" Falling back to auto-detected context window.\n", + file=sys.stderr, + ) _config_context_length = None # Store for reuse in switch_model (so config override persists across model switches) @@ -1296,7 +1667,20 @@ class AIAgent: try: _config_context_length = int(_cp_ctx) except (TypeError, ValueError): - pass + logger.warning( + "Invalid context_length for model %r in " + "custom_providers: %r — must be a plain " + "integer (e.g. 256000, not '256K'). " + "Falling back to auto-detection.", + self.model, _cp_ctx, + ) + import sys + print( + f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n" + f" Must be a plain integer (e.g. 256000, not '256K').\n" + f" Falling back to auto-detected context window.\n", + file=sys.stderr, + ) break # Select context engine: config-driven (like memory providers). @@ -1482,6 +1866,7 @@ class AIAgent: "api_key": getattr(self, "api_key", ""), "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, + "use_native_cache_layout": self._use_native_cache_layout, # Context engine state that _try_activate_fallback() overwrites. # Use getattr for model/base_url/api_key/provider since plugin # engines may not have these (they're ContextCompressor-specific). @@ -1553,12 +1938,26 @@ class AIAgent: turn-scoped). """ import logging + import re as _re from hermes_cli.providers import determine_api_mode # ── Determine api_mode if not provided ── if not api_mode: api_mode = determine_api_mode(new_provider, base_url) + # Defense-in-depth: ensure OpenCode base_url doesn't carry a trailing + # /v1 into the anthropic_messages client, which would cause the SDK to + # hit /v1/v1/messages. 
`model_switch.switch_model()` already strips + # this, but we guard here so any direct callers (future code paths, + # tests) can't reintroduce the double-/v1 404 bug. + if ( + api_mode == "anthropic_messages" + and new_provider in ("opencode-zen", "opencode-go") + and isinstance(base_url, str) + and base_url + ): + base_url = _re.sub(r"/v1/?$", "", base_url) + old_model = self.model old_provider = self.provider @@ -1587,8 +1986,9 @@ class AIAgent: self._anthropic_base_url = base_url or getattr(self, "_anthropic_base_url", None) self._anthropic_client = build_anthropic_client( effective_key, self._anthropic_base_url, + timeout=get_provider_request_timeout(self.provider, self.model), ) - self._is_anthropic_oauth = _is_oauth_token(effective_key) + self._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False self.client = None self._client_kwargs = {} else: @@ -1598,6 +1998,9 @@ class AIAgent: "api_key": effective_key, "base_url": effective_base, } + _sm_timeout = get_provider_request_timeout(self.provider, self.model) + if _sm_timeout is not None: + self._client_kwargs["timeout"] = _sm_timeout self.client = self._create_openai_client( dict(self._client_kwargs), reason="switch_model", @@ -1605,10 +2008,13 @@ class AIAgent: ) # ── Re-evaluate prompt caching ── - is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower()) - or is_native_anthropic + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy( + provider=new_provider, + base_url=self.base_url, + api_mode=api_mode, + model=new_model, + ) ) # ── Update context compressor ── @@ -1643,6 +2049,7 @@ class AIAgent: "api_key": getattr(self, "api_key", ""), "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, + "use_native_cache_layout": self._use_native_cache_layout, 
"compressor_model": getattr(_cc, "model", self.model) if _cc else self.model, "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url, "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "", @@ -1733,13 +2140,16 @@ class AIAgent: def _should_emit_quiet_tool_messages(self) -> bool: """Return True when quiet-mode tool summaries should print directly. - When the caller provides ``tool_progress_callback`` (for example the CLI - TUI or a gateway progress renderer), that callback owns progress display. - Emitting quiet-mode summary lines here duplicates progress and leaks tool - previews into flows that are expected to stay silent, such as - ``hermes chat -q``. + Quiet mode is used by both the interactive CLI and embedded/library + callers. The CLI may still want compact progress hints when no callback + owns rendering. Embedded/library callers, on the other hand, expect + quiet mode to be truly silent. """ - return self.quiet_mode and not self.tool_progress_callback + return ( + self.quiet_mode + and not self.tool_progress_callback + and getattr(self, "platform", "") == "cli" + ) def _emit_status(self, message: str) -> None: """Emit a lifecycle status message to both CLI and gateway channels. @@ -1789,7 +2199,10 @@ class AIAgent: return try: from agent.auxiliary_client import get_text_auxiliary_client - from agent.model_metadata import get_model_context_length + from agent.model_metadata import ( + MINIMUM_CONTEXT_LENGTH, + get_model_context_length, + ) client, aux_model = get_text_auxiliary_client( "compression", @@ -1812,45 +2225,61 @@ class AIAgent: aux_base_url = str(getattr(client, "base_url", "")) aux_api_key = str(getattr(client, "api_key", "")) - # Read user-configured context_length for the compression model. - # Custom endpoints often don't support /models API queries so - # get_model_context_length() falls through to the 128K default, - # ignoring the explicit config value. 
Pass it as the highest- - # priority hint so the configured value is always respected. - _aux_cfg = (self.config or {}).get("auxiliary", {}).get("compression", {}) - _aux_context_config = _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None - if _aux_context_config is not None: - try: - _aux_context_config = int(_aux_context_config) - except (TypeError, ValueError): - _aux_context_config = None - aux_context = get_model_context_length( aux_model, base_url=aux_base_url, api_key=aux_api_key, - config_context_length=_aux_context_config, + config_context_length=getattr(self, "_aux_compression_context_length_config", None), ) + # Hard floor: the auxiliary compression model must have at least + # MINIMUM_CONTEXT_LENGTH (64K) tokens of context. The main model + # is already required to meet this floor (checked earlier in + # __init__), so the compression model must too — otherwise it + # cannot summarise a full threshold-sized window of main-model + # content. Mirrors the main-model rejection pattern. + if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH: + raise ValueError( + f"Auxiliary compression model {aux_model} has a context " + f"window of {aux_context:,} tokens, which is below the " + f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes " + f"Agent. Choose a compression model with at least " + f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set " + f"auxiliary.compression.model in config.yaml), or set " + f"auxiliary.compression.context_length to override the " + f"detected value if it is wrong." + ) + threshold = self.context_compressor.threshold_tokens if aux_context < threshold: - # Suggest a threshold that would fit the aux model, - # rounded down to a clean percentage. - safe_pct = int((aux_context / self.context_compressor.context_length) * 100) + # Auto-correct: lower the live session threshold so + # compression actually works this session. 
The hard floor + # above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH, + # so the new threshold is always >= 64K. + old_threshold = threshold + new_threshold = aux_context + self.context_compressor.threshold_tokens = new_threshold + # Keep threshold_percent in sync so future main-model + # context_length changes (update_model) re-derive from a + # sensible number rather than the original too-high value. + main_ctx = self.context_compressor.context_length + if main_ctx: + self.context_compressor.threshold_percent = ( + new_threshold / main_ctx + ) + safe_pct = int((aux_context / main_ctx) * 100) if main_ctx else 50 msg = ( - f"⚠ Compression model ({aux_model}) context " - f"is {aux_context:,} tokens, but the main model's " - f"compression threshold is {threshold:,} tokens. " - f"Context compression will not be possible — the " - f"content to summarise will exceed the auxiliary " - f"model's context window.\n" - f" Fix options (config.yaml):\n" + f"⚠ Compression model ({aux_model}) context is " + f"{aux_context:,} tokens, but the main model's " + f"compression threshold was {old_threshold:,} tokens. " + f"Auto-lowered this session's threshold to " + f"{new_threshold:,} tokens so compression can run.\n" + f" To make this permanent, edit config.yaml — either:\n" f" 1. Use a larger compression model:\n" f" auxiliary:\n" f" compression:\n" - f" model: \n" - f" 2. Lower the compression threshold to fit " - f"the current model:\n" + f" model: \n" + f" 2. 
Lower the compression threshold:\n" f" compression:\n" f" threshold: 0.{safe_pct:02d}" ) @@ -1859,12 +2288,17 @@ class AIAgent: logger.warning( "Auxiliary compression model %s has %d token context, " "below the main model's compression threshold of %d " - "tokens — compression summaries will fail or be " - "severely truncated.", + "tokens — auto-lowered session threshold to %d to " + "keep compression working.", aux_model, aux_context, - threshold, + old_threshold, + new_threshold, ) + except ValueError: + # Hard rejections (aux below minimum context) must propagate + # so the session refuses to start. + raise except Exception as exc: logger.debug( "Compression feasibility check failed (non-fatal): %s", exc @@ -1892,10 +2326,117 @@ class AIAgent: url = (base_url or self._base_url_lower).lower() return "api.openai.com" in url and "openrouter" not in url + def _resolved_api_call_timeout(self) -> float: + """Resolve the effective per-call request timeout in seconds. + + Priority: + 1. ``providers..models..timeout_seconds`` (per-model override) + 2. ``providers..request_timeout_seconds`` (provider-wide) + 3. ``HERMES_API_TIMEOUT`` env var (legacy escape hatch) + 4. 1800.0s default + + Used by OpenAI-wire chat completions (streaming and non-streaming) so + the per-provider config knob wins over the 1800s default. Without this + helper, the hardcoded ``HERMES_API_TIMEOUT`` fallback would always be + passed as a per-call ``timeout=`` kwarg, overriding the client-level + timeout the AIAgent.__init__ path configured. + """ + cfg = get_provider_request_timeout(self.provider, self.model) + if cfg is not None: + return cfg + return float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) + + def _resolved_api_call_stale_timeout_base(self) -> tuple[float, bool]: + """Resolve the base non-stream stale timeout and whether it is implicit. + + Priority: + 1. ``providers..models..stale_timeout_seconds`` + 2. ``providers..stale_timeout_seconds`` + 3. 
``HERMES_API_CALL_STALE_TIMEOUT`` env var + 4. 300.0s default + + Returns ``(timeout_seconds, uses_implicit_default)`` so the caller can + preserve legacy behaviors that only apply when the user has *not* + explicitly configured a stale timeout, such as auto-disabling the + detector for local endpoints. + """ + cfg = get_provider_stale_timeout(self.provider, self.model) + if cfg is not None: + return cfg, False + + env_timeout = os.getenv("HERMES_API_CALL_STALE_TIMEOUT") + if env_timeout is not None: + return float(env_timeout), False + + return 300.0, True + + def _compute_non_stream_stale_timeout(self, messages: list[dict[str, Any]]) -> float: + """Compute the effective non-stream stale timeout for this request.""" + stale_base, uses_implicit_default = self._resolved_api_call_stale_timeout_base() + base_url = getattr(self, "_base_url", None) or self.base_url or "" + if uses_implicit_default and base_url and is_local_endpoint(base_url): + return float("inf") + + est_tokens = sum(len(str(v)) for v in messages) // 4 + if est_tokens > 100_000: + return max(stale_base, 600.0) + if est_tokens > 50_000: + return max(stale_base, 450.0) + return stale_base + def _is_openrouter_url(self) -> bool: """Return True when the base URL targets OpenRouter.""" return "openrouter" in self._base_url_lower + def _anthropic_prompt_cache_policy( + self, + *, + provider: Optional[str] = None, + base_url: Optional[str] = None, + api_mode: Optional[str] = None, + model: Optional[str] = None, + ) -> tuple[bool, bool]: + """Decide whether to apply Anthropic prompt caching and which layout to use. + + Returns ``(should_cache, use_native_layout)``: + * ``should_cache`` — inject ``cache_control`` breakpoints for this + request (applies to OpenRouter Claude, native Anthropic, and + third-party gateways that speak the native Anthropic protocol). 
+ * ``use_native_layout`` — place markers on the *inner* content + blocks (native Anthropic accepts and requires this layout); + when False markers go on the message envelope (OpenRouter and + OpenAI-wire proxies expect the looser layout). + + Third-party providers using the native Anthropic transport + (``api_mode == 'anthropic_messages'`` + Claude-named model) get + caching with the native layout so they benefit from the same + cost reduction as direct Anthropic callers, provided their + gateway implements the Anthropic cache_control contract + (MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do). + """ + eff_provider = (provider if provider is not None else self.provider) or "" + eff_base_url = base_url if base_url is not None else (self.base_url or "") + eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "") + eff_model = (model if model is not None else self.model) or "" + + base_lower = eff_base_url.lower() + is_claude = "claude" in eff_model.lower() + is_openrouter = "openrouter" in base_lower + is_anthropic_wire = eff_api_mode == "anthropic_messages" + is_native_anthropic = ( + is_anthropic_wire + and (eff_provider == "anthropic" or "api.anthropic.com" in base_lower) + ) + + if is_native_anthropic: + return True, True + if is_openrouter and is_claude: + return True, False + if is_anthropic_wire and is_claude: + # Third-party Anthropic-compatible gateway. + return True, True + return False, False + @staticmethod def _model_requires_responses_api(model: str) -> bool: """Return True for models that require the Responses API path. 
@@ -1911,6 +2452,24 @@ class AIAgent: m = m.rsplit("/", 1)[-1] return m.startswith("gpt-5") + @staticmethod + def _provider_model_requires_responses_api( + model: str, + *, + provider: Optional[str] = None, + ) -> bool: + """Return True when this provider/model pair should use Responses API.""" + normalized_provider = (provider or "").strip().lower() + if normalized_provider == "copilot": + try: + from hermes_cli.models import _should_use_copilot_responses_api + return _should_use_copilot_responses_api(model) + except Exception: + # Fall back to the generic GPT-5 rule if Copilot-specific + # logic is unavailable for any reason. + pass + return AIAgent._model_requires_responses_api(model) + def _max_tokens_param(self, value: int) -> dict: """Return the correct max tokens kwarg for the current provider. @@ -1946,19 +2505,104 @@ class AIAgent: return bool(cleaned.strip()) def _strip_think_blocks(self, content: str) -> str: - """Remove reasoning/thinking blocks from content, returning only visible text.""" + """Remove reasoning/thinking blocks from content, returning only visible text. + + Handles four cases: + 1. Closed tag pairs (````) — the common path when + the provider emits complete reasoning blocks. + 2. Unterminated open tag at a block boundary (start of text or + after a newline) — e.g. MiniMax M2.7 / NIM endpoints where the + closing tag is dropped. Everything from the open tag to end + of string is stripped. The block-boundary check mirrors + ``gateway/stream_consumer.py``'s filter so models that mention + ```` in prose aren't over-stripped. + 3. Stray orphan open/close tags that slip through. + 4. Tag variants: ````, ````, ````, + ````, ```` (Gemma 4), all + case-insensitive. + """ if not content: return "" - # Strip all reasoning tag variants: , , , - # , , (Gemma 4) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) + # 1. 
Closed tag pairs — case-insensitive for all variants so + # mixed-case tags (, ) don't slip through to + # the unterminated-tag pass and take trailing content with them. + content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) + content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) + content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) - content = re.sub(r'\s*', '', content, flags=re.IGNORECASE) + # 2. Unterminated reasoning block — open tag at a block boundary + # (start of text, or after a newline) with no matching close. + # Strip from the tag to end of string. Fixes #8878 / #9568 + # (MiniMax M2.7 leaking raw reasoning into assistant content). + content = re.sub( + r'(?:^|\n)[ \t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\b[^>]*>.*$', + '', + content, + flags=re.DOTALL | re.IGNORECASE, + ) + # 3. Stray orphan open/close tags that slipped through. 
+ content = re.sub( + r'\s*', + '', + content, + flags=re.IGNORECASE, + ) return content + @staticmethod + def _has_natural_response_ending(content: str) -> bool: + """Heuristic: does visible assistant text look intentionally finished?""" + if not content: + return False + stripped = content.rstrip() + if not stripped: + return False + if stripped.endswith("```"): + return True + return stripped[-1] in '.!?:)"\']}。!?:)】」』》' + + def _is_ollama_glm_backend(self) -> bool: + """Detect the narrow backend family affected by Ollama/GLM stop misreports.""" + model_lower = (self.model or "").lower() + provider_lower = (self.provider or "").lower() + if "glm" not in model_lower and provider_lower != "zai": + return False + if "ollama" in self._base_url_lower or ":11434" in self._base_url_lower: + return True + return bool(self.base_url and is_local_endpoint(self.base_url)) + + def _should_treat_stop_as_truncated( + self, + finish_reason: str, + assistant_message, + messages: Optional[list] = None, + ) -> bool: + """Detect conservative stop->length misreports for Ollama-hosted GLM models.""" + if finish_reason != "stop" or self.api_mode != "chat_completions": + return False + if not self._is_ollama_glm_backend(): + return False + if not any( + isinstance(msg, dict) and msg.get("role") == "tool" + for msg in (messages or []) + ): + return False + if assistant_message is None or getattr(assistant_message, "tool_calls", None): + return False + + content = getattr(assistant_message, "content", None) + if not isinstance(content, str): + return False + + visible_text = self._strip_think_blocks(content).strip() + if not visible_text: + return False + if len(visible_text) < 20 or not re.search(r"\s", visible_text): + return False + + return not self._has_natural_response_ending(visible_text) + def _looks_like_codex_intermediate_ack( self, user_message: str, @@ -2923,7 +3567,34 @@ class AIAgent: # Signal all tools to abort any in-flight operations immediately. 
# Scope the interrupt to this agent's execution thread so other # agents running in the same process (gateway) are not affected. - _set_interrupt(True, self._execution_thread_id) + if self._execution_thread_id is not None: + _set_interrupt(True, self._execution_thread_id) + self._interrupt_thread_signal_pending = False + else: + # The interrupt arrived before run_conversation() finished + # binding the agent to its execution thread. Defer the tool-level + # interrupt signal until startup completes instead of targeting + # the caller thread by mistake. + self._interrupt_thread_signal_pending = True + # Fan out to concurrent-tool worker threads. Those workers run tools + # on their own tids (ThreadPoolExecutor workers), so `is_interrupted()` + # inside a tool only sees an interrupt when their specific tid is in + # the `_interrupted_threads` set. Without this propagation, an + # already-running concurrent tool (e.g. a terminal command hung on + # network I/O) never notices the interrupt and has to run to its own + # timeout. See `_run_tool` for the matching entry/exit bookkeeping. + # `getattr` fallback covers test stubs that build AIAgent via + # object.__new__ and skip __init__. 
+ _tracker = getattr(self, "_tool_worker_threads", None) + _tracker_lock = getattr(self, "_tool_worker_threads_lock", None) + if _tracker is not None and _tracker_lock is not None: + with _tracker_lock: + _worker_tids = list(_tracker) + for _wtid in _worker_tids: + try: + _set_interrupt(True, _wtid) + except Exception: + pass # Propagate interrupt to any running child agents (subagent delegation) with self._active_children_lock: children_copy = list(self._active_children) @@ -2939,7 +3610,149 @@ class AIAgent: """Clear any pending interrupt request and the per-thread tool interrupt signal.""" self._interrupt_requested = False self._interrupt_message = None - _set_interrupt(False, self._execution_thread_id) + self._interrupt_thread_signal_pending = False + if self._execution_thread_id is not None: + _set_interrupt(False, self._execution_thread_id) + # Also clear any concurrent-tool worker thread bits. Tracked + # workers normally clear their own bit on exit, but an explicit + # clear here guarantees no stale interrupt can survive a turn + # boundary and fire on a subsequent, unrelated tool call that + # happens to get scheduled onto the same recycled worker tid. + # `getattr` fallback covers test stubs that build AIAgent via + # object.__new__ and skip __init__. + _tracker = getattr(self, "_tool_worker_threads", None) + _tracker_lock = getattr(self, "_tool_worker_threads_lock", None) + if _tracker is not None and _tracker_lock is not None: + with _tracker_lock: + _worker_tids = list(_tracker) + for _wtid in _worker_tids: + try: + _set_interrupt(False, _wtid) + except Exception: + pass + # A hard interrupt supersedes any pending /steer — the steer was + # meant for the agent's next tool-call iteration, which will no + # longer happen. Drop it instead of surprising the user with a + # late injection on the post-interrupt turn. 
+ _steer_lock = getattr(self, "_pending_steer_lock", None) + if _steer_lock is not None: + with _steer_lock: + self._pending_steer = None + + def steer(self, text: str) -> bool: + """ + Inject a user message into the next tool result without interrupting. + + Unlike interrupt(), this does NOT stop the current tool call. The + text is stashed and the agent loop appends it to the LAST tool + result's content once the current tool batch finishes. The model + sees the steer as part of the tool output on its next iteration. + + Thread-safe: callable from gateway/CLI/TUI threads. Multiple calls + before the drain point concatenate with newlines. + + Args: + text: The user text to inject. Empty strings are ignored. + + Returns: + True if the steer was accepted, False if the text was empty. + """ + if not text or not text.strip(): + return False + cleaned = text.strip() + _lock = getattr(self, "_pending_steer_lock", None) + if _lock is None: + # Test stubs that built AIAgent via object.__new__ skip __init__. + # Fall back to direct attribute set; no concurrent callers expected + # in those stubs. + existing = getattr(self, "_pending_steer", None) + self._pending_steer = (existing + "\n" + cleaned) if existing else cleaned + return True + with _lock: + if self._pending_steer: + self._pending_steer = self._pending_steer + "\n" + cleaned + else: + self._pending_steer = cleaned + return True + + def _drain_pending_steer(self) -> Optional[str]: + """Return the pending steer text (if any) and clear the slot. + + Safe to call from the agent execution thread after appending tool + results. Returns None when no steer is pending. 
+ """ + _lock = getattr(self, "_pending_steer_lock", None) + if _lock is None: + text = getattr(self, "_pending_steer", None) + self._pending_steer = None + return text + with _lock: + text = self._pending_steer + self._pending_steer = None + return text + + def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None: + """Append any pending /steer text to the last tool result in this turn. + + Called at the end of a tool-call batch, before the next API call. + The steer is appended to the last ``role:"tool"`` message's content + with a clear marker so the model understands it came from the user + and NOT from the tool itself. Role alternation is preserved — + nothing new is inserted, we only modify existing content. + + Args: + messages: The running messages list. + num_tool_msgs: Number of tool results appended in this batch; + used to locate the tail slice safely. + """ + if num_tool_msgs <= 0 or not messages: + return + steer_text = self._drain_pending_steer() + if not steer_text: + return + # Find the last tool-role message in the recent tail. Skipping + # non-tool messages defends against future code appending + # something else at the boundary. + target_idx = None + for j in range(len(messages) - 1, max(len(messages) - num_tool_msgs - 1, -1), -1): + msg = messages[j] + if isinstance(msg, dict) and msg.get("role") == "tool": + target_idx = j + break + if target_idx is None: + # No tool result in this batch (e.g. all skipped by interrupt); + # put the steer back so the caller's fallback path can deliver + # it as a normal next-turn user message. 
+ _lock = getattr(self, "_pending_steer_lock", None) + if _lock is not None: + with _lock: + if self._pending_steer: + self._pending_steer = self._pending_steer + "\n" + steer_text + else: + self._pending_steer = steer_text + else: + existing = getattr(self, "_pending_steer", None) + self._pending_steer = (existing + "\n" + steer_text) if existing else steer_text + return + marker = f"\n\n[USER STEER (injected mid-run, not tool output): {steer_text}]" + existing_content = messages[target_idx].get("content", "") + if not isinstance(existing_content, str): + # Anthropic multimodal content blocks — preserve them and append + # a text block at the end. + try: + blocks = list(existing_content) if existing_content else [] + blocks.append({"type": "text", "text": marker.lstrip()}) + messages[target_idx]["content"] = blocks + except Exception: + # Fall back to string replacement if content shape is unexpected. + messages[target_idx]["content"] = f"{existing_content}{marker}" + else: + messages[target_idx]["content"] = existing_content + marker + logger.info( + "Delivered /steer to agent after tool batch (%d chars): %s", + len(steer_text), + steer_text[:120] + ("..." if len(steer_text) > 120 else ""), + ) def _touch_activity(self, desc: str) -> None: """Update the last-activity timestamp and description (thread-safe).""" @@ -3014,6 +3827,65 @@ class AIAgent: except Exception: pass + def commit_memory_session(self, messages: list = None) -> None: + """Trigger end-of-session extraction without tearing providers down. + Called when session_id rotates (e.g. /new, context compression); + providers keep their state and continue running under the old + session_id — they just flush pending extraction now.""" + if not self._memory_manager: + return + try: + self._memory_manager.on_session_end(messages or []) + except Exception: + pass + + def release_clients(self) -> None: + """Release LLM client resources WITHOUT tearing down session tool state. 
+ + Used by the gateway when evicting this agent from _agent_cache for + memory-management reasons (LRU cap or idle TTL) — the session may + resume at any time with a freshly-built AIAgent that reuses the + same task_id / session_id, so we must NOT kill: + - process_registry entries for task_id (user's bg shells) + - terminal sandbox for task_id (cwd, env, shell state) + - browser daemon for task_id (open tabs, cookies) + - memory provider (has its own lifecycle; keeps running) + + We DO close: + - OpenAI/httpx client pool (big chunk of held memory + sockets; + the rebuilt agent gets a fresh client anyway) + - Active child subagents (per-turn artefacts; safe to drop) + + Safe to call multiple times. Distinct from close() — which is the + hard teardown for actual session boundaries (/new, /reset, session + expiry). + """ + # Close active child agents (per-turn; no cross-turn persistence). + try: + with self._active_children_lock: + children = list(self._active_children) + self._active_children.clear() + for child in children: + try: + child.release_clients() + except Exception: + # Fall back to full close on children; they're per-turn. + try: + child.close() + except Exception: + pass + except Exception: + pass + + # Close the OpenAI/httpx client to release sockets immediately. + try: + client = getattr(self, "client", None) + if client is not None: + self._close_openai_client(client, reason="cache_evict", shared=True) + self.client = None + except Exception: + pass + def close(self) -> None: """Release all resources held by this agent instance. 
@@ -3550,7 +4422,14 @@ class AIAgent: if role in {"user", "assistant"}: content = msg.get("content", "") - content_text = str(content) if content is not None else "" + if isinstance(content, list): + content_parts = _chat_content_to_responses_parts(content) + content_text = "".join( + p.get("text", "") for p in content_parts if p.get("type") == "input_text" + ) + else: + content_parts = [] + content_text = str(content) if content is not None else "" if role == "assistant": # Replay encrypted reasoning items from previous turns @@ -3563,12 +4442,19 @@ class AIAgent: item_id = ri.get("id") if item_id and item_id in seen_item_ids: continue - items.append(ri) + # Strip the "id" field — with store=False the + # Responses API cannot look up items by ID and + # returns 404. The encrypted_content blob is + # self-contained for reasoning chain continuity. + replay_item = {k: v for k, v in ri.items() if k != "id"} + items.append(replay_item) if item_id: seen_item_ids.add(item_id) has_codex_reasoning = True - if content_text.strip(): + if content_parts: + items.append({"role": "assistant", "content": content_parts}) + elif content_text.strip(): items.append({"role": "assistant", "content": content_text}) elif has_codex_reasoning: # The Responses API requires a following item after each @@ -3621,7 +4507,12 @@ class AIAgent: }) continue - items.append({"role": role, "content": content_text}) + # Non-assistant (user) role: emit multimodal parts when present, + # otherwise fall back to the text payload. 
+ if content_parts: + items.append({"role": role, "content": content_parts}) + else: + items.append({"role": role, "content": content_text}) continue if role == "tool": @@ -3704,8 +4595,10 @@ class AIAgent: continue seen_ids.add(item_id) reasoning_item = {"type": "reasoning", "encrypted_content": encrypted} - if isinstance(item_id, str) and item_id: - reasoning_item["id"] = item_id + # Do NOT include the "id" in the outgoing item — with + # store=False (our default) the API tries to resolve the + # id server-side and returns 404. The id is still used + # above for local deduplication via seen_ids. summary = item.get("summary") if isinstance(summary, list): reasoning_item["summary"] = summary @@ -3719,6 +4612,46 @@ class AIAgent: content = item.get("content", "") if content is None: content = "" + if isinstance(content, list): + # Multimodal content from ``_chat_messages_to_responses_input`` + # is already in Responses format (``input_text`` / ``input_image``). + # Validate each part and pass through. + validated: List[Dict[str, Any]] = [] + for part_idx, part in enumerate(content): + if isinstance(part, str): + if part: + validated.append({"type": "input_text", "text": part}) + continue + if not isinstance(part, dict): + raise ValueError( + f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string." 
+ ) + ptype = str(part.get("type") or "").strip().lower() + if ptype in {"input_text", "text", "output_text"}: + text = part.get("text", "") + if not isinstance(text, str): + text = str(text or "") + validated.append({"type": "input_text", "text": text}) + elif ptype in {"input_image", "image_url"}: + image_ref = part.get("image_url", "") + detail = part.get("detail") + if isinstance(image_ref, dict): + url = image_ref.get("url", "") + detail = image_ref.get("detail", detail) + else: + url = image_ref + if not isinstance(url, str): + url = str(url or "") + image_part: Dict[str, Any] = {"type": "input_image", "image_url": url} + if isinstance(detail, str) and detail.strip(): + image_part["detail"] = detail.strip() + validated.append(image_part) + else: + raise ValueError( + f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}." + ) + normalized.append({"role": role, "content": validated}) + continue if not isinstance(content, str): content = str(content) @@ -3806,6 +4739,7 @@ class AIAgent: "model", "instructions", "input", "tools", "store", "reasoning", "include", "max_output_tokens", "temperature", "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", + "extra_headers", } normalized: Dict[str, Any] = { "model": model, @@ -3841,6 +4775,20 @@ class AIAgent: if val is not None: normalized[passthrough_key] = val + extra_headers = api_kwargs.get("extra_headers") + if extra_headers is not None: + if not isinstance(extra_headers, dict): + raise ValueError("Codex Responses request 'extra_headers' must be an object.") + normalized_headers: Dict[str, str] = {} + for key, value in extra_headers.items(): + if not isinstance(key, str) or not key.strip(): + raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.") + if value is None: + continue + normalized_headers[key.strip()] = str(value) + if normalized_headers: + normalized["extra_headers"] = normalized_headers + if allow_stream: 
stream = api_kwargs.get("stream") if stream is not None and stream is not True: @@ -4105,7 +5053,43 @@ class AIAgent: return bool(getattr(http_client, "is_closed", False)) return False + @staticmethod + def _build_keepalive_http_client() -> Any: + try: + import httpx as _httpx + import socket as _socket + + _sock_opts = [(_socket.SOL_SOCKET, _socket.SO_KEEPALIVE, 1)] + if hasattr(_socket, "TCP_KEEPIDLE"): + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPIDLE, 30)) + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPINTVL, 10)) + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPCNT, 3)) + elif hasattr(_socket, "TCP_KEEPALIVE"): + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPALIVE, 30)) + # When a custom transport is provided, httpx won't auto-read proxy + # from env vars (allow_env_proxies = trust_env and transport is None). + # Explicitly read proxy settings to ensure HTTP_PROXY/HTTPS_PROXY work. + _proxy = _get_proxy_from_env() + return _httpx.Client( + transport=_httpx.HTTPTransport(socket_options=_sock_opts), + proxy=_proxy, + ) + except Exception: + return None + def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any: + from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls + # Treat client_kwargs as read-only. Callers pass self._client_kwargs (or shallow + # copies of it) in; any in-place mutation leaks back into the stored dict and is + # reused on subsequent requests. #10933 hit this by injecting an httpx.Client + # transport that was torn down after the first request, so the next request + # wrapped a closed transport and raised "Cannot send a request, as the client + # has been closed" on every retry. The revert resolved that specific path; this + # copy locks the contract so future transport/keepalive work can't reintroduce + # the same class of bug. 
+ client_kwargs = dict(client_kwargs) + _validate_proxy_env_urls() + _validate_base_url(client_kwargs.get("base_url")) if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"): from agent.copilot_acp_client import CopilotACPClient @@ -4117,6 +5101,64 @@ class AIAgent: self._client_log_context(), ) return client + if self.provider == "google-gemini-cli" or str(client_kwargs.get("base_url", "")).startswith("cloudcode-pa://"): + from agent.gemini_cloudcode_adapter import GeminiCloudCodeClient + + # Strip OpenAI-specific kwargs the Gemini client doesn't accept + safe_kwargs = { + k: v for k, v in client_kwargs.items() + if k in {"api_key", "base_url", "default_headers", "project_id", "timeout"} + } + client = GeminiCloudCodeClient(**safe_kwargs) + logger.info( + "Gemini Cloud Code Assist client created (%s, shared=%s) %s", + reason, + shared, + self._client_log_context(), + ) + return client + if self.provider == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + base_url = str(client_kwargs.get("base_url", "") or "") + if is_native_gemini_base_url(base_url): + safe_kwargs = { + k: v for k, v in client_kwargs.items() + if k in {"api_key", "base_url", "default_headers", "timeout", "http_client"} + } + if "http_client" not in safe_kwargs: + keepalive_http = self._build_keepalive_http_client() + if keepalive_http is not None: + safe_kwargs["http_client"] = keepalive_http + client = GeminiNativeClient(**safe_kwargs) + logger.info( + "Gemini native client created (%s, shared=%s) %s", + reason, + shared, + self._client_log_context(), + ) + return client + # Inject TCP keepalives so the kernel detects dead provider connections + # instead of letting them sit silently in CLOSE-WAIT (#10324). 
Without + # this, a peer that drops mid-stream leaves the socket in a state where + # epoll_wait never fires, ``httpx`` read timeout may not trigger, and + # the agent hangs until manually killed. Probes after 30s idle, retry + # every 10s, give up after 3 → dead peer detected within ~60s. + # + # Safety against #10933: the ``client_kwargs = dict(client_kwargs)`` + # above means this injection only lands in the local per-call copy, + # never back into ``self._client_kwargs``. Each ``_create_openai_client`` + # invocation therefore gets its OWN fresh ``httpx.Client`` whose + # lifetime is tied to the OpenAI client it is passed to. When the + # OpenAI client is closed (rebuild, teardown, credential rotation), + # the paired ``httpx.Client`` closes with it, and the next call + # constructs a fresh one — no stale closed transport can be reused. + # Tests in ``tests/run_agent/test_create_openai_client_reuse.py`` and + # ``tests/run_agent/test_sequential_chats_live.py`` pin this invariant. + if "http_client" not in client_kwargs: + keepalive_http = self._build_keepalive_http_client() + if keepalive_http is not None: + client_kwargs["http_client"] = keepalive_http client = OpenAI(**client_kwargs) logger.info( "OpenAI client created (%s, shared=%s) %s", @@ -4618,15 +5660,22 @@ class AIAgent: pass try: - self._anthropic_client = build_anthropic_client(new_token, getattr(self, "_anthropic_base_url", None)) + self._anthropic_client = build_anthropic_client( + new_token, + getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), + ) except Exception as exc: logger.warning("Failed to rebuild Anthropic client after credential refresh: %s", exc) return False self._anthropic_api_key = new_token - # Update OAuth flag — token type may have changed (API key ↔ OAuth) + # Update OAuth flag — token type may have changed (API key ↔ OAuth). 
+ # Only treat as OAuth on native Anthropic; third-party endpoints using + # the Anthropic protocol must not trip OAuth paths (#1739 & third-party + # identity-injection guard). from agent.anthropic_adapter import _is_oauth_token - self._is_anthropic_oauth = _is_oauth_token(new_token) + self._is_anthropic_oauth = _is_oauth_token(new_token) if self.provider == "anthropic" else False return True def _apply_client_headers_for_base_url(self, base_url: str) -> None: @@ -4643,6 +5692,11 @@ class AIAgent: self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} elif "portal.qwen.ai" in normalized: self._client_kwargs["default_headers"] = _qwen_portal_headers() + elif "chatgpt.com" in normalized: + from agent.auxiliary_client import _codex_cloudflare_headers + self._client_kwargs["default_headers"] = _codex_cloudflare_headers( + self._client_kwargs.get("api_key", "") + ) else: self._client_kwargs.pop("default_headers", None) @@ -4660,8 +5714,11 @@ class AIAgent: self._anthropic_api_key = runtime_key self._anthropic_base_url = runtime_base - self._anthropic_client = build_anthropic_client(runtime_key, runtime_base) - self._is_anthropic_oauth = _is_oauth_token(runtime_key) + self._anthropic_client = build_anthropic_client( + runtime_key, runtime_base, + timeout=get_provider_request_timeout(self.provider, self.model), + ) + self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False self.api_key = runtime_key self.base_url = runtime_base return @@ -4790,6 +5847,17 @@ class AIAgent: ) elif self.api_mode == "anthropic_messages": result["response"] = self._anthropic_messages_create(api_kwargs) + elif self.api_mode == "bedrock_converse": + # Bedrock uses boto3 directly — no OpenAI client needed. 
+ from agent.bedrock_adapter import ( + _get_bedrock_runtime_client, + normalize_converse_response, + ) + region = api_kwargs.pop("__bedrock_region__", "us-east-1") + api_kwargs.pop("__bedrock_converse__", None) + client = _get_bedrock_runtime_client(region) + raw_response = client.converse(**api_kwargs) + result["response"] = normalize_converse_response(raw_response) else: request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request") result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs) @@ -4806,18 +5874,9 @@ class AIAgent: # httpx timeout (default 1800s) with zero feedback. The stale # detector kills the connection early so the main retry loop can # apply richer recovery (credential rotation, provider fallback). - _stale_base = float(os.getenv("HERMES_API_CALL_STALE_TIMEOUT", 300.0)) - _base_url = getattr(self, "_base_url", None) or "" - if _stale_base == 300.0 and _base_url and is_local_endpoint(_base_url): - _stale_timeout = float("inf") - else: - _est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4 - if _est_tokens > 100_000: - _stale_timeout = max(_stale_base, 600.0) - elif _est_tokens > 50_000: - _stale_timeout = max(_stale_base, 450.0) - else: - _stale_timeout = _stale_base + _stale_timeout = self._compute_non_stream_stale_timeout( + api_kwargs.get("messages", []) + ) _call_start = time.time() self._touch_activity("waiting for non-streaming API response") @@ -4861,6 +5920,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), ) else: rc = request_client_holder.get("client") @@ -4892,6 +5952,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), ) else: 
request_client = request_client_holder.get("client") @@ -5029,7 +6090,66 @@ class AIAgent: finally: self._codex_on_first_delta = None - result = {"response": None, "error": None} + # Bedrock Converse uses boto3's converse_stream() with real-time delta + # callbacks — same UX as Anthropic and chat_completions streaming. + if self.api_mode == "bedrock_converse": + result = {"response": None, "error": None} + first_delta_fired = {"done": False} + deltas_were_sent = {"yes": False} + + def _fire_first(): + if not first_delta_fired["done"] and on_first_delta: + first_delta_fired["done"] = True + try: + on_first_delta() + except Exception: + pass + + def _bedrock_call(): + try: + from agent.bedrock_adapter import ( + _get_bedrock_runtime_client, + stream_converse_with_callbacks, + ) + region = api_kwargs.pop("__bedrock_region__", "us-east-1") + api_kwargs.pop("__bedrock_converse__", None) + client = _get_bedrock_runtime_client(region) + raw_response = client.converse_stream(**api_kwargs) + + def _on_text(text): + _fire_first() + self._fire_stream_delta(text) + deltas_were_sent["yes"] = True + + def _on_tool(name): + _fire_first() + self._fire_tool_gen_started(name) + + def _on_reasoning(text): + _fire_first() + self._fire_reasoning_delta(text) + + result["response"] = stream_converse_with_callbacks( + raw_response, + on_text_delta=_on_text if self._has_stream_consumers() else None, + on_tool_start=_on_tool, + on_reasoning_delta=_on_reasoning if self.reasoning_callback or self.stream_delta_callback else None, + on_interrupt_check=lambda: self._interrupt_requested, + ) + except Exception as e: + result["error"] = e + + t = threading.Thread(target=_bedrock_call, daemon=True) + t.start() + while t.is_alive(): + t.join(timeout=0.3) + if self._interrupt_requested: + raise InterruptedError("Agent interrupted during Bedrock API call") + if result["error"] is not None: + raise result["error"] + return result["response"] + + result = {"response": None, "error": None, 
"partial_tool_names": []} request_client_holder = {"client": None} first_delta_fired = {"done": False} deltas_were_sent = {"yes": False} # Track if any deltas were fired (for fallback) @@ -5049,18 +6169,30 @@ class AIAgent: def _call_chat_completions(): """Stream a chat completions response.""" import httpx as _httpx - _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) - _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) - # Local providers (Ollama, llama.cpp, vLLM) can take minutes for - # prefill on large contexts before producing the first token. - # Auto-increase the httpx read timeout unless the user explicitly - # overrode HERMES_STREAM_READ_TIMEOUT. - if _stream_read_timeout == 120.0 and self.base_url and is_local_endpoint(self.base_url): - _stream_read_timeout = _base_timeout - logger.debug( - "Local provider detected (%s) — stream read timeout raised to %.0fs", - self.base_url, _stream_read_timeout, - ) + # Per-provider / per-model request_timeout_seconds (from config.yaml) + # wins over the HERMES_API_TIMEOUT env default if the user set it. + _provider_timeout_cfg = get_provider_request_timeout(self.provider, self.model) + _base_timeout = ( + _provider_timeout_cfg + if _provider_timeout_cfg is not None + else float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) + ) + # Read timeout: config wins here too. Otherwise use + # HERMES_STREAM_READ_TIMEOUT (default 120s) for cloud providers. + if _provider_timeout_cfg is not None: + _stream_read_timeout = _provider_timeout_cfg + else: + _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) + # Local providers (Ollama, llama.cpp, vLLM) can take minutes for + # prefill on large contexts before producing the first token. + # Auto-increase the httpx read timeout unless the user explicitly + # overrode HERMES_STREAM_READ_TIMEOUT. 
+ if _stream_read_timeout == 120.0 and self.base_url and is_local_endpoint(self.base_url): + _stream_read_timeout = _base_timeout + logger.debug( + "Local provider detected (%s) — stream read timeout raised to %.0fs", + self.base_url, _stream_read_timeout, + ) stream_kwargs = { **api_kwargs, "stream": True, @@ -5185,7 +6317,15 @@ class AIAgent: entry["id"] = tc_delta.id if tc_delta.function: if tc_delta.function.name: - entry["function"]["name"] += tc_delta.function.name + # Use assignment, not +=. Function names are + # atomic identifiers delivered complete in the + # first chunk (OpenAI spec). Some providers + # (MiniMax M2.7 via NVIDIA NIM) resend the full + # name in every chunk; concatenation would + # produce "read_fileread_file". Assignment + # (matching the OpenAI Node SDK / LiteLLM / + # Vercel AI patterns) is immune to this. + entry["function"]["name"] = tc_delta.function.name if tc_delta.function.arguments: entry["function"]["arguments"] += tc_delta.function.arguments extra = getattr(tc_delta, "extra_content", None) @@ -5201,6 +6341,14 @@ class AIAgent: tool_gen_notified.add(idx) _fire_first_delta() self._fire_tool_gen_started(name) + # Record the partial tool-call name so the outer + # stub-builder can surface a user-visible warning + # if streaming dies before this tool's arguments + # are fully delivered. Without this, a stall + # during tool-call JSON generation lets the stub + # at line ~6107 return `tool_calls=None`, silently + # discarding the attempted action. 
+ result["partial_tool_names"].append(name) if chunk.choices[0].finish_reason: finish_reason = chunk.choices[0].finish_reason @@ -5411,6 +6559,7 @@ class AIAgent: ) except Exception: pass + self._emit_status("🔄 Reconnected — resuming…") continue self._emit_status( "❌ Connection to provider failed after " @@ -5477,9 +6626,27 @@ class AIAgent: t = threading.Thread(target=_call, daemon=True) t.start() + _last_heartbeat = time.time() + _HEARTBEAT_INTERVAL = 30.0 # seconds between gateway activity touches while t.is_alive(): t.join(timeout=0.3) + # Periodic heartbeat: touch the agent's activity tracker so the + # gateway's inactivity monitor knows we're alive while waiting + # for stream chunks. Without this, long thinking pauses (e.g. + # reasoning models) or slow prefill on local providers (Ollama) + # trigger false inactivity timeouts. The _call thread touches + # activity on each chunk, but the gap between API call start + # and first chunk can exceed the gateway timeout — especially + # when the stale-stream timeout is disabled (local providers). + _hb_now = time.time() + if _hb_now - _last_heartbeat >= _HEARTBEAT_INTERVAL: + _last_heartbeat = _hb_now + _waiting_secs = int(_hb_now - last_chunk_time["t"]) + self._touch_activity( + f"waiting for stream response ({_waiting_secs}s, no chunks yet)" + ) + # Detect stale streams: connections kept alive by SSE pings # but delivering no real chunks. Kill the client so the # inner retry loop can start a fresh connection. 
@@ -5526,6 +6693,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), ) else: request_client = request_client_holder.get("client") @@ -5548,13 +6716,44 @@ class AIAgent: _partial_text = ( getattr(self, "_current_streamed_assistant_text", "") or "" ).strip() or None - logger.warning( - "Partial stream delivered before error; returning stub " - "response with %s chars of recovered content to prevent " - "duplicate messages: %s", - len(_partial_text or ""), - result["error"], - ) + + # If the stream died while the model was emitting a tool call, + # the stub below will silently set `tool_calls=None` and the + # agent loop will treat the turn as complete — the attempted + # action is lost with no user-facing signal. Append a + # human-visible warning to the stub content so (a) the user + # knows something failed, and (b) the next turn's model sees + # in conversation history what was attempted and can retry. + _partial_names = list(result.get("partial_tool_names") or []) + if _partial_names: + _name_str = ", ".join(_partial_names[:3]) + if len(_partial_names) > 3: + _name_str += f", +{len(_partial_names) - 3} more" + _warn = ( + f"\n\n⚠ Stream stalled mid tool-call " + f"({_name_str}); the action was not executed. " + f"Ask me to retry if you want to continue." + ) + _partial_text = (_partial_text or "") + _warn + # Also fire as a streaming delta so the user sees it now + # instead of only in the persisted transcript. 
+ try: + self._fire_stream_delta(_warn) + except Exception: + pass + logger.warning( + "Partial stream dropped tool call(s) %s after %s chars " + "of text; surfaced warning to user: %s", + _partial_names, len(_partial_text or ""), result["error"], + ) + else: + logger.warning( + "Partial stream delivered before error; returning stub " + "response with %s chars of recovered content to prevent " + "duplicate messages: %s", + len(_partial_text or ""), + result["error"], + ) _stub_msg = SimpleNamespace( role="assistant", content=_partial_text, tool_calls=None, reasoning_content=None, @@ -5633,10 +6832,16 @@ class AIAgent: fb_api_mode = "anthropic_messages" elif self._is_direct_openai_url(fb_base_url): fb_api_mode = "codex_responses" - elif self._model_requires_responses_api(fb_model): - # GPT-5.x models need Responses API on every provider - # (OpenRouter, Copilot, direct OpenAI, etc.) + elif self._provider_model_requires_responses_api( + fb_model, + provider=fb_provider, + ): + # GPT-5.x models usually need Responses API, but keep + # provider-specific exceptions like Copilot gpt-5-mini on + # chat completions. fb_api_mode = "codex_responses" + elif fb_provider == "bedrock" or "bedrock-runtime" in fb_base_url.lower(): + fb_api_mode = "bedrock_converse" old_model = self.model self.model = fb_model @@ -5645,6 +6850,11 @@ class AIAgent: self.api_mode = fb_api_mode self._fallback_activated = True + # Honor per-provider / per-model request_timeout_seconds for the + # fallback target (same knob the primary client uses). None = use + # SDK default. 
+ _fb_timeout = get_provider_request_timeout(fb_provider, fb_model) + if fb_api_mode == "anthropic_messages": # Build native Anthropic client instead of using OpenAI client from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token @@ -5652,8 +6862,10 @@ class AIAgent: self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = fb_base_url - self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url) - self._is_anthropic_oauth = _is_oauth_token(effective_key) + self._anthropic_client = build_anthropic_client( + effective_key, self._anthropic_base_url, timeout=_fb_timeout, + ) + self._is_anthropic_oauth = _is_oauth_token(effective_key) if fb_provider == "anthropic" else False self.client = None self._client_kwargs = {} else: @@ -5676,12 +6888,21 @@ class AIAgent: "base_url": fb_base_url, **({"default_headers": dict(fb_headers)} if fb_headers else {}), } + if _fb_timeout is not None: + self._client_kwargs["timeout"] = _fb_timeout + # Rebuild the shared OpenAI client so the configured + # timeout takes effect on the very next fallback request, + # not only after a later credential-rotation rebuild. + self._replace_primary_openai_client(reason="fallback_timeout_apply") # Re-evaluate prompt caching for the new provider/model - is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower()) - or is_native_anthropic + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy( + provider=fb_provider, + base_url=fb_base_url, + api_mode=fb_api_mode, + model=fb_model, + ) ) # Update context compressor limits for the fallback model. 
@@ -5741,6 +6962,12 @@ class AIAgent: self.api_key = rt["api_key"] self._client_kwargs = dict(rt["client_kwargs"]) self._use_prompt_caching = rt["use_prompt_caching"] + # Default to native layout when the restored snapshot predates the + # native-vs-proxy split (older sessions saved before this PR). + self._use_native_cache_layout = rt.get( + "use_native_cache_layout", + self.api_mode == "anthropic_messages" and self.provider == "anthropic", + ) # ── Rebuild client for the primary provider ── if self.api_mode == "anthropic_messages": @@ -5749,6 +6976,7 @@ class AIAgent: self._anthropic_base_url = rt["anthropic_base_url"] self._anthropic_client = build_anthropic_client( rt["anthropic_api_key"], rt["anthropic_base_url"], + timeout=get_provider_request_timeout(self.provider, self.model), ) self._is_anthropic_oauth = rt["is_anthropic_oauth"] self.client = None @@ -5845,6 +7073,7 @@ class AIAgent: self._anthropic_base_url = rt["anthropic_base_url"] self._anthropic_client = build_anthropic_client( rt["anthropic_api_key"], rt["anthropic_base_url"], + timeout=get_provider_request_timeout(self.provider, self.model), ) self._is_anthropic_oauth = rt["is_anthropic_oauth"] self.client = None @@ -6016,11 +7245,31 @@ class AIAgent: Alibaba/DashScope keeps dots (e.g. qwen3.5-plus). MiniMax keeps dots (e.g. MiniMax-M2.7). OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free). - ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1).""" - if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai"}: + ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1). + AWS Bedrock uses dotted inference-profile IDs + (e.g. ``global.anthropic.claude-opus-4-7``, + ``us.anthropic.claude-sonnet-4-5-20250929-v1:0``) and rejects + the hyphenated form with + ``HTTP 400 The provided model identifier is invalid``. 
+ Regression for #11976; mirrors the opencode-go fix for #5211 + (commit f77be22c), which extended this same allowlist.""" + if (getattr(self, "provider", "") or "").lower() in { + "alibaba", "minimax", "minimax-cn", + "opencode-go", "opencode-zen", + "zai", "bedrock", + }: return True base = (getattr(self, "base_url", "") or "").lower() - return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base + return ( + "dashscope" in base + or "aliyuncs" in base + or "minimax" in base + or "opencode.ai/zen/" in base + or "bigmodel.cn" in base + # AWS Bedrock runtime endpoints — defense-in-depth when + # ``provider`` is unset but ``base_url`` still names Bedrock. + or "bedrock-runtime." in base + ) def _is_qwen_portal(self) -> bool: """Return True when the base URL targets Qwen Portal.""" @@ -6116,6 +7365,25 @@ class AIAgent: fast_mode=(self.request_overrides or {}).get("speed") == "fast", ) + # AWS Bedrock native Converse API — bypasses the OpenAI client entirely. + # The adapter handles message/tool conversion and boto3 calls directly. 
+ if self.api_mode == "bedrock_converse": + from agent.bedrock_adapter import build_converse_kwargs + region = getattr(self, "_bedrock_region", None) or "us-east-1" + guardrail = getattr(self, "_bedrock_guardrail_config", None) + return { + "__bedrock_converse__": True, + "__bedrock_region__": region, + **build_converse_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + max_tokens=self.max_tokens or 4096, + temperature=None, # Let the model use its default + guardrail_config=guardrail, + ), + } + if self.api_mode == "codex_responses": instructions = "" payload_messages = api_messages @@ -6162,7 +7430,12 @@ class AIAgent: if not is_github_responses: kwargs["prompt_cache_key"] = self.session_id - if reasoning_enabled: + is_xai_responses = self.provider == "xai" or "api.x.ai" in (self.base_url or "").lower() + + if reasoning_enabled and is_xai_responses: + # xAI reasons automatically — no effort param, just include encrypted content + kwargs["include"] = ["reasoning.encrypted_content"] + elif reasoning_enabled: if is_github_responses: # Copilot's Responses route advertises reasoning-effort support, # but not OpenAI-specific prompt cache or encrypted reasoning @@ -6173,7 +7446,7 @@ class AIAgent: else: kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} kwargs["include"] = ["reasoning.encrypted_content"] - elif not is_github_responses: + elif not is_github_responses and not is_xai_responses: kwargs["include"] = [] if self.request_overrides: @@ -6182,6 +7455,9 @@ class AIAgent: if self.max_tokens is not None and not is_codex_backend: kwargs["max_output_tokens"] = self.max_tokens + if is_xai_responses and getattr(self, "session_id", None): + kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id} + return kwargs sanitized_messages = api_messages @@ -6261,8 +7537,16 @@ class AIAgent: api_kwargs = { "model": self.model, "messages": sanitized_messages, - "timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)), + "timeout": 
self._resolved_api_call_timeout(), } + try: + from agent.auxiliary_client import _fixed_temperature_for_model + except Exception: + _fixed_temperature_for_model = None + if _fixed_temperature_for_model is not None: + fixed_temperature = _fixed_temperature_for_model(self.model, self.base_url) + if fixed_temperature is not None: + api_kwargs["temperature"] = fixed_temperature if self._is_qwen_portal(): api_kwargs["metadata"] = { "sessionId": self.session_id or "hermes", @@ -6271,8 +7555,20 @@ class AIAgent: if self.tools: api_kwargs["tools"] = self.tools - if self.max_tokens is not None: + # ── max_tokens for chat_completions ────────────────────────────── + # Priority: ephemeral override (error recovery / length-continuation + # boost) > user-configured max_tokens > provider-specific defaults. + _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) + if _ephemeral_out is not None: + self._ephemeral_max_output_tokens = None # consume immediately + api_kwargs.update(self._max_tokens_param(_ephemeral_out)) + elif self.max_tokens is not None: api_kwargs.update(self._max_tokens_param(self.max_tokens)) + elif "integrate.api.nvidia.com" in self._base_url_lower: + # NVIDIA NIM defaults to a very low max_tokens when omitted, + # causing models like GLM-4.7 to truncate immediately (thinking + # tokens alone exhaust the budget). 16384 provides adequate room. + api_kwargs.update(self._max_tokens_param(16384)) elif self._is_qwen_portal(): # Qwen Portal defaults to a very low max_tokens when omitted. # Reasoning models (qwen3-coder-plus) exhaust that budget on @@ -6346,18 +7642,24 @@ class AIAgent: options["num_ctx"] = self._ollama_num_ctx extra_body["options"] = options + # Ollama / custom provider: pass think=false when reasoning is disabled. + # Ollama does not recognise the OpenRouter-style `reasoning` extra_body + # field, so we use its native `think` parameter instead. + # This prevents thinking-capable models (Qwen3, etc.) 
from generating + # blocks and producing empty-response errors when the user has + # set reasoning_effort: none. + if self.provider == "custom" and self.reasoning_config and isinstance(self.reasoning_config, dict): + _effort = (self.reasoning_config.get("effort") or "").strip().lower() + _enabled = self.reasoning_config.get("enabled", True) + if _effort == "none" or _enabled is False: + extra_body["think"] = False + if self._is_qwen_portal(): extra_body["vl_high_resolution_images"] = True if extra_body: api_kwargs["extra_body"] = extra_body - # xAI prompt caching: send x-grok-conv-id header to route requests - # to the same server, maximizing automatic cache hits. - # https://docs.x.ai/developers/advanced-api-usage/prompt-caching - if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id: - api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id} - # Priority Processing / generic request overrides (e.g. service_tier). # Applied last so overrides win over any defaults set above. if self.request_overrides: @@ -6462,15 +7764,36 @@ class AIAgent: # (gateway, batch, quiet) still get reasoning. # Any reasoning that wasn't shown during streaming is caught by the # CLI post-response display fallback (cli.py _reasoning_shown_this_turn). - if not self.stream_delta_callback: + if not self.stream_delta_callback and not self._stream_callback: try: self.reasoning_callback(reasoning_text) except Exception: pass + # Sanitize surrogates from API response — some models (e.g. Kimi/GLM via Ollama) + # can return invalid surrogate code points that crash json.dumps() on persist. + _raw_content = assistant_message.content or "" + _san_content = _sanitize_surrogates(_raw_content) + if reasoning_text: + reasoning_text = _sanitize_surrogates(reasoning_text) + + # Strip inline reasoning tags ( etc.) from the stored + # assistant content. 
Reasoning was already captured into + # ``reasoning_text`` above (either from structured fields or the + # inline-block fallback), so the raw tags in content are redundant. + # Leaving them in place caused reasoning to leak to messaging + # platforms (#8878, #9568), inflate context on subsequent turns + # (#9306 observed 16% content-size reduction on a real MiniMax + # session), and pollute generated session titles. One strip at the + # storage boundary cleans content for every downstream consumer: + # API replay, session transcript, gateway delivery, CLI display, + # compression, title generation. + if isinstance(_san_content, str) and _san_content: + _san_content = self._strip_think_blocks(_san_content).strip() + msg = { "role": "assistant", - "content": assistant_message.content or "", + "content": _san_content, "reasoning": reasoning_text, "finish_reason": finish_reason, } @@ -6660,14 +7983,22 @@ class AIAgent: # Use auxiliary client for the flush call when available -- # it's cheaper and avoids Codex Responses API incompatibility. - from agent.auxiliary_client import call_llm as _call_llm + from agent.auxiliary_client import ( + call_llm as _call_llm, + _fixed_temperature_for_model, + ) _aux_available = True + # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if + # the model has a strict contract; otherwise the historical 0.3 default. 
+ _flush_temperature = _fixed_temperature_for_model(self.model, self.base_url) + if _flush_temperature is None: + _flush_temperature = 0.3 try: response = _call_llm( task="flush_memories", messages=api_messages, tools=[memory_tool_def], - temperature=0.3, + temperature=_flush_temperature, max_tokens=5120, # timeout resolved from auxiliary.flush_memories.timeout config ) @@ -6679,7 +8010,7 @@ class AIAgent: # No auxiliary client -- use the Codex Responses path directly codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) - codex_kwargs["temperature"] = 0.3 + codex_kwargs["temperature"] = _flush_temperature if "max_output_tokens" in codex_kwargs: codex_kwargs["max_output_tokens"] = 5120 response = self._run_codex_stream(codex_kwargs) @@ -6698,7 +8029,7 @@ class AIAgent: "model": self.model, "messages": api_messages, "tools": [memory_tool_def], - "temperature": 0.3, + "temperature": _flush_temperature, **self._max_tokens_param(5120), } from agent.auxiliary_client import _get_task_timeout @@ -6793,6 +8124,8 @@ class AIAgent: try: # Propagate title to the new session with auto-numbering old_title = self._session_db.get_session_title(self.session_id) + # Trigger memory extraction on the old session before it rotates. + self.commit_memory_session(messages) self._session_db.end_session(self.session_id, "compression") old_session_id = self.session_id self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}" @@ -6835,20 +8168,6 @@ class AIAgent: self.context_compressor.last_prompt_tokens = _compressed_est self.context_compressor.last_completion_tokens = 0 - # Only reset the pressure warning if compression actually brought - # us below the warning level (85% of threshold). When compression - # can't reduce enough (e.g. 
threshold is very low, or system prompt - # alone exceeds the warning level), keep the tier set to prevent - # spamming the user with repeated warnings every loop iteration. - if self.context_compressor.threshold_tokens > 0: - _post_progress = _compressed_est / self.context_compressor.threshold_tokens - if _post_progress < 0.85: - self._context_pressure_warned_at = 0.0 - # Clear class-level dedup for this session so a fresh - # warning cycle can start if context grows again. - _sid = self.session_id or "default" - AIAgent._context_pressure_last_warned.pop(_sid, None) - # Clear the file-read dedup cache. After compression the original # read content is summarised away — if the model re-reads the same # file it needs the full content, not a "file unchanged" stub. @@ -6975,6 +8294,31 @@ class AIAgent: skip_pre_tool_call_hook=True, ) + @staticmethod + def _wrap_verbose(label: str, text: str, indent: str = " ") -> str: + """Word-wrap verbose tool output to fit the terminal width. + + Splits *text* on existing newlines and wraps each line individually, + preserving intentional line breaks (e.g. pretty-printed JSON). + Returns a ready-to-print string with *label* on the first line and + continuation lines indented. + """ + import shutil as _shutil + import textwrap as _tw + cols = _shutil.get_terminal_size((120, 24)).columns + wrap_width = max(40, cols - len(indent)) + out_lines: list[str] = [] + for raw_line in text.split("\n"): + if len(raw_line) <= wrap_width: + out_lines.append(raw_line) + else: + wrapped = _tw.wrap(raw_line, width=wrap_width, + break_long_words=True, + break_on_hyphens=False) + out_lines.extend(wrapped or [raw_line]) + body = ("\n" + indent).join(out_lines) + return f"{indent}{label}{body}" + def _execute_tool_calls_concurrent(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: """Execute multiple tool calls concurrently using a thread pool. 
@@ -7045,7 +8389,7 @@ class AIAgent: args_str = json.dumps(args, ensure_ascii=False) if self.verbose_logging: print(f" 📞 Tool {i}: {name}({list(args.keys())})") - print(f" Args: {args_str}") + print(self._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False))) else: args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str print(f" 📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}") @@ -7069,8 +8413,38 @@ class AIAgent: # Each slot holds (function_name, function_args, function_result, duration, error_flag) results = [None] * num_tools + # Touch activity before launching workers so the gateway knows + # we're executing tools (not stuck). + self._current_tool = tool_names_str + self._touch_activity(f"executing {num_tools} tools concurrently: {tool_names_str}") + def _run_tool(index, tool_call, function_name, function_args): """Worker function executed in a thread.""" + # Register this worker tid so the agent can fan out an interrupt + # to it — see AIAgent.interrupt(). Must happen first thing, and + # must be paired with discard + clear in the finally block. + _worker_tid = threading.current_thread().ident + with self._tool_worker_threads_lock: + self._tool_worker_threads.add(_worker_tid) + # Race: if the agent was interrupted between fan-out (which + # snapshotted an empty/earlier set) and our registration, apply + # the interrupt to our own tid now so is_interrupted() inside + # the tool returns True on the next poll. + if self._interrupt_requested: + try: + from tools.interrupt import set_interrupt as _sif + _sif(True, _worker_tid) + except Exception: + pass + # Set the activity callback on THIS worker thread so + # _wait_for_process (terminal commands) can fire heartbeats. + # The callback is thread-local; the main thread's callback + # is invisible to worker threads. 
+ try: + from tools.environments.base import set_activity_callback + set_activity_callback(self._touch_activity) + except Exception: + pass start = time.time() try: result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id) @@ -7084,11 +8458,21 @@ class AIAgent: else: logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result)) results[index] = (function_name, function_args, result, duration, is_error) + # Tear down worker-tid tracking. Clear any interrupt bit we may + # have set so the next task scheduled onto this recycled tid + # starts with a clean slate. + with self._tool_worker_threads_lock: + self._tool_worker_threads.discard(_worker_tid) + try: + from tools.interrupt import set_interrupt as _sif + _sif(False, _worker_tid) + except Exception: + pass # Start spinner for CLI mode (skip when TUI handles tool progress) spinner = None if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner(): - face = random.choice(KawaiiSpinner.KAWAII_WAITING) + face = random.choice(KawaiiSpinner.get_waiting_faces()) spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=self._print_fn) spinner.start() @@ -7100,8 +8484,52 @@ class AIAgent: f = executor.submit(_run_tool, i, tc, name, args) futures.append(f) - # Wait for all to complete (exceptions are captured inside _run_tool) - concurrent.futures.wait(futures) + # Wait for all to complete with periodic heartbeats so the + # gateway's inactivity monitor doesn't kill us during long + # concurrent tool batches. Also check for user interrupts + # so we don't block indefinitely when the user sends /stop + # or a new message during concurrent tool execution. 
+ _conc_start = time.time() + _interrupt_logged = False + while True: + done, not_done = concurrent.futures.wait( + futures, timeout=5.0, + ) + if not not_done: + break + + # Check for interrupt — the per-thread interrupt signal + # already causes individual tools (terminal, execute_code) + # to abort, but tools without interrupt checks (web_search, + # read_file) will run to completion. Cancel any futures + # that haven't started yet so we don't block on them. + if self._interrupt_requested: + if not _interrupt_logged: + _interrupt_logged = True + self._vprint( + f"{self.log_prefix}⚡ Interrupt: cancelling " + f"{len(not_done)} pending concurrent tool(s)", + force=True, + ) + for f in not_done: + f.cancel() + # Give already-running tools a moment to notice the + # per-thread interrupt signal and exit gracefully. + concurrent.futures.wait(not_done, timeout=3.0) + break + + _conc_elapsed = int(time.time() - _conc_start) + # Heartbeat every ~30s (6 × 5s poll intervals) + if _conc_elapsed > 0 and _conc_elapsed % 30 < 6: + _still_running = [ + parsed_calls[futures.index(f)][1] + for f in not_done + if f in futures + ] + self._touch_activity( + f"concurrent tools running ({_conc_elapsed}s, " + f"{len(not_done)} remaining: {', '.join(_still_running[:3])})" + ) finally: if spinner: # Build a summary message for the spinner stop @@ -7113,8 +8541,11 @@ class AIAgent: for i, (tc, name, args) in enumerate(parsed_calls): r = results[i] if r is None: - # Shouldn't happen, but safety fallback - function_result = f"Error executing tool '{name}': thread did not return a result" + # Tool was cancelled (interrupt) or thread didn't return + if self._interrupt_requested: + function_result = f"[Tool execution cancelled — {name} was skipped due to user interrupt]" + else: + function_result = f"Error executing tool '{name}': thread did not return a result" tool_duration = 0.0 else: function_name, function_args, function_result, tool_duration, is_error = r @@ -7143,7 +8574,7 @@ class 
AIAgent: elif not self.quiet_mode: if self.verbose_logging: print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s") - print(f" Result: {function_result}") + print(self._wrap_verbose("Result: ", function_result)) else: response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result print(f" ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}") @@ -7175,12 +8606,24 @@ class AIAgent: } messages.append(tool_msg) + # ── Per-tool /steer drain ─────────────────────────────────── + # Same as the sequential path: drain between each collected + # result so the steer lands as early as possible. + self._apply_pending_steer_to_tool_results(messages, 1) + # ── Per-turn aggregate budget enforcement ───────────────────────── num_tools = len(parsed_calls) if num_tools > 0: turn_tool_msgs = messages[-num_tools:] enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id)) + # ── /steer injection ────────────────────────────────────────────── + # Append any pending user steer text to the last tool result so the + # agent sees it on its next iteration. Runs AFTER budget enforcement + # so the steer marker is never truncated. See steer() for details. + if num_tools > 0: + self._apply_pending_steer_to_tool_results(messages, num_tools) + def _execute_tool_calls_sequential(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: """Execute tool calls sequentially (original behavior). 
Used for single calls or interactive tools.""" for i, tool_call in enumerate(assistant_message.tool_calls, 1): @@ -7236,7 +8679,7 @@ class AIAgent: args_str = json.dumps(function_args, ensure_ascii=False) if self.verbose_logging: print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})") - print(f" Args: {args_str}") + print(self._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False))) else: args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}") @@ -7333,6 +8776,16 @@ class AIAgent: old_text=function_args.get("old_text"), store=self._memory_store, ) + # Bridge: notify external memory provider of built-in memory writes + if self._memory_manager and function_args.get("action") in ("add", "replace"): + try: + self._memory_manager.on_memory_write( + function_args.get("action", ""), + target, + function_args.get("content", ""), + ) + except Exception: + pass tool_duration = time.time() - tool_start_time if self._should_emit_quiet_tool_messages(): self._vprint(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}") @@ -7356,7 +8809,7 @@ class AIAgent: spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating" spinner = None if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner(): - face = random.choice(KawaiiSpinner.KAWAII_WAITING) + face = random.choice(KawaiiSpinner.get_waiting_faces()) spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=self._print_fn) spinner.start() self._delegate_spinner = spinner @@ -7382,8 +8835,8 @@ class AIAgent: elif self._context_engine_tool_names and function_name in self._context_engine_tool_names: # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.) 
spinner = None - if self.quiet_mode and not self.tool_progress_callback: - face = random.choice(KawaiiSpinner.KAWAII_WAITING) + if self._should_emit_quiet_tool_messages(): + face = random.choice(KawaiiSpinner.get_waiting_faces()) emoji = _get_tool_emoji(function_name) preview = _build_tool_preview(function_name, function_args) or function_name spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn) @@ -7400,14 +8853,14 @@ class AIAgent: cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result) if spinner: spinner.stop(cute_msg) - elif self.quiet_mode: + elif self._should_emit_quiet_tool_messages(): self._vprint(f" {cute_msg}") elif self._memory_manager and self._memory_manager.has_tool(function_name): # Memory provider tools (hindsight_retain, honcho_search, etc.) # These are not in the tool registry — route through MemoryManager. spinner = None if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner(): - face = random.choice(KawaiiSpinner.KAWAII_WAITING) + face = random.choice(KawaiiSpinner.get_waiting_faces()) emoji = _get_tool_emoji(function_name) preview = _build_tool_preview(function_name, function_args) or function_name spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn) @@ -7429,7 +8882,7 @@ class AIAgent: elif self.quiet_mode: spinner = None if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner(): - face = random.choice(KawaiiSpinner.KAWAII_WAITING) + face = random.choice(KawaiiSpinner.get_waiting_faces()) emoji = _get_tool_emoji(function_name) preview = _build_tool_preview(function_name, function_args) or function_name spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn) @@ -7521,10 +8974,16 @@ class AIAgent: } messages.append(tool_msg) + # ── Per-tool /steer drain ─────────────────────────────────── + # Drain pending steer 
BETWEEN individual tool calls so the + # injection lands as soon as a tool finishes — not after the + # entire batch. The model sees it on the next API iteration. + self._apply_pending_steer_to_tool_results(messages, 1) + if not self.quiet_mode: if self.verbose_logging: print(f" ✅ Tool {i} completed in {tool_duration:.2f}s") - print(f" Result: {function_result}") + print(self._wrap_verbose("Result: ", function_result)) else: response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result print(f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}") @@ -7550,46 +9009,13 @@ class AIAgent: if num_tools_seq > 0: enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id)) + # ── /steer injection ────────────────────────────────────────────── + # See _execute_tool_calls_parallel for the rationale. Same hook, + # applied to sequential execution as well. + if num_tools_seq > 0: + self._apply_pending_steer_to_tool_results(messages, num_tools_seq) - def _emit_context_pressure(self, compaction_progress: float, compressor) -> None: - """Notify the user that context is approaching the compaction threshold. - - Args: - compaction_progress: How close to compaction (0.0–1.0, where 1.0 = fires). - compressor: The ContextCompressor instance (for threshold/context info). - - Purely user-facing — does NOT modify the message stream. - For CLI: prints a formatted line with a progress bar. - For gateway: fires status_callback so the platform can send a chat message. - """ - from agent.display import format_context_pressure, format_context_pressure_gateway - - threshold_pct = compressor.threshold_tokens / compressor.context_length if compressor.context_length else 0.5 - - # CLI output — always shown (these are user-facing status notifications, - # not verbose debug output, so they bypass quiet_mode). - # Gateway users also get the callback below. 
- if self.platform in (None, "cli"): - line = format_context_pressure( - compaction_progress=compaction_progress, - threshold_tokens=compressor.threshold_tokens, - threshold_percent=threshold_pct, - compression_enabled=self.compression_enabled, - ) - self._safe_print(line) - - # Gateway / external consumers - if self.status_callback: - try: - msg = format_context_pressure_gateway( - compaction_progress=compaction_progress, - threshold_percent=threshold_pct, - compression_enabled=self.compression_enabled, - ) - self.status_callback("context_pressure", msg) - except Exception: - logger.debug("status_callback error in context pressure", exc_info=True) def _handle_max_iterations(self, messages: list, api_call_count: int) -> str: """Request a summary when max iterations are reached. Returns the final response text.""" @@ -7626,6 +9052,15 @@ class AIAgent: api_messages.insert(sys_offset + idx, pfm.copy()) summary_extra_body = {} + try: + from agent.auxiliary_client import _fixed_temperature_for_model + except Exception: + _fixed_temperature_for_model = None + _summary_temperature = ( + _fixed_temperature_for_model(self.model, self.base_url) + if _fixed_temperature_for_model is not None + else None + ) _is_nous = "nousresearch" in self._base_url_lower if self._supports_reasoning_extra_body(): if self.reasoning_config is not None: @@ -7649,6 +9084,8 @@ class AIAgent: "model": self.model, "messages": api_messages, } + if _summary_temperature is not None: + summary_kwargs["temperature"] = _summary_temperature if self.max_tokens is not None: summary_kwargs.update(self._max_tokens_param(self.max_tokens)) @@ -7714,6 +9151,8 @@ class AIAgent: "model": self.model, "messages": api_messages, } + if _summary_temperature is not None: + summary_kwargs["temperature"] = _summary_temperature if self.max_tokens is not None: summary_kwargs.update(self._max_tokens_param(self.max_tokens)) if summary_extra_body: @@ -7792,6 +9231,16 @@ class AIAgent: if isinstance(persist_user_message, str): 
persist_user_message = _sanitize_surrogates(persist_user_message) + # Strip leaked blocks from user input. When Honcho's + # saveMessages persists a turn that included injected context, the block + # can reappear in the next turn's user message via message history. + # Stripping here prevents stale memory tags from leaking into the + # conversation and being visible to the user or the model as user text. + if isinstance(user_message, str): + user_message = sanitize_context(user_message) + if isinstance(persist_user_message, str): + persist_user_message = sanitize_context(persist_user_message) + # Store stream callback for _interruptible_api_call to pick up self._stream_callback = stream_callback self._persist_user_message_idx = None @@ -7807,7 +9256,9 @@ class AIAgent: self._incomplete_scratchpad_retries = 0 self._codex_incomplete_retries = 0 self._thinking_prefill_retries = 0 + self._post_tool_empty_retried = False self._last_content_with_tools = None + self._last_content_tools_all_housekeeping = False self._mute_post_response = False self._unicode_sanitization_passes = 0 @@ -7836,7 +9287,8 @@ class AIAgent: self.iteration_budget = IterationBudget(self.max_iterations) # Log conversation turn start for debugging/observability - _msg_preview = (user_message[:80] + "...") if len(user_message) > 80 else user_message + _preview_text = _summarize_user_message_for_log(user_message) + _msg_preview = (_preview_text[:80] + "...") if len(_preview_text) > 80 else _preview_text _msg_preview = _msg_preview.replace("\n", " ") logger.info( "conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r", @@ -7884,7 +9336,8 @@ class AIAgent: self._persist_user_message_idx = current_turn_user_idx if not self.quiet_mode: - self._safe_print(f"💬 Starting conversation: '{user_message[:60]}{'...' 
if len(user_message) > 60 else ''}'") + _print_preview = _summarize_user_message_for_log(user_message) + self._safe_print(f"💬 Starting conversation: '{_print_preview[:60]}{'...' if len(_print_preview) > 60 else ''}'") # ── System prompt (cached per session for prefix caching) ── # Built once on first call, reused for all subsequent calls. @@ -7987,6 +9440,16 @@ class AIAgent: # skipping them because conversation_history is still the # pre-compression length. conversation_history = None + # Fix: reset retry counters after compression so the model + # gets a fresh budget on the compressed context. Without + # this, pre-compression retries carry over and the model + # hits "(empty)" immediately after compression-induced + # context loss. + self._empty_content_retries = 0 + self._thinking_prefill_retries = 0 + self._last_content_with_tools = None + self._last_content_tools_all_housekeeping = False + self._mute_post_response = False # Re-estimate after compression _preflight_tokens = estimate_request_tokens_rough( messages, @@ -8045,11 +9508,29 @@ class AIAgent: # Record the execution thread so interrupt()/clear_interrupt() can # scope the tool-level interrupt signal to THIS agent's thread only. - # Must be set before clear_interrupt() which uses it. + # Must be set before any thread-scoped interrupt syncing. self._execution_thread_id = threading.current_thread().ident - # Clear any stale interrupt state at start - self.clear_interrupt() + # Always clear stale per-thread state from a previous turn. If an + # interrupt arrived before startup finished, preserve it and bind it + # to this execution thread now instead of dropping it on the floor. 
+ _set_interrupt(False, self._execution_thread_id) + if self._interrupt_requested: + _set_interrupt(True, self._execution_thread_id) + self._interrupt_thread_signal_pending = False + else: + self._interrupt_message = None + self._interrupt_thread_signal_pending = False + + # Notify memory providers of the new turn so cadence tracking works. + # Must happen BEFORE prefetch_all() so providers know which turn it is + # and can gate context/dialectic refresh via contextCadence/dialecticCadence. + if self._memory_manager: + try: + _turn_msg = original_user_message if isinstance(original_user_message, str) else "" + self._memory_manager.on_turn_start(self._user_turn_count, _turn_msg) + except Exception: + pass # External memory provider: prefetch once before the tool loop. # Reuse the cached result on every iteration to avoid re-calling @@ -8109,6 +9590,7 @@ class AIAgent: { "name": tc["function"]["name"], "result": _results_by_id.get(tc.get("id")), + "arguments": tc["function"].get("arguments"), } for tc in _m["tool_calls"] if isinstance(tc, dict) @@ -8199,12 +9681,19 @@ class AIAgent: for idx, pfm in enumerate(self.prefill_messages): api_messages.insert(sys_offset + idx, pfm.copy()) - # Apply Anthropic prompt caching for Claude models via OpenRouter. - # Auto-detected: if model name contains "claude" and base_url is OpenRouter, - # inject cache_control breakpoints (system + last 3 messages) to reduce - # input token costs by ~75% on multi-turn conversations. + # Apply Anthropic prompt caching for Claude models on native + # Anthropic, OpenRouter, and third-party Anthropic-compatible + # gateways. Auto-detected: if ``_use_prompt_caching`` is set, + # inject cache_control breakpoints (system + last 3 messages) + # to reduce input token costs by ~75% on multi-turn + # conversations. Layout is chosen per endpoint by + # ``_anthropic_prompt_cache_policy``. 
if self._use_prompt_caching: - api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages')) + api_messages = apply_anthropic_cache_control( + api_messages, + cache_ttl=self._cache_ttl, + native_anthropic=self._use_native_cache_layout, + ) # Safety net: strip orphaned tool results / add stubs for missing # results before sending to the API. Runs unconditionally — not @@ -8238,10 +9727,19 @@ class AIAgent: ), }} except Exception: - pass + tc["function"]["arguments"] = _repair_tool_call_arguments( + tc["function"]["arguments"], + tc["function"].get("name", "?"), + ) new_tcs.append(tc) am["tool_calls"] = new_tcs + # Proactively strip any surrogate characters before the API call. + # Models served via Ollama (Kimi K2.5, GLM-5, Qwen) can return + # lone surrogates (U+D800-U+DFFF) that crash json.dumps() inside + # the OpenAI SDK. Sanitizing here prevents the 3-retry cycle. + _sanitize_messages_surrogates(api_messages) + # Calculate approximate request size for logging total_chars = sum(len(str(msg)) for msg in api_messages) approx_tokens = estimate_messages_tokens_rough(api_messages) @@ -8255,8 +9753,8 @@ class AIAgent: self._vprint(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}") else: # Animated thinking spinner in quiet mode - face = random.choice(KawaiiSpinner.KAWAII_THINKING) - verb = random.choice(KawaiiSpinner.THINKING_VERBS) + face = random.choice(KawaiiSpinner.get_thinking_faces()) + verb = random.choice(KawaiiSpinner.get_thinking_verbs()) if self.thinking_callback: # CLI TUI mode: use prompt_toolkit widget instead of raw spinner # (works in both streaming and non-streaming modes) @@ -8292,6 +9790,53 @@ class AIAgent: api_kwargs = None # Guard against UnboundLocalError in except handler while retry_count < max_retries: + # ── Nous Portal rate limit guard ────────────────────── + # If another session already recorded that Nous is rate- + # limited, 
skip the API call entirely. Each attempt + # (including SDK-level retries) counts against RPH and + # deepens the rate limit hole. + if self.provider == "nous": + try: + from agent.nous_rate_guard import ( + nous_rate_limit_remaining, + format_remaining as _fmt_nous_remaining, + ) + _nous_remaining = nous_rate_limit_remaining() + if _nous_remaining is not None and _nous_remaining > 0: + _nous_msg = ( + f"Nous Portal rate limit active — " + f"resets in {_fmt_nous_remaining(_nous_remaining)}." + ) + self._vprint( + f"{self.log_prefix}⏳ {_nous_msg} Trying fallback...", + force=True, + ) + self._emit_status(f"⏳ {_nous_msg}") + if self._try_activate_fallback(): + retry_count = 0 + compression_attempts = 0 + primary_recovery_attempted = False + continue + # No fallback available — return with clear message + self._persist_session(messages, conversation_history) + return { + "final_response": ( + f"⏳ {_nous_msg}\n\n" + "No fallback provider available. " + "Try again after the reset, or add a " + "fallback provider in config.yaml." 
+ ), + "messages": messages, + "api_calls": api_call_count, + "completed": False, + "failed": True, + "error": _nous_msg, + } + except ImportError: + pass + except Exception: + pass # Never let rate guard break the agent loop + try: self._reset_stream_delivery_tracking() api_kwargs = self._build_api_kwargs(api_messages) @@ -8593,29 +10138,45 @@ class AIAgent: finish_reason = stop_reason_map.get(response.stop_reason, "stop") else: finish_reason = response.choices[0].finish_reason + assistant_message = response.choices[0].message + if self._should_treat_stop_as_truncated( + finish_reason, + assistant_message, + messages, + ): + self._vprint( + f"{self.log_prefix}⚠️ Treating suspicious Ollama/GLM stop response as truncated", + force=True, + ) + finish_reason = "length" if finish_reason == "length": self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True) + # Normalize the truncated response to a single OpenAI-style + # message shape so text-continuation and tool-call retry + # work uniformly across chat_completions, bedrock_converse, + # and anthropic_messages. For Anthropic we use the same + # adapter the agent loop already relies on so the rebuilt + # interim assistant message is byte-identical to what + # would have been appended in the non-truncated path. 
+ _trunc_msg = None + if self.api_mode in ("chat_completions", "bedrock_converse"): + _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None + elif self.api_mode == "anthropic_messages": + from agent.anthropic_adapter import normalize_anthropic_response + _trunc_msg, _ = normalize_anthropic_response( + response, strip_tool_prefix=self._is_anthropic_oauth + ) + + _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None + _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False + # ── Detect thinking-budget exhaustion ────────────── # When the model spends ALL output tokens on reasoning # and has none left for the response, continuation # retries are pointless. Detect this early and give a # targeted error instead of wasting 3 API calls. - _trunc_content = None - _trunc_has_tool_calls = False - if self.api_mode == "chat_completions": - _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None - _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None - _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False - elif self.api_mode == "anthropic_messages": - # Anthropic response.content is a list of blocks - _text_parts = [] - for _blk in getattr(response, "content", []): - if getattr(_blk, "type", None) == "text": - _text_parts.append(getattr(_blk, "text", "")) - _trunc_content = "\n".join(_text_parts) if _text_parts else None - # A response is "thinking exhausted" only when the model # actually produced reasoning blocks but no visible text after # them. Models that do not use tags (e.g. 
GLM-4.7 on @@ -8659,8 +10220,7 @@ class AIAgent: "and had none left for the actual response.\n\n" "To fix this:\n" "→ Lower reasoning effort: `/thinkon low` or `/thinkon minimal`\n" - "→ Increase the output token limit: " - "set `model.max_tokens` in config.yaml" + "→ Or switch to a larger/non-reasoning model with `/model`" ) self._cleanup_task_resources(effective_task_id) self._persist_session(messages, conversation_history) @@ -8673,9 +10233,9 @@ class AIAgent: "error": _exhaust_error, } - if self.api_mode == "chat_completions": - assistant_message = response.choices[0].message - if not assistant_message.tool_calls: + if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"): + assistant_message = _trunc_msg + if assistant_message is not None and not _trunc_has_tool_calls: length_continue_retries += 1 interim_msg = self._build_assistant_message(assistant_message, finish_reason) messages.append(interim_msg) @@ -8713,9 +10273,9 @@ class AIAgent: "error": "Response remained truncated after 3 continuation attempts", } - if self.api_mode == "chat_completions": - assistant_message = response.choices[0].message - if assistant_message.tool_calls: + if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"): + assistant_message = _trunc_msg + if assistant_message is not None and _trunc_has_tool_calls: if truncated_tool_call_retries < 1: truncated_tool_call_retries += 1 self._vprint( @@ -8880,6 +10440,15 @@ class AIAgent: self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") has_retried_429 = False # Reset on success + # Clear Nous rate limit state on successful request — + # proves the limit has reset and other sessions can + # resume hitting Nous. 
+ if self.provider == "nous": + try: + from agent.nous_rate_guard import clear_nous_rate_limit + clear_nous_rate_limit() + except Exception: + pass self._touch_activity(f"API call #{api_call_count} completed") break # Success, exit retry loop @@ -8918,20 +10487,70 @@ class AIAgent: if isinstance(api_error, UnicodeEncodeError) and getattr(self, '_unicode_sanitization_passes', 0) < 2: _err_str = str(api_error).lower() _is_ascii_codec = "'ascii'" in _err_str or "ascii" in _err_str + # Detect surrogate errors — utf-8 codec refusing to + # encode U+D800..U+DFFF. The error text is: + # "'utf-8' codec can't encode characters in position + # N-M: surrogates not allowed" + _is_surrogate_error = ( + "surrogate" in _err_str + or ("'utf-8'" in _err_str and not _is_ascii_codec) + ) + # Sanitize surrogates from both the canonical `messages` + # list AND `api_messages` (the API-copy, which may carry + # `reasoning_content`/`reasoning_details` transformed + # from `reasoning` — fields the canonical list doesn't + # have directly). Also clean `api_kwargs` if built and + # `prefill_messages` if present. Mirrors the ASCII + # codec recovery below. _surrogates_found = _sanitize_messages_surrogates(messages) - if _surrogates_found: + if isinstance(api_messages, list): + if _sanitize_messages_surrogates(api_messages): + _surrogates_found = True + if isinstance(api_kwargs, dict): + if _sanitize_structure_surrogates(api_kwargs): + _surrogates_found = True + if isinstance(getattr(self, "prefill_messages", None), list): + if _sanitize_messages_surrogates(self.prefill_messages): + _surrogates_found = True + # Gate the retry on the error type, not on whether we + # found anything — _force_ascii_payload / the extended + # surrogate walker above cover all known paths, but a + # new transformed field could still slip through. If + # the error was a surrogate encode failure, always let + # the retry run; the proactive sanitizer at line ~8781 + # runs again on the next iteration. 
Bounded by + # _unicode_sanitization_passes < 2 (outer guard). + if _surrogates_found or _is_surrogate_error: self._unicode_sanitization_passes += 1 - self._vprint( - f"{self.log_prefix}⚠️ Stripped invalid surrogate characters from messages. Retrying...", - force=True, - ) + if _surrogates_found: + self._vprint( + f"{self.log_prefix}⚠️ Stripped invalid surrogate characters from messages. Retrying...", + force=True, + ) + else: + self._vprint( + f"{self.log_prefix}⚠️ Surrogate encoding error — retrying after full-payload sanitization...", + force=True, + ) continue if _is_ascii_codec: self._force_ascii_payload = True # ASCII codec: the system encoding can't handle # non-ASCII characters at all. Sanitize all # non-ASCII content from messages/tool schemas and retry. + # Sanitize both the canonical `messages` list and + # `api_messages` (the API-copy built before the retry + # loop, which may contain extra fields like + # reasoning_content that are not in `messages`). _messages_sanitized = _sanitize_messages_non_ascii(messages) + if isinstance(api_messages, list): + _sanitize_messages_non_ascii(api_messages) + # Also sanitize the last api_kwargs if already built, + # so a leftover non-ASCII value in a transformed field + # (e.g. extra_body, reasoning_content) doesn't survive + # into the next attempt via _build_api_kwargs cache paths. + if isinstance(api_kwargs, dict): + _sanitize_structure_non_ascii(api_kwargs) _prefill_sanitized = False if isinstance(getattr(self, "prefill_messages", None), list): _prefill_sanitized = _sanitize_messages_non_ascii(self.prefill_messages) @@ -8962,21 +10581,61 @@ class AIAgent: if isinstance(_default_headers, dict): _headers_sanitized = _sanitize_structure_non_ascii(_default_headers) - if ( + # Sanitize the API key — non-ASCII characters in + # credentials (e.g. ʋ instead of v from a bad + # copy-paste) cause httpx to fail when encoding + # the Authorization header as ASCII. 
This is the + # most common cause of persistent UnicodeEncodeError + # that survives message/tool sanitization (#6843). + _credential_sanitized = False + _raw_key = getattr(self, "api_key", None) or "" + if _raw_key: + _clean_key = _strip_non_ascii(_raw_key) + if _clean_key != _raw_key: + self.api_key = _clean_key + if isinstance(getattr(self, "_client_kwargs", None), dict): + self._client_kwargs["api_key"] = _clean_key + # Also update the live client — it holds its + # own copy of api_key which auth_headers reads + # dynamically on every request. + if getattr(self, "client", None) is not None and hasattr(self.client, "api_key"): + self.client.api_key = _clean_key + _credential_sanitized = True + self._vprint( + f"{self.log_prefix}⚠️ API key contained non-ASCII characters " + f"(bad copy-paste?) — stripped them. If auth fails, " + f"re-copy the key from your provider's dashboard.", + force=True, + ) + + # Always retry on ASCII codec detection — + # _force_ascii_payload guarantees the full + # api_kwargs payload is sanitized on the + # next iteration (line ~8475). Even when + # per-component checks above find nothing + # (e.g. non-ASCII only in api_messages' + # reasoning_content), the flag catches it. + # Bounded by _unicode_sanitization_passes < 2. + self._unicode_sanitization_passes += 1 + _any_sanitized = ( _messages_sanitized or _prefill_sanitized or _tools_sanitized or _system_sanitized or _headers_sanitized - ): - self._unicode_sanitization_passes += 1 + or _credential_sanitized + ) + if _any_sanitized: self._vprint( f"{self.log_prefix}⚠️ System encoding is ASCII — stripped non-ASCII characters from request payload. Retrying...", force=True, ) - continue - # Nothing to sanitize in any payload component. - # Fall through to normal error path. 
+ else: + self._vprint( + f"{self.log_prefix}⚠️ System encoding is ASCII — enabling full-payload sanitization for retry...", + force=True, + ) + continue status_code = getattr(api_error, "status_code", None) error_context = self._extract_api_error_context(api_error) @@ -9049,7 +10708,7 @@ class AIAgent: _dhh = _dhh_fn() print(f"{self.log_prefix} • Check ANTHROPIC_TOKEN in {_dhh}/.env for Hermes-managed OAuth/setup tokens") print(f"{self.log_prefix} • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values") - print(f"{self.log_prefix} • For API keys: verify at https://console.anthropic.com/settings/keys") + print(f"{self.log_prefix} • For API keys: verify at https://platform.claude.com/settings/keys") print(f"{self.log_prefix} • For Claude Code: run 'claude /login' to refresh, then retry") print(f"{self.log_prefix} • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"") print(f"{self.log_prefix} • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"") @@ -9239,6 +10898,38 @@ class AIAgent: primary_recovery_attempted = False continue + # ── Nous Portal: record rate limit & skip retries ───── + # When Nous returns a 429, record the reset time to a + # shared file so ALL sessions (cron, gateway, auxiliary) + # know not to pile on. Then skip further retries — + # each one burns another RPH request and deepens the + # rate limit hole. The retry loop's top-of-iteration + # guard will catch this on the next pass and try + # fallback or bail with a clear message. 
+ if ( + is_rate_limited + and self.provider == "nous" + and classified.reason == FailoverReason.rate_limit + and not recovered_with_pool + ): + try: + from agent.nous_rate_guard import record_nous_rate_limit + _err_resp = getattr(api_error, "response", None) + _err_hdrs = ( + getattr(_err_resp, "headers", None) + if _err_resp else None + ) + record_nous_rate_limit( + headers=_err_hdrs, + error_context=error_context, + ) + except Exception: + pass + # Skip straight to max_retries — the top-of-loop + # guard will handle fallback or bail cleanly. + retry_count = max_retries + continue + is_payload_too_large = ( classified.reason == FailoverReason.payload_too_large ) @@ -9255,7 +10946,9 @@ class AIAgent: "completed": False, "api_calls": api_call_count, "error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.", - "partial": True + "partial": True, + "failed": True, + "compression_exhausted": True, } self._emit_status(f"⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...") @@ -9284,7 +10977,9 @@ class AIAgent: "completed": False, "api_calls": api_call_count, "error": "Request payload too large (413). Cannot compress further.", - "partial": True + "partial": True, + "failed": True, + "compression_exhausted": True, } # Check for context-length errors BEFORE generic 4xx handler. 
@@ -9335,7 +11030,9 @@ class AIAgent: "completed": False, "api_calls": api_call_count, "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.", - "partial": True + "partial": True, + "failed": True, + "compression_exhausted": True, } restart_with_compressed_messages = True break @@ -9385,7 +11082,9 @@ class AIAgent: "completed": False, "api_calls": api_call_count, "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.", - "partial": True + "partial": True, + "failed": True, + "compression_exhausted": True, } self._emit_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...") @@ -9416,7 +11115,9 @@ class AIAgent: "completed": False, "api_calls": api_call_count, "error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.", - "partial": True + "partial": True, + "failed": True, + "compression_exhausted": True, } # Check for non-retryable client errors. The classifier @@ -9598,9 +11299,9 @@ class AIAgent: pass wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0) if is_rate_limited: - self._emit_status(f"⏱️ Rate limit reached. Waiting {wait_time}s before retry (attempt {retry_count + 1}/{max_retries})...") + self._emit_status(f"⏱️ Rate limited. Waiting {wait_time:.1f}s (attempt {retry_count + 1}/{max_retries})...") else: - self._emit_status(f"⏳ Retrying in {wait_time}s (attempt {retry_count}/{max_retries})...") + self._emit_status(f"⏳ Retrying in {wait_time:.1f}s (attempt {retry_count}/{max_retries})...") logger.warning( "Retrying API call in %ss (attempt %s/%s) %s error=%s", wait_time, @@ -9651,6 +11352,12 @@ class AIAgent: continue if restart_with_length_continuation: + # Progressively boost the output token budget on each retry. + # Retry 1 → 2× base, retry 2 → 3× base, capped at 32 768. 
+ # Applies to all providers via _ephemeral_max_output_tokens. + _boost_base = self.max_tokens if self.max_tokens else 4096 + _boost = _boost_base * (length_continue_retries + 1) + self._ephemeral_max_output_tokens = min(_boost, 32768) continue # Guard: if all retries exhausted without a successful response @@ -10010,9 +11717,10 @@ class AIAgent: tc.function.name in _HOUSEKEEPING_TOOLS for tc in assistant_message.tool_calls ) + self._last_content_tools_all_housekeeping = _all_housekeeping if _all_housekeeping and self._has_stream_consumers(): self._mute_post_response = True - elif self.quiet_mode: + elif self._should_emit_quiet_tool_messages(): clean = self._strip_think_blocks(turn_content).strip() if clean: self._vprint(f" ┊ 💬 {clean}") @@ -10038,6 +11746,10 @@ class AIAgent: if _had_prefill: self._thinking_prefill_retries = 0 self._empty_content_retries = 0 + # Successful tool execution — reset the post-tool nudge + # flag so it can fire again if the model goes empty on + # a LATER tool round. + self._post_tool_empty_retried = False messages.append(assistant_msg) self._emit_interim_assistant_message(assistant_msg) @@ -10092,45 +11804,15 @@ class AIAgent: # should_compress(0) never fires. (#2153) _compressor = self.context_compressor if _compressor.last_prompt_tokens > 0: - _real_tokens = ( - _compressor.last_prompt_tokens - + _compressor.last_completion_tokens - ) + # Only use prompt_tokens — completion/reasoning + # tokens don't consume context window space. + # Thinking models (GLM-5.1, QwQ, DeepSeek R1) + # inflate completion_tokens with reasoning, + # causing premature compression. (#12026) + _real_tokens = _compressor.last_prompt_tokens else: _real_tokens = estimate_messages_tokens_rough(messages) - # ── Context pressure warnings (user-facing only) ────────── - # Notify the user (NOT the LLM) as context approaches the - # compaction threshold. Thresholds are relative to where - # compaction fires, not the raw context window. 
- # Does not inject into messages — just prints to CLI output - # and fires status_callback for gateway platforms. - # Tiered: 85% (orange) and 95% (red/critical). - if _compressor.threshold_tokens > 0: - _compaction_progress = _real_tokens / _compressor.threshold_tokens - # Determine the warning tier for this progress level - _warn_tier = 0.0 - if _compaction_progress >= 0.95: - _warn_tier = 0.95 - elif _compaction_progress >= 0.85: - _warn_tier = 0.85 - if _warn_tier > self._context_pressure_warned_at: - # Class-level dedup: check if this session was already - # warned at this tier within the cooldown window. - _sid = self.session_id or "default" - _last = AIAgent._context_pressure_last_warned.get(_sid) - _now = time.time() - if _last is None or _last[0] < _warn_tier or (_now - _last[1]) >= self._CONTEXT_PRESSURE_COOLDOWN: - self._context_pressure_warned_at = _warn_tier - AIAgent._context_pressure_last_warned[_sid] = (_warn_tier, _now) - self._emit_context_pressure(_compaction_progress, _compressor) - # Evict stale entries (older than 2x cooldown) - _cutoff = _now - self._CONTEXT_PRESSURE_COOLDOWN * 2 - AIAgent._context_pressure_last_warned = { - k: v for k, v in AIAgent._context_pressure_last_warned.items() - if v[1] > _cutoff - } - if self.compression_enabled and _compressor.should_compress(_real_tokens): self._safe_print(" ⟳ compacting context…") messages, active_system_prompt = self._compress_context( @@ -10154,6 +11836,13 @@ class AIAgent: # No tool calls - this is the final response final_response = assistant_message.content or "" + # Fix: unmute output when entering the no-tool-call branch + # so the user can see empty-response warnings and recovery + # status messages. _mute_post_response was set during a + # prior housekeeping tool turn and should not silence the + # final response path. 
+ self._mute_post_response = False + # Check if response only has think block with no actual content after it if not self._has_content_after_think_block(final_response): # ── Partial stream recovery ───────────────────── @@ -10181,30 +11870,83 @@ class AIAgent: break # If the previous turn already delivered real content alongside - # tool calls (e.g. "You're welcome!" + memory save), the model - # has nothing more to say. Use the earlier content immediately - # instead of wasting API calls on retries that won't help. + # HOUSEKEEPING tool calls (e.g. "You're welcome!" + memory save), + # the model has nothing more to say. Use the earlier content + # immediately instead of wasting API calls on retries. + # NOTE: Only use this shortcut when ALL tools in that turn were + # housekeeping (memory, todo, etc.). When substantive tools + # were called (terminal, search_files, etc.), the content was + # likely mid-task narration ("I'll scan the directory...") and + # the empty follow-up means the model choked — let the + # post-tool nudge below handle that instead of exiting early. 
fallback = getattr(self, '_last_content_with_tools', None) - if fallback: + if fallback and getattr(self, '_last_content_tools_all_housekeeping', False): _turn_exit_reason = "fallback_prior_turn_content" logger.info("Empty follow-up after tool calls — using prior turn content as final response") self._emit_status("↻ Empty response after tool calls — using earlier content as final answer") self._last_content_with_tools = None + self._last_content_tools_all_housekeeping = False self._empty_content_retries = 0 - for i in range(len(messages) - 1, -1, -1): - msg = messages[i] - if msg.get("role") == "assistant" and msg.get("tool_calls"): - tool_names = [] - for tc in msg["tool_calls"]: - if not tc or not isinstance(tc, dict): continue - fn = tc.get("function", {}) - tool_names.append(fn.get("name", "unknown")) - msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..." - break + # Do NOT modify the assistant message content — the + # old code injected "Calling the X tools..." which + # poisoned the conversation history. Just use the + # fallback text as the final response and break. final_response = self._strip_think_blocks(fallback).strip() self._response_was_previewed = True break + # ── Post-tool-call empty response nudge ─────────── + # The model returned empty after executing tool calls. + # This covers two cases: + # (a) No prior-turn content at all — model went silent + # (b) Prior turn had content + SUBSTANTIVE tools (the + # fallback above was skipped because the content + # was mid-task narration, not a final answer) + # Instead of giving up, nudge the model to continue by + # appending a user-level hint. This is the #9400 case: + # weaker models (mimo-v2-pro, GLM-5, etc.) sometimes + # return empty after tool results instead of continuing + # to the next step. One retry with a nudge usually + # fixes it. 
+ _prior_was_tool = any( + m.get("role") == "tool" + for m in messages[-5:] # check recent messages + ) + if ( + _prior_was_tool + and not getattr(self, "_post_tool_empty_retried", False) + ): + self._post_tool_empty_retried = True + # Clear stale narration so it doesn't resurface + # on a later empty response after the nudge. + self._last_content_with_tools = None + self._last_content_tools_all_housekeeping = False + logger.info( + "Empty response after tool calls — nudging model " + "to continue processing" + ) + self._emit_status( + "⚠️ Model returned empty after tool calls — " + "nudging to continue" + ) + # Append the empty assistant message first so the + # message sequence stays valid: + # tool(result) → assistant("(empty)") → user(nudge) + # Without this, we'd have tool → user which most + # APIs reject as an invalid sequence. + _nudge_msg = self._build_assistant_message(assistant_message, finish_reason) + _nudge_msg["content"] = "(empty)" + messages.append(_nudge_msg) + messages.append({ + "role": "user", + "content": ( + "You just executed tool calls but returned an " + "empty response. Please process the tool " + "results above and continue with the task." + ), + }) + continue + # ── Thinking-only prefill continuation ────────── # The model produced structured reasoning (via API # fields) but no visible text content. Rather than @@ -10466,8 +12208,9 @@ class AIAgent: # Determine if conversation completed successfully completed = final_response is not None and api_call_count < self.max_iterations - # Save trajectory if enabled - self._save_trajectory(messages, user_message, completed) + # Save trajectory if enabled. ``user_message`` may be a multimodal + # list of parts; the trajectory format wants a plain string. 
+ self._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed) # Clean up VM and browser for this task after conversation completes self._cleanup_task_resources(effective_task_id) @@ -10571,6 +12314,12 @@ class AIAgent: "cost_status": self.session_cost_status, "cost_source": self.session_cost_source, } + # If a /steer landed after the final assistant turn (no more tool + # batches to drain into), hand it back to the caller so it can be + # delivered as the next user turn instead of being silently lost. + _leftover_steer = self._drain_pending_steer() + if _leftover_steer: + result["pending_steer"] = _leftover_steer self._response_was_previewed = False # Include interrupt message if one triggered the interrupt diff --git a/scripts/install.ps1 b/scripts/install.ps1 index d644c6221f..144113d5a0 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -630,7 +630,7 @@ function Copy-ConfigTemplates { New-Item -ItemType Directory -Force -Path "$HermesHome\audio_cache" | Out-Null New-Item -ItemType Directory -Force -Path "$HermesHome\memories" | Out-Null New-Item -ItemType Directory -Force -Path "$HermesHome\skills" | Out-Null - New-Item -ItemType Directory -Force -Path "$HermesHome\whatsapp\session" | Out-Null + # Create .env $envPath = "$HermesHome\.env" @@ -721,19 +721,21 @@ function Install-NodeDeps { } } - # Install WhatsApp bridge dependencies - $bridgeDir = "$InstallDir\scripts\whatsapp-bridge" - if (Test-Path "$bridgeDir\package.json") { - Write-Info "Installing WhatsApp bridge dependencies..." - Push-Location $bridgeDir + # Install TUI dependencies + $tuiDir = "$InstallDir\ui-tui" + if (Test-Path "$tuiDir\package.json") { + Write-Info "Installing TUI dependencies..." 
+ Push-Location $tuiDir try { npm install --silent 2>&1 | Out-Null - Write-Success "WhatsApp bridge dependencies installed" + Write-Success "TUI dependencies installed" } catch { - Write-Warn "WhatsApp bridge npm install failed (WhatsApp may not work)" + Write-Warn "TUI npm install failed (hermes --tui may not work)" } Pop-Location } + + Pop-Location } diff --git a/scripts/install.sh b/scripts/install.sh index aa6f4f79b5..166d984fac 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -28,7 +28,7 @@ BOLD='\033[1m' # Configuration REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git" REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git" -HERMES_HOME="$HOME/.hermes" +HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}" INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}" PYTHON_VERSION="3.11" NODE_VERSION="22" @@ -66,6 +66,10 @@ while [[ $# -gt 0 ]]; do INSTALL_DIR="$2" shift 2 ;; + --hermes-home) + HERMES_HOME="$2" + shift 2 + ;; -h|--help) echo "Hermes Agent Installer" echo "" @@ -76,6 +80,7 @@ while [[ $# -gt 0 ]]; do echo " --skip-setup Skip interactive setup wizard" echo " --branch NAME Git branch to install (default: main)" echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)" + echo " --hermes-home PATH Data directory (default: ~/.hermes, or \$HERMES_HOME)" echo " -h, --help Show this help" exit 0 ;; @@ -117,6 +122,43 @@ log_error() { echo -e "${RED}✗${NC} $1" } +prompt_yes_no() { + local question="$1" + local default="${2:-yes}" + local prompt_suffix + local answer="" + + # Use case patterns (not ${var,,}) so this works on bash 3.2 (macOS /bin/bash). 
+ case "$default" in + [yY]|[yY][eE][sS]|[tT][rR][uU][eE]|1) prompt_suffix="[Y/n]" ;; + *) prompt_suffix="[y/N]" ;; + esac + + if [ "$IS_INTERACTIVE" = true ]; then + read -r -p "$question $prompt_suffix " answer || answer="" + elif [ -r /dev/tty ] && [ -w /dev/tty ]; then + printf "%s %s " "$question" "$prompt_suffix" > /dev/tty + IFS= read -r answer < /dev/tty || answer="" + else + answer="" + fi + + answer="${answer#"${answer%%[![:space:]]*}"}" + answer="${answer%"${answer##*[![:space:]]}"}" + + if [ -z "$answer" ]; then + case "$default" in + [yY]|[yY][eE][sS]|[tT][rR][uU][eE]|1) return 0 ;; + *) return 1 ;; + esac + fi + + case "$answer" in + [yY]|[yY][eE][sS]) return 0 ;; + *) return 1 ;; + esac +} + is_termux() { [ -n "${TERMUX_VERSION:-}" ] || [[ "${PREFIX:-}" == *"com.termux/files/usr"* ]] } @@ -255,7 +297,7 @@ check_python() { if command -v python >/dev/null 2>&1; then PYTHON_PATH="$(command -v python)" if "$PYTHON_PATH" -c 'import sys; raise SystemExit(0 if sys.version_info >= (3, 11) else 1)' 2>/dev/null; then - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python found: $PYTHON_FOUND_VERSION" return 0 fi @@ -264,7 +306,7 @@ check_python() { log_info "Installing Python via pkg..." 
pkg install -y python >/dev/null PYTHON_PATH="$(command -v python)" - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python installed: $PYTHON_FOUND_VERSION" return 0 fi @@ -273,18 +315,17 @@ check_python() { # Let uv handle Python — it can download and manage Python versions # First check if a suitable Python is already available - if $UV_CMD python find "$PYTHON_VERSION" &> /dev/null; then - PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION") - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + if PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null)"; then + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python found: $PYTHON_FOUND_VERSION" return 0 fi # Python not found — use uv to install it (no sudo needed!) log_info "Python $PYTHON_VERSION not found, installing via uv..." - if $UV_CMD python install "$PYTHON_VERSION"; then - PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION") - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + if "$UV_CMD" python install "$PYTHON_VERSION"; then + PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION")" + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python installed: $PYTHON_FOUND_VERSION" else log_error "Failed to install Python $PYTHON_VERSION" @@ -601,9 +642,7 @@ install_system_packages() { echo "" log_info "sudo is needed ONLY to install optional system packages (${pkgs[*]}) via your package manager." log_info "Hermes Agent itself does not require or retain root access." - read -p "Install ${description}? (requires sudo) [y/N] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]]; then + if prompt_yes_no "Install ${description}? 
(requires sudo)" "no"; then if sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a $install_cmd; then [ "$need_ripgrep" = true ] && HAS_RIPGREP=true && log_success "ripgrep installed" [ "$need_ffmpeg" = true ] && HAS_FFMPEG=true && log_success "ffmpeg installed" @@ -616,9 +655,7 @@ install_system_packages() { echo "" log_info "sudo is needed ONLY to install optional system packages (${pkgs[*]}) via your package manager." log_info "Hermes Agent itself does not require or retain root access." - read -p "Install ${description}? [Y/n] " -n 1 -r < /dev/tty - echo - if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then + if prompt_yes_no "Install ${description}?" "yes"; then if sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a $install_cmd < /dev/tty; then [ "$need_ripgrep" = true ] && HAS_RIPGREP=true && log_success "ripgrep installed" [ "$need_ffmpeg" = true ] && HAS_FFMPEG=true && log_success "ffmpeg installed" @@ -858,9 +895,7 @@ install_deps() { else log_info "sudo is needed ONLY to install build tools (build-essential, python3-dev, libffi-dev) via apt." log_info "Hermes Agent itself does not require or retain root access." - read -p "Install build tools? [Y/n] " -n 1 -r < /dev/tty - echo - if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then + if prompt_yes_no "Install build tools?" "yes"; then sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a apt-get update -qq && sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a apt-get install -y -qq build-essential python3-dev libffi-dev >/dev/null 2>&1 || true log_success "Build tools installed" fi @@ -1016,7 +1051,7 @@ copy_config_templates() { log_info "Setting up configuration files..." 
# Create ~/.hermes directory structure (config at top level, code in subdir) - mkdir -p "$HERMES_HOME"/{cron,sessions,logs,pairing,hooks,image_cache,audio_cache,memories,skills,whatsapp/session} + mkdir -p "$HERMES_HOME"/{cron,sessions,logs,pairing,hooks,image_cache,audio_cache,memories,skills} # Create .env at ~/.hermes/.env (top level, easy to find) if [ ! -f "$HERMES_HOME/.env" ]; then @@ -1086,7 +1121,7 @@ install_node_deps() { if [ "$DISTRO" = "termux" ]; then log_info "Skipping automatic Node/browser dependency setup on Termux" - log_info "Browser automation and WhatsApp bridge are not part of the tested Termux install path yet." + log_info "Browser automation is not part of the tested Termux install path yet." log_info "If you want to experiment manually later, run: cd $INSTALL_DIR && npm install" return 0 fi @@ -1158,15 +1193,17 @@ install_node_deps() { log_success "Browser engine setup complete" fi - # Install WhatsApp bridge dependencies - if [ -f "$INSTALL_DIR/scripts/whatsapp-bridge/package.json" ]; then - log_info "Installing WhatsApp bridge dependencies..." - cd "$INSTALL_DIR/scripts/whatsapp-bridge" + # Install TUI dependencies + if [ -f "$INSTALL_DIR/ui-tui/package.json" ]; then + log_info "Installing TUI dependencies..." + cd "$INSTALL_DIR/ui-tui" npm install --silent 2>/dev/null || { - log_warn "WhatsApp bridge npm install failed (WhatsApp may not work)" + log_warn "TUI npm install failed (hermes --tui may not work)" } - log_success "WhatsApp bridge dependencies installed" + log_success "TUI dependencies installed" fi + + } run_setup_wizard() { @@ -1231,9 +1268,7 @@ maybe_start_gateway() { log_info "WhatsApp is enabled but not yet paired." log_info "Running 'hermes whatsapp' to pair via QR code..." echo "" - read -p "Pair WhatsApp now? [Y/n] " -n 1 -r - echo - if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then + if prompt_yes_no "Pair WhatsApp now?" 
"yes"; then HERMES_CMD="$(get_hermes_command_path)" $HERMES_CMD whatsapp || true fi @@ -1248,14 +1283,18 @@ maybe_start_gateway() { fi echo "" + local should_install_gateway=false if [ "$DISTRO" = "termux" ]; then - read -p "Would you like to start the gateway in the background? [Y/n] " -n 1 -r < /dev/tty + if prompt_yes_no "Would you like to start the gateway in the background?" "yes"; then + should_install_gateway=true + fi else - read -p "Would you like to install the gateway as a background service? [Y/n] " -n 1 -r < /dev/tty + if prompt_yes_no "Would you like to install the gateway as a background service?" "yes"; then + should_install_gateway=true + fi fi - echo - if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then + if [ "$should_install_gateway" = true ]; then HERMES_CMD="$(get_hermes_command_path)" if [ "$DISTRO" != "termux" ] && command -v systemctl &> /dev/null; then diff --git a/scripts/lib/node-bootstrap.sh b/scripts/lib/node-bootstrap.sh new file mode 100644 index 0000000000..9eadc479dd --- /dev/null +++ b/scripts/lib/node-bootstrap.sh @@ -0,0 +1,238 @@ +#!/usr/bin/env bash +# ============================================================================ +# scripts/lib/node-bootstrap.sh +# ---------------------------------------------------------------------------- +# Sourceable helper: ensure Node.js >= MIN_VERSION is available for the TUI +# (React + Ink), browser tools, and the WhatsApp bridge. +# +# Strategy (first hit wins — respects the user's existing tooling): +# 1. modern `node` already on PATH +# 2. ~/.hermes/node/ from a prior Hermes-managed install +# 3. fnm, proto, nvm (in that order) if the user already uses a version manager +# 4. Termux `pkg`, macOS Homebrew +# 5. 
pinned nodejs.org tarball into ~/.hermes/node/ (always works, zero shell rc edits) +# +# Usage: +# source scripts/lib/node-bootstrap.sh +# ensure_node # returns 0 on success, non-zero on failure +# if [ "$HERMES_NODE_AVAILABLE" = true ]; then ...; fi +# +# Env inputs (set before sourcing to override defaults): +# HERMES_NODE_MIN_VERSION (default: 20) — accepted on PATH +# HERMES_NODE_TARGET_MAJOR (default: 22) — installed when we install +# HERMES_HOME (default: $HOME/.hermes) +# ============================================================================ + +HERMES_NODE_MIN_VERSION="${HERMES_NODE_MIN_VERSION:-20}" +HERMES_NODE_TARGET_MAJOR="${HERMES_NODE_TARGET_MAJOR:-22}" +HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}" +HERMES_NODE_AVAILABLE=false + +# --------------------------------------------------------------------------- +# Logging — prefer the host script's log_* helpers when present +# --------------------------------------------------------------------------- + +_nb_log() { declare -F log_info >/dev/null 2>&1 && log_info "$*" || printf '→ %s\n' "$*" >&2; } +_nb_ok() { declare -F log_success >/dev/null 2>&1 && log_success "$*" || printf '✓ %s\n' "$*" >&2; } +_nb_warn() { declare -F log_warn >/dev/null 2>&1 && log_warn "$*" || printf '⚠ %s\n' "$*" >&2; } + +# --------------------------------------------------------------------------- +# Platform + version helpers +# --------------------------------------------------------------------------- + +_nb_is_termux() { + [ -n "${TERMUX_VERSION:-}" ] || [[ "${PREFIX:-}" == *"com.termux/files/usr"* ]] +} + +_nb_node_major() { + local v + v=$(node --version 2>/dev/null | sed 's/^v//' | cut -d. 
-f1) + [[ "$v" =~ ^[0-9]+$ ]] && echo "$v" || echo 0 +} + +_nb_have_modern_node() { + command -v node >/dev/null 2>&1 || return 1 + [ "$(_nb_node_major)" -ge "$HERMES_NODE_MIN_VERSION" ] +} + +# --------------------------------------------------------------------------- +# Version-manager paths — respect what the user already uses +# --------------------------------------------------------------------------- + +_nb_try_fnm() { + command -v fnm >/dev/null 2>&1 || return 1 + _nb_log "fnm detected — installing Node $HERMES_NODE_TARGET_MAJOR..." + eval "$(fnm env 2>/dev/null)" || true + fnm install "$HERMES_NODE_TARGET_MAJOR" >/dev/null 2>&1 || return 1 + fnm use "$HERMES_NODE_TARGET_MAJOR" >/dev/null 2>&1 || return 1 + _nb_have_modern_node || return 1 + _nb_ok "Node $(node --version) activated via fnm" + return 0 +} + +_nb_try_proto() { + command -v proto >/dev/null 2>&1 || return 1 + _nb_log "proto detected — installing Node $HERMES_NODE_TARGET_MAJOR..." + proto install node "$HERMES_NODE_TARGET_MAJOR" >/dev/null 2>&1 || return 1 + _nb_have_modern_node || return 1 + _nb_ok "Node $(node --version) activated via proto" + return 0 +} + +_nb_try_nvm() { + local nvm_sh="${NVM_DIR:-$HOME/.nvm}/nvm.sh" + [ -s "$nvm_sh" ] || return 1 + # shellcheck source=/dev/null + \. "$nvm_sh" >/dev/null 2>&1 || return 1 + _nb_log "nvm detected — installing Node $HERMES_NODE_TARGET_MAJOR..." + nvm install "$HERMES_NODE_TARGET_MAJOR" >/dev/null 2>&1 || return 1 + nvm use "$HERMES_NODE_TARGET_MAJOR" >/dev/null 2>&1 || return 1 + _nb_have_modern_node || return 1 + _nb_ok "Node $(node --version) activated via nvm" + return 0 +} + +# --------------------------------------------------------------------------- +# Platform package managers +# --------------------------------------------------------------------------- + +_nb_try_termux_pkg() { + _nb_is_termux || return 1 + _nb_log "Installing Node.js via pkg..." 
+ pkg install -y nodejs >/dev/null 2>&1 || return 1 + _nb_have_modern_node || return 1 + _nb_ok "Node $(node --version) installed via pkg" + return 0 +} + +_nb_try_brew() { + [ "$(uname -s)" = "Darwin" ] || return 1 + command -v brew >/dev/null 2>&1 || return 1 + _nb_log "Installing Node via Homebrew..." + brew install "node@${HERMES_NODE_TARGET_MAJOR}" >/dev/null 2>&1 \ + || brew install node >/dev/null 2>&1 \ + || return 1 + brew link --overwrite --force "node@${HERMES_NODE_TARGET_MAJOR}" >/dev/null 2>&1 || true + _nb_have_modern_node || return 1 + _nb_ok "Node $(node --version) installed via Homebrew" + return 0 +} + +# --------------------------------------------------------------------------- +# Bundled binary fallback — always works, no shell rc edits +# --------------------------------------------------------------------------- + +_nb_install_bundled_node() { + local arch node_arch os_name node_os + arch=$(uname -m) + case "$arch" in + x86_64) node_arch="x64" ;; + aarch64|arm64) node_arch="arm64" ;; + armv7l) node_arch="armv7l" ;; + *) + _nb_warn "Unsupported arch ($arch) — install Node.js manually: https://nodejs.org/" + return 1 + ;; + esac + + os_name=$(uname -s) + case "$os_name" in + Linux*) node_os="linux" ;; + Darwin*) node_os="darwin" ;; + *) + _nb_warn "Unsupported OS ($os_name) — install Node.js manually: https://nodejs.org/" + return 1 + ;; + esac + + local index_url="https://nodejs.org/dist/latest-v${HERMES_NODE_TARGET_MAJOR}.x/" + local tarball + tarball=$(curl -fsSL "$index_url" \ + | grep -oE "node-v${HERMES_NODE_TARGET_MAJOR}\.[0-9]+\.[0-9]+-${node_os}-${node_arch}\.tar\.xz" \ + | head -1) + if [ -z "$tarball" ]; then + tarball=$(curl -fsSL "$index_url" \ + | grep -oE "node-v${HERMES_NODE_TARGET_MAJOR}\.[0-9]+\.[0-9]+-${node_os}-${node_arch}\.tar\.gz" \ + | head -1) + fi + if [ -z "$tarball" ]; then + _nb_warn "Could not resolve Node $HERMES_NODE_TARGET_MAJOR binary for $node_os-$node_arch" + return 1 + fi + + local tmp + tmp=$(mktemp -d) + 
_nb_log "Downloading $tarball..." + curl -fsSL "${index_url}${tarball}" -o "$tmp/$tarball" || { + _nb_warn "Download failed"; rm -rf "$tmp"; return 1 + } + + _nb_log "Extracting to $HERMES_HOME/node/..." + if [[ "$tarball" == *.tar.xz ]]; then + tar xf "$tmp/$tarball" -C "$tmp" || { rm -rf "$tmp"; return 1; } + else + tar xzf "$tmp/$tarball" -C "$tmp" || { rm -rf "$tmp"; return 1; } + fi + + local extracted + extracted=$(find "$tmp" -maxdepth 1 -type d -name 'node-v*' 2>/dev/null | head -1) + if [ ! -d "$extracted" ]; then + _nb_warn "Extraction produced no node-v* directory" + rm -rf "$tmp" + return 1 + fi + + mkdir -p "$HERMES_HOME" + rm -rf "$HERMES_HOME/node" + mv "$extracted" "$HERMES_HOME/node" + rm -rf "$tmp" + + mkdir -p "$HOME/.local/bin" + ln -sf "$HERMES_HOME/node/bin/node" "$HOME/.local/bin/node" + ln -sf "$HERMES_HOME/node/bin/npm" "$HOME/.local/bin/npm" + ln -sf "$HERMES_HOME/node/bin/npx" "$HOME/.local/bin/npx" + export PATH="$HERMES_HOME/node/bin:$PATH" + + _nb_have_modern_node || return 1 + _nb_ok "Node $(node --version) installed to $HERMES_HOME/node/" + return 0 +} + +# --------------------------------------------------------------------------- +# Public entry point +# --------------------------------------------------------------------------- + +ensure_node() { + HERMES_NODE_AVAILABLE=false + + if _nb_have_modern_node; then + _nb_ok "Node $(node --version) found" + HERMES_NODE_AVAILABLE=true + return 0 + fi + + if [ -x "$HERMES_HOME/node/bin/node" ]; then + export PATH="$HERMES_HOME/node/bin:$PATH" + if _nb_have_modern_node; then + _nb_ok "Node $(node --version) found (Hermes-managed)" + HERMES_NODE_AVAILABLE=true + return 0 + fi + fi + + # Version managers first — respect the user's existing setup. + _nb_try_fnm && { HERMES_NODE_AVAILABLE=true; return 0; } + _nb_try_proto && { HERMES_NODE_AVAILABLE=true; return 0; } + _nb_try_nvm && { HERMES_NODE_AVAILABLE=true; return 0; } + + # Platform package managers. 
+ _nb_try_termux_pkg && { HERMES_NODE_AVAILABLE=true; return 0; } + _nb_try_brew && { HERMES_NODE_AVAILABLE=true; return 0; } + + # Last resort: pinned nodejs.org tarball. + _nb_install_bundled_node && { HERMES_NODE_AVAILABLE=true; return 0; } + + _nb_warn "Node.js install failed — TUI and browser tools will be unavailable." + _nb_warn "Install manually: https://nodejs.org/en/download/ (or: \`brew install node\`, \`fnm install $HERMES_NODE_TARGET_MAJOR\`, etc.)" + return 1 +} diff --git a/scripts/release.py b/scripts/release.py index 08af431f25..56ff878f55 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -44,14 +44,20 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1", # contributors (from noreply pattern) + "snreynolds2506@gmail.com": "snreynolds", "35742124+0xbyt4@users.noreply.github.com": "0xbyt4", "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "kshitijk4poor@gmail.com": "kshitijk4poor", "16443023+stablegenius49@users.noreply.github.com": "stablegenius49", "185121704+stablegenius49@users.noreply.github.com": "stablegenius49", "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit", + "valdi.jorge@gmail.com": "jvcl", "126368201+vilkasdev@users.noreply.github.com": "vilkasdev", "137614867+cutepawss@users.noreply.github.com": "cutepawss", "96793918+memosr@users.noreply.github.com": "memosr", + "milkoor@users.noreply.github.com": "milkoor", + "xuerui911@gmail.com": "Fatty911", "131039422+SHL0MS@users.noreply.github.com": "SHL0MS", "77628552+raulvidis@users.noreply.github.com": "raulvidis", "145567217+Aum08Desai@users.noreply.github.com": "Aum08Desai", @@ -60,10 +66,34 @@ AUTHOR_MAP = { "104278804+Sertug17@users.noreply.github.com": "Sertug17", "112503481+caentzminger@users.noreply.github.com": "caentzminger", "258577966+voidborne-d@users.noreply.github.com": "voidborne-d", + 
"sir_even@icloud.com": "sirEven", + "36056348+sirEven@users.noreply.github.com": "sirEven", "70424851+insecurejezza@users.noreply.github.com": "insecurejezza", + "254021826+dodo-reach@users.noreply.github.com": "dodo-reach", "259807879+Bartok9@users.noreply.github.com": "Bartok9", + "241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter", "268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1", + "27917469+nosleepcassette@users.noreply.github.com": "nosleepcassette", + "241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter", + "109555139+davetist@users.noreply.github.com": "davetist", + "39405770+yyq4193@users.noreply.github.com": "yyq4193", + "Asunfly@users.noreply.github.com": "Asunfly", + "2500400+honghua@users.noreply.github.com": "honghua", + "462836+jplew@users.noreply.github.com": "jplew", + "nish3451@users.noreply.github.com": "nish3451", + "Mibayy@users.noreply.github.com": "Mibayy", + "mibayy@users.noreply.github.com": "Mibayy", + "135070653+sgaofen@users.noreply.github.com": "sgaofen", + "nocoo@users.noreply.github.com": "nocoo", + "30841158+n-WN@users.noreply.github.com": "n-WN", + "leoyuan0099@gmail.com": "keyuyuan", + "bxzt2006@163.com": "Only-Code-A", + "i@troy-y.org": "TroyMitchell911", + "mygamez@163.com": "zhongyueming1121", + "hansnow@users.noreply.github.com": "hansnow", # contributors (manual mapping from git names) + "ahmedsherif95@gmail.com": "asheriif", + "liujinkun@bytedance.com": "liujinkun2025", "dmayhem93@gmail.com": "dmahan93", "samherring99@gmail.com": "samherring99", "desaiaum08@gmail.com": "Aum08Desai", @@ -74,17 +104,37 @@ AUTHOR_MAP = { "xaydinoktay@gmail.com": "aydnOktay", "abdullahfarukozden@gmail.com": "Farukest", "lovre.pesut@gmail.com": "rovle", + "kevinskysunny@gmail.com": "kevinskysunny", + "xiewenxuan462@gmail.com": "yule975", + "yiweimeng.dlut@hotmail.com": "meng93", "hakanerten02@hotmail.com": "teyrebaz33", + "linux2010@users.noreply.github.com": "Linux2010", + 
"elmatadorgh@users.noreply.github.com": "elmatadorgh", + "alexazzjjtt@163.com": "alexzhu0", + "1180176+Swift42@users.noreply.github.com": "Swift42", + "ruzzgarcn@gmail.com": "Ruzzgar", "alireza78.crypto@gmail.com": "alireza78a", "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson", + "withapurpose37@gmail.com": "StefanIsMe", + "4317663+helix4u@users.noreply.github.com": "helix4u", + "331214+counterposition@users.noreply.github.com": "counterposition", + "blspear@gmail.com": "BrennerSpear", + "akhater@gmail.com": "akhater", + "239876380+handsdiff@users.noreply.github.com": "handsdiff", + "hesapacicam112@gmail.com": "etherman-os", + "mark.ramsell@rivermounts.com": "mark-ramsell", + "taeng02@icloud.com": "taeng0204", "gpickett00@gmail.com": "gpickett00", "mcosma@gmail.com": "wakamex", "clawdia.nash@proton.me": "clawdia-nash", "pickett.austin@gmail.com": "austinpickett", + "dangtc94@gmail.com": "dieutx", "jaisehgal11299@gmail.com": "jaisup", "percydikec@gmail.com": "PercyDikec", + "noonou7@gmail.com": "HenkDz", "dean.kerr@gmail.com": "deankerr", "socrates1024@gmail.com": "socrates1024", + "seanalt555@gmail.com": "Salt-555", "satelerd@gmail.com": "satelerd", "numman.ali@gmail.com": "nummanali", "0xNyk@users.noreply.github.com": "0xNyk", @@ -95,10 +145,15 @@ AUTHOR_MAP = { "vincentcharlebois@gmail.com": "vincentcharlebois", "aryan@synvoid.com": "aryansingh", "johnsonblake1@gmail.com": "blakejohnson", + "hcn518@gmail.com": "pedh", + "haileymarshall005@gmail.com": "haileymarshall", + "greer.guthrie@gmail.com": "g-guthrie", "kennyx102@gmail.com": "bobashopcashier", + "shokatalishaikh95@gmail.com": "areu01or00", "bryan@intertwinesys.com": "bryanyoung", "christo.mitov@gmail.com": "christomitov", "hermes@nousresearch.com": "NousResearch", + "hermes@noushq.ai": "benbarclay", "chinmingcock@gmail.com": "ChimingLiu", "openclaw@sparklab.ai": "openclaw", "semihcvlk53@gmail.com": "Himess", @@ -113,13 +168,20 @@ AUTHOR_MAP = { "jack.47@gmail.com": "JackTheGit", 
"dalvidjr2022@gmail.com": "Jr-kenny", "m@statecraft.systems": "mbierling", - "balyan.sid@gmail.com": "balyansid", + "balyan.sid@gmail.com": "alt-glitch", "oluwadareab12@gmail.com": "bennytimz", + "simon@simonmarcus.org": "simon-marcus", + "xowiekk@gmail.com": "Xowiek", + "1243352777@qq.com": "zons-zhaozhy", # ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply # crossref, and GH contributor list matching (April 2026 audit) ── "1115117931@qq.com": "aaronagent", "1506751656@qq.com": "hqhq1025", "364939526@qq.com": "luyao618", + "hgk324@gmail.com": "houziershi", + "176644217+PStarH@users.noreply.github.com": "PStarH", + "51058514+Sanjays2402@users.noreply.github.com": "Sanjays2402", + "906014227@qq.com": "bingo906", "aaronwong1999@icloud.com": "AaronWong1999", "agents@kylefrench.dev": "DeployFaith", "angelos@oikos.lan.home.malaiwah.com": "angelos", @@ -142,6 +204,7 @@ AUTHOR_MAP = { "duerzy@gmail.com": "duerzy", "emozilla@nousresearch.com": "emozilla", "fancydirty@gmail.com": "fancydirty", + "farion1231@gmail.com": "farion1231", "floptopbot33@gmail.com": "flobo3", "fontana.pedro93@gmail.com": "pefontana", "francis.x.fitzpatrick@gmail.com": "fxfitz", @@ -158,13 +221,35 @@ AUTHOR_MAP = { "juan.ovalle@mistral.ai": "jjovalle99", "julien.talbot@ergonomia.re": "Julientalbot", "kagura.chen28@gmail.com": "kagura-agent", + "1342088860@qq.com": "youngDoo", "kamil@gwozdz.me": "kamil-gwozdz", + "skmishra1991@gmail.com": "bugkill3r", "karamusti912@gmail.com": "MustafaKara7", "kira@ariaki.me": "kira-ariaki", "knopki@duck.com": "knopki", "limars874@gmail.com": "limars874", "lisicheng168@gmail.com": "lesterli", "mingjwan@microsoft.com": "MagicRay1217", + "orangeko@gmail.com": "GenKoKo", + "82095453+iacker@users.noreply.github.com": "iacker", + "sontianye@users.noreply.github.com": "sontianye", + "jackjin1997@users.noreply.github.com": "jackjin1997", + "1037461232@qq.com": "jackjin1997", + "danieldoderlein@users.noreply.github.com": "danieldoderlein", + 
"lrawnsley@users.noreply.github.com": "lrawnsley", + "taeuk178@users.noreply.github.com": "taeuk178", + "ogzerber@users.noreply.github.com": "ogzerber", + "cola-runner@users.noreply.github.com": "cola-runner", + "ygd58@users.noreply.github.com": "ygd58", + "vominh1919@users.noreply.github.com": "vominh1919", + "iamagenius00@users.noreply.github.com": "iamagenius00", + "9219265+cresslank@users.noreply.github.com": "cresslank", + "trevmanthony@gmail.com": "trevthefoolish", + "ziliangpeng@users.noreply.github.com": "ziliangpeng", + "centripetal-star@users.noreply.github.com": "centripetal-star", + "LeonSGP43@users.noreply.github.com": "LeonSGP43", + "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", + "Lubrsy706@users.noreply.github.com": "Lubrsy706", "niyant@spicefi.xyz": "spniyant", "olafthiele@gmail.com": "olafthiele", "oncuevtv@gmail.com": "sprmn24", @@ -187,12 +272,44 @@ AUTHOR_MAP = { "yangzhi.see@gmail.com": "SeeYangZhi", "yongtenglei@gmail.com": "yongtenglei", "young@YoungdeMacBook-Pro.local": "YoungYang963", - "ysfalweshcan@gmail.com": "Awsh1", + "ysfalweshcan@gmail.com": "Junass1", "ysfwaxlycan@gmail.com": "WAXLYY", "yusufalweshdemir@gmail.com": "Dusk1e", "zhouboli@gmail.com": "zhouboli", "zqiao@microsoft.com": "tomqiaozc", "zzn+pa@zzn.im": "xinbenlv", + "zaynjarvis@gmail.com": "ZaynJarvis", + "zhiheng.liu@bytedance.com": "ZaynJarvis", + "mbelleau@Michels-MacBook-Pro.local": "malaiwah", + "michel.belleau@malaiwah.com": "malaiwah", + "gnanasekaran.sekareee@gmail.com": "gnanam1990", + "jz.pentest@gmail.com": "0xyg3n", + "hypnosis.mda@gmail.com": "Hypn0sis", + "ywt000818@gmail.com": "OwenYWT", + "dhandhalyabhavik@gmail.com": "v1k22", + "rucchizhao@zhaochenfeideMacBook-Pro.local": "RucchiZ", + "lehaolin98@outlook.com": "LehaoLin", + "yuewang1@microsoft.com": "imink", + "1736355688@qq.com": "hedgeho9X", + "bernylinville@devopsthink.org": "bernylinville", + "brian@bde.io": "briandevans", + "hubin_ll@qq.com": "LLQWQ", + "memosr_email@gmail.com": "memosr", 
+ "anthhub@163.com": "anthhub", + "shenuu@gmail.com": "shenuu", + "xiayh17@gmail.com": "xiayh0107", + "asurla@nvidia.com": "anniesurla", + "limkuan24@gmail.com": "WideLee", + "aviralarora002@gmail.com": "AviArora02-commits", + "draixagent@gmail.com": "draix", + "junminliu@gmail.com": "JimLiu", + "jarvischer@gmail.com": "maxchernin", + "levantam.98.2324@gmail.com": "LVT382009", + "zhurongcheng@rcrai.com": "heykb", + "withapurpose37@gmail.com": "StefanIsMe", + "261797239+lumenradley@users.noreply.github.com": "lumenradley", + "166376523+sjz-ks@users.noreply.github.com": "sjz-ks", + "haileymarshall005@gmail.com": "haileymarshall", } diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh new file mode 100755 index 0000000000..0ad2dc464b --- /dev/null +++ b/scripts/run_tests.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash +# Canonical test runner for hermes-agent. Run this instead of calling +# `pytest` directly to guarantee your local run matches CI behavior. +# +# What this script enforces: +# * -n 4 xdist workers (CI has 4 cores; -n auto diverges locally) +# * TZ=UTC, LANG=C.UTF-8, PYTHONHASHSEED=0 (deterministic) +# * Credential env vars blanked (conftest.py also does this, but this +# is belt-and-suspenders for anyone running `pytest` outside of +# our conftest path — e.g. calling pytest on a single file) +# * Proper venv activation +# +# Usage: +# scripts/run_tests.sh # full suite +# scripts/run_tests.sh tests/agent/ # one directory +# scripts/run_tests.sh tests/agent/test_foo.py::TestClass::test_method +# scripts/run_tests.sh --tb=long -v # pass-through pytest args + +set -euo pipefail + +# ── Locate repo root ──────────────────────────────────────────────────────── +# Works whether this is the main checkout or a worktree. +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +# ── Activate venv ─────────────────────────────────────────────────────────── +# Prefer a .venv in the current tree, fall back to the main checkout's venv +# (useful for worktrees where we don't always duplicate the venv). +VENV="" +for candidate in "$REPO_ROOT/.venv" "$REPO_ROOT/venv" "$HOME/.hermes/hermes-agent/venv"; do + if [ -f "$candidate/bin/activate" ]; then + VENV="$candidate" + break + fi +done + +if [ -z "$VENV" ]; then + echo "error: no virtualenv found in $REPO_ROOT/.venv or $REPO_ROOT/venv" >&2 + exit 1 +fi + +PYTHON="$VENV/bin/python" + +# ── Ensure pytest-split is installed (required for shard-equivalent runs) ── +if ! "$PYTHON" -c "import pytest_split" 2>/dev/null; then + echo "→ installing pytest-split into $VENV" + "$PYTHON" -m pip install --quiet "pytest-split>=0.9,<1" +fi + +# ── Hermetic environment ──────────────────────────────────────────────────── +# Mirror what CI does in .github/workflows/tests.yml + what conftest.py does. +# Unset every credential-shaped var currently in the environment. +while IFS='=' read -r name _; do + case "$name" in + *_API_KEY|*_TOKEN|*_SECRET|*_PASSWORD|*_CREDENTIALS|*_ACCESS_KEY| \ + *_SECRET_ACCESS_KEY|*_PRIVATE_KEY|*_OAUTH_TOKEN|*_WEBHOOK_SECRET| \ + *_ENCRYPT_KEY|*_APP_SECRET|*_CLIENT_SECRET|*_CORP_SECRET|*_AES_KEY| \ + AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|FAL_KEY| \ + GH_TOKEN|GITHUB_TOKEN) + unset "$name" + ;; + esac +done < <(env) + +# Unset HERMES_* behavioral vars too. 
+unset HERMES_YOLO_MODE HERMES_INTERACTIVE HERMES_QUIET HERMES_TOOL_PROGRESS \ + HERMES_TOOL_PROGRESS_MODE HERMES_MAX_ITERATIONS HERMES_SESSION_PLATFORM \ + HERMES_SESSION_CHAT_ID HERMES_SESSION_CHAT_NAME HERMES_SESSION_THREAD_ID \ + HERMES_SESSION_SOURCE HERMES_SESSION_KEY HERMES_GATEWAY_SESSION \ + HERMES_PLATFORM HERMES_INFERENCE_PROVIDER HERMES_MANAGED HERMES_DEV \ + HERMES_CONTAINER HERMES_EPHEMERAL_SYSTEM_PROMPT HERMES_TIMEZONE \ + HERMES_REDACT_SECRETS HERMES_BACKGROUND_NOTIFICATIONS HERMES_EXEC_ASK \ + HERMES_HOME_MODE 2>/dev/null || true + +# Pin deterministic runtime. +export TZ=UTC +export LANG=C.UTF-8 +export LC_ALL=C.UTF-8 +export PYTHONHASHSEED=0 + +# ── Worker count ──────────────────────────────────────────────────────────── +# CI uses `-n auto` on ubuntu-latest which gives 4 workers. A 20-core +# workstation with `-n auto` gets 20 workers and exposes test-ordering +# flakes that CI will never see. Pin to 4 so local matches CI. +WORKERS="${HERMES_TEST_WORKERS:-4}" + +# ── Run pytest ────────────────────────────────────────────────────────────── +cd "$REPO_ROOT" + +# If the first argument starts with `-` treat all args as pytest flags; +# otherwise treat them as test paths. +ARGS=("$@") + +echo "▶ running pytest with $WORKERS workers, hermetic env, in $REPO_ROOT" +echo " (TZ=UTC LANG=C.UTF-8 PYTHONHASHSEED=0; all credential env vars unset)" + +# -o "addopts=" clears pyproject.toml's `-n auto` so our -n wins. 
+exec "$PYTHON" -m pytest \ + -o "addopts=" \ + -n "$WORKERS" \ + --ignore=tests/integration \ + --ignore=tests/e2e \ + -m "not integration" \ + "${ARGS[@]}" diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 70cf8e95d9..401651c8a8 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -229,6 +229,14 @@ async function startSocket() { // Check allowlist for messages from others (resolve LID ↔ phone aliases) if (!msg.key.fromMe && !matchesAllowedUser(senderId, ALLOWED_USERS, SESSION_DIR)) { + try { + console.log(JSON.stringify({ + event: 'ignored', + reason: 'allowlist_mismatch', + chatId, + senderId, + })); + } catch {} continue; } diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md index 9e0b412f54..d19471c80d 100644 --- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md +++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md @@ -313,7 +313,7 @@ Type these during an interactive chat session. ``` ~/.hermes/config.yaml Main configuration ~/.hermes/.env API keys and secrets -~/.hermes/skills/ Installed skills +$HERMES_HOME/skills/ Installed skills ~/.hermes/sessions/ Session transcripts ~/.hermes/logs/ Gateway and error logs ~/.hermes/auth.json OAuth tokens and credential pools @@ -338,7 +338,6 @@ Edit with `hermes config edit` or `hermes config set section.key value`. 
| `memory` | `memory_enabled`, `user_profile_enabled`, `provider` | | `security` | `tirith_enabled`, `website_blocklist` | | `delegation` | `model`, `provider`, `base_url`, `api_key`, `max_iterations` (50), `reasoning_effort` | -| `smart_model_routing` | `enabled`, `cheap_model` | | `checkpoints` | `enabled`, `max_snapshots` (50) | Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/configuration @@ -351,8 +350,8 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con |----------|------|-------------| | OpenRouter | API key | `OPENROUTER_API_KEY` | | Anthropic | API key | `ANTHROPIC_API_KEY` | -| Nous Portal | OAuth | `hermes login --provider nous` | -| OpenAI Codex | OAuth | `hermes login --provider openai-codex` | +| Nous Portal | OAuth | `hermes auth` | +| OpenAI Codex | OAuth | `hermes auth` | | GitHub Copilot | Token | `COPILOT_GITHUB_TOKEN` | | Google Gemini | API key | `GOOGLE_API_KEY` or `GEMINI_API_KEY` | | DeepSeek | API key | `DEEPSEEK_API_KEY` | @@ -650,9 +649,9 @@ registry.register( ) ``` -**2. Add import** in `model_tools.py` → `_discover_tools()` list. +**2. Add to `toolsets.py`** → `_HERMES_CORE_TOOLS` list. -**3. Add to `toolsets.py`** → `_HERMES_CORE_TOOLS` list. +Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual list needed. All handlers must return JSON strings. Use `get_hermes_home()` for paths, never hardcode `~/.hermes`. diff --git a/skills/creative/architecture-diagram/SKILL.md b/skills/creative/architecture-diagram/SKILL.md new file mode 100644 index 0000000000..1e1749db87 --- /dev/null +++ b/skills/creative/architecture-diagram/SKILL.md @@ -0,0 +1,147 @@ +--- +name: architecture-diagram +description: Generate dark-themed SVG diagrams of software systems and cloud infrastructure as standalone HTML files with inline SVG graphics. 
Semantic component colors (cyan=frontend, emerald=backend, violet=database, amber=cloud/AWS, rose=security, orange=message bus), JetBrains Mono font, grid background. Best suited for software architecture, cloud/VPC topology, microservice maps, service-mesh diagrams, database + API layer diagrams, security groups, message buses — anything that fits a tech-infra deck with a dark aesthetic. If a more specialized diagramming skill exists for the subject (scientific, educational, hand-drawn, animated, etc.), prefer that — otherwise this skill can also serve as a general-purpose SVG diagram fallback. Based on Cocoon AI's architecture-diagram-generator (MIT). +version: 1.0.0 +author: Cocoon AI (hello@cocoon-ai.com), ported by Hermes Agent +license: MIT +dependencies: [] +metadata: + hermes: + tags: [architecture, diagrams, SVG, HTML, visualization, infrastructure, cloud] + related_skills: [concept-diagrams, excalidraw] +--- + +# Architecture Diagram Skill + +Generate professional, dark-themed technical architecture diagrams as standalone HTML files with inline SVG graphics. No external tools, no API keys, no rendering libraries — just write the HTML file and open it in a browser. + +## Scope + +**Best suited for:** +- Software system architecture (frontend / backend / database layers) +- Cloud infrastructure (VPC, regions, subnets, managed services) +- Microservice / service-mesh topology +- Database + API map, deployment diagrams +- Anything with a tech-infra subject that fits a dark, grid-backed aesthetic + +**Look elsewhere first for:** +- Physics, chemistry, math, biology, or other scientific subjects +- Physical objects (vehicles, hardware, anatomy, cross-sections) +- Floor plans, narrative journeys, educational / textbook-style visuals +- Hand-drawn whiteboard sketches (consider `excalidraw`) +- Animated explainers (consider an animation skill) + +If a more specialized skill is available for the subject, prefer that. 
If none fits, this skill can also serve as a general SVG diagram fallback — the output will just carry the dark tech aesthetic described below. + +Based on [Cocoon AI's architecture-diagram-generator](https://github.com/Cocoon-AI/architecture-diagram-generator) (MIT). + +## Workflow + +1. User describes their system architecture (components, connections, technologies) +2. Generate the HTML file following the design system below +3. Save with `write_file` to a `.html` file (e.g. `~/architecture-diagram.html`) +4. User opens in any browser — works offline, no dependencies + +### Output Location + +Save diagrams to a user-specified path, or default to the current working directory: +``` +./[project-name]-architecture.html +``` + +### Preview + +After saving, suggest the user open it: +```bash +# macOS +open ./my-architecture.html +# Linux +xdg-open ./my-architecture.html +``` + +## Design System & Visual Language + +### Color Palette (Semantic Mapping) + +Use specific `rgba` fills and hex strokes to categorize components: + +| Component Type | Fill (rgba) | Stroke (Hex) | +| :--- | :--- | :--- | +| **Frontend** | `rgba(8, 51, 68, 0.4)` | `#22d3ee` (cyan-400) | +| **Backend** | `rgba(6, 78, 59, 0.4)` | `#34d399` (emerald-400) | +| **Database** | `rgba(76, 29, 149, 0.4)` | `#a78bfa` (violet-400) | +| **AWS/Cloud** | `rgba(120, 53, 15, 0.3)` | `#fbbf24` (amber-400) | +| **Security** | `rgba(136, 19, 55, 0.4)` | `#fb7185` (rose-400) | +| **Message Bus** | `rgba(251, 146, 60, 0.3)` | `#fb923c` (orange-400) | +| **External** | `rgba(30, 41, 59, 0.5)` | `#94a3b8` (slate-400) | + +### Typography & Background +- **Font:** JetBrains Mono (Monospace), loaded from Google Fonts +- **Sizes:** 12px (Names), 9px (Sublabels), 8px (Annotations), 7px (Tiny labels) +- **Background:** Slate-950 (`#020617`) with a subtle 40px grid pattern + +```svg + + + + +``` + +## Technical Implementation Details + +### Component Rendering +Components are rounded rectangles (`rx="6"`) with 1.5px 
strokes. To prevent arrows from showing through semi-transparent fills, use a **double-rect masking technique**: +1. Draw an opaque background rect (`#0f172a`) +2. Draw the semi-transparent styled rect on top + +### Connection Rules +- **Z-Order:** Draw arrows *early* in the SVG (after the grid) so they render behind component boxes +- **Arrowheads:** Defined via SVG markers +- **Security Flows:** Use dashed lines in rose color (`#fb7185`) +- **Boundaries:** + - *Security Groups:* Dashed (`4,4`), rose color + - *Regions:* Large dashed (`8,4`), amber color, `rx="12"` + +### Spacing & Layout Logic +- **Standard Height:** 60px (Services); 80-120px (Large components) +- **Vertical Gap:** Minimum 40px between components +- **Message Buses:** Must be placed *in the gap* between services, not overlapping them +- **Legend Placement:** **CRITICAL.** Must be placed outside all boundary boxes. Calculate the lowest Y-coordinate of all boundaries and place the legend at least 20px below it. + +## Document Structure + +The generated HTML file follows a four-part layout: +1. **Header:** Title with a pulsing dot indicator and subtitle +2. **Main SVG:** The diagram contained within a rounded border card +3. **Summary Cards:** A grid of three cards below the diagram for high-level details +4. **Footer:** Minimal metadata + +### Info Card Pattern +```html +
+
+
+

Title

+
+
    +
  • • Item one
  • +
  • • Item two
  • +
+
+``` + +## Output Requirements +- **Single File:** One self-contained `.html` file +- **No External Dependencies:** All CSS and SVG must be inline (except Google Fonts) +- **No JavaScript:** Use pure CSS for any animations (like pulsing dots) +- **Compatibility:** Must render correctly in any modern web browser + +## Template Reference + +Load the full HTML template for the exact structure, CSS, and SVG component examples: + +``` +skill_view(name="architecture-diagram", file_path="templates/template.html") +``` + +The template contains working examples of every component type (frontend, backend, database, cloud, security), arrow styles (standard, dashed, curved), security groups, region boundaries, and the legend — use it as your structural reference when generating diagrams. diff --git a/skills/creative/architecture-diagram/templates/template.html b/skills/creative/architecture-diagram/templates/template.html new file mode 100644 index 0000000000..f5b32fbe7f --- /dev/null +++ b/skills/creative/architecture-diagram/templates/template.html @@ -0,0 +1,319 @@ + + + + + + [PROJECT NAME] Architecture Diagram + + + + +
+ +
+
+
+

[PROJECT NAME] Architecture

+
+

[Subtitle description]

+
+ + +
+ + + + + + + + + + + + + + + + + + + Users + Browser/Mobile + + + + Auth Provider + OAuth 2.0 + + + + AWS Region: us-west-2 + + + + CloudFront + CDN + + + + S3 Buckets + • bucket-one + • bucket-two + • bucket-three + OAI Protected + + + + sg-name :port + + + + Load Balancer + HTTPS :443 + + + + API Server + FastAPI :8000 + + + + Database + PostgreSQL + + + + Frontend + React + TypeScript + Additional detail + More info + domain.example.com + + + + + + HTTPS + + + + + + + OAI + + + + + TLS + + + + JWT + PKCE + + + Legend + + + Frontend + + + Backend + + + Cloud Service + + + Database + + + Security + + + Auth Flow + + + Security Group + +
+ + +
+
+
+
+

Card Title 1

+
+
    +
  • • Item one
  • +
  • • Item two
  • +
  • • Item three
  • +
  • • Item four
  • +
+
+ +
+
+
+

Card Title 2

+
+
    +
  • • Item one
  • +
  • • Item two
  • +
  • • Item three
  • +
  • • Item four
  • +
+
+ +
+
+
+

Card Title 3

+
+
    +
  • • Item one
  • +
  • • Item two
  • +
  • • Item three
  • +
  • • Item four
  • +
+
+
+ + + +
+ + diff --git a/skills/creative/baoyu-infographic/PORT_NOTES.md b/skills/creative/baoyu-infographic/PORT_NOTES.md new file mode 100644 index 0000000000..0a2d86d89c --- /dev/null +++ b/skills/creative/baoyu-infographic/PORT_NOTES.md @@ -0,0 +1,43 @@ +# Port Notes — baoyu-infographic + +Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1. + +## Changes from upstream + +Only `SKILL.md` was modified. All 45 reference files are verbatim copies. + +### SKILL.md adaptations + +| Change | Upstream | Hermes | +|--------|----------|--------| +| Metadata namespace | `openclaw` | `hermes` | +| Trigger | `/baoyu-infographic` slash command | Natural language skill matching | +| User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra | +| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one at a time) | +| Image generation | baoyu-imagine (Bun/TypeScript) | `image_generate` tool | +| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only | +| File operations | Bash commands | Hermes file tools (write_file, read_file) | + +### What was preserved + +- All layout definitions (21 files) +- All style definitions (21 files) +- Core reference files (analysis-framework, base-prompt, structured-content-template) +- Recommended combinations table +- Keyword shortcuts table +- Core principles and workflow structure +- Author, version, homepage attribution + +## Syncing with upstream + +To pull upstream updates: +```bash +# Compare versions +curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-infographic/SKILL.md | head -5 +# Look for version: line + +# Diff reference files +diff <(curl -sL https://raw.githubusercontent.com/.../references/layouts/bento-grid.md) references/layouts/bento-grid.md +``` + +Reference files can be overwritten directly (they're unchanged from upstream). SKILL.md must be manually merged since it contains Hermes-specific adaptations. 
diff --git a/skills/creative/baoyu-infographic/SKILL.md b/skills/creative/baoyu-infographic/SKILL.md new file mode 100644 index 0000000000..fea3499cbf --- /dev/null +++ b/skills/creative/baoyu-infographic/SKILL.md @@ -0,0 +1,236 @@ +--- +name: baoyu-infographic +description: Generate professional infographics with 21 layout types and 21 visual styles. Analyzes content, recommends layout×style combinations, and generates publication-ready infographics. Use when user asks to create "infographic", "visual summary", "信息图", "可视化", or "高密度信息大图". +version: 1.56.1 +author: 宝玉 (JimLiu) +license: MIT +metadata: + hermes: + tags: [infographic, visual-summary, creative, image-generation] + homepage: https://github.com/JimLiu/baoyu-skills#baoyu-infographic +--- + +# Infographic Generator + +Adapted from [baoyu-infographic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem. + +Two dimensions: **layout** (information structure) × **style** (visual aesthetics). Freely combine any layout with any style. + +## When to Use + +Trigger this skill when the user asks to create an infographic, visual summary, information graphic, or uses terms like "信息图", "可视化", or "高密度信息大图". The user provides content (text, file path, URL, or topic) and optionally specifies layout, style, aspect ratio, or language. + +## Options + +| Option | Values | +|--------|--------| +| Layout | 21 options (see Layout Gallery), default: bento-grid | +| Style | 21 options (see Style Gallery), default: craft-handmade | +| Aspect | Named: landscape (16:9), portrait (9:16), square (1:1). Custom: any W:H ratio (e.g., 3:4, 4:3, 2.35:1) | +| Language | en, zh, ja, etc. 
| + + ## Layout Gallery + + | Layout | Best For | +|--------|----------| +| `linear-progression` | Timelines, processes, tutorials | +| `binary-comparison` | A vs B, before-after, pros-cons | +| `comparison-matrix` | Multi-factor comparisons | +| `hierarchical-layers` | Pyramids, priority levels | +| `tree-branching` | Categories, taxonomies | +| `hub-spoke` | Central concept with related items | +| `structural-breakdown` | Exploded views, cross-sections | +| `bento-grid` | Multiple topics, overview (default) | +| `iceberg` | Surface vs hidden aspects | +| `bridge` | Problem-solution | +| `funnel` | Conversion, filtering | +| `isometric-map` | Spatial relationships | +| `dashboard` | Metrics, KPIs | +| `periodic-table` | Categorized collections | +| `comic-strip` | Narratives, sequences | +| `story-mountain` | Plot structure, tension arcs | +| `jigsaw` | Interconnected parts | +| `venn-diagram` | Overlapping concepts | +| `winding-roadmap` | Journey, milestones | +| `circular-flow` | Cycles, recurring processes | +| `dense-modules` | High-density modules, data-rich guides | + +Full definitions: `references/layouts/<layout>.md` + +## Style Gallery + +| Style | Description | +|-------|-------------| +| `craft-handmade` | Hand-drawn, paper craft (default) | +| `claymation` | 3D clay figures, stop-motion | +| `kawaii` | Japanese cute, pastels | +| `storybook-watercolor` | Soft painted, whimsical | +| `chalkboard` | Chalk on black board | +| `cyberpunk-neon` | Neon glow, futuristic | +| `bold-graphic` | Comic style, halftone | +| `aged-academia` | Vintage science, sepia | +| `corporate-memphis` | Flat vector, vibrant | +| `technical-schematic` | Blueprint, engineering | +| `origami` | Folded paper, geometric | +| `pixel-art` | Retro 8-bit | +| `ui-wireframe` | Grayscale interface mockup | +| `subway-map` | Transit diagram | +| `ikea-manual` | Minimal line art | +| `knolling` | Organized flat-lay | +| `lego-brick` | Toy brick construction | +| `pop-laboratory` | Blueprint grid,
coordinate markers, lab precision | +| `morandi-journal` | Hand-drawn doodle, warm Morandi tones | +| `retro-pop-grid` | 1970s retro pop art, Swiss grid, thick outlines | +| `hand-drawn-edu` | Macaron pastels, hand-drawn wobble, stick figures | + +Full definitions: `references/styles/