diff --git a/.github/actions/nix-setup/action.yml b/.github/actions/nix-setup/action.yml
new file mode 100644
index 0000000000..0fcd7784bc
--- /dev/null
+++ b/.github/actions/nix-setup/action.yml
@@ -0,0 +1,8 @@
+name: 'Setup Nix'
+description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
+
+runs:
+  using: composite
+  steps:
+    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
+    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index f9e846e68c..228ee33964 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -3,8 +3,13 @@ name: Docker Build and Publish
 on:
   push:
     branches: [main]
-  pull_request:
-    branches: [main]
+    paths:
+      - '**/*.py'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'Dockerfile'
+      - 'docker/**'
+      - '.github/workflows/docker-publish.yml'
   release:
     types: [published]
@@ -49,6 +54,14 @@ jobs:
       - name: Test image starts
         run: |
+          # The image runs as the hermes user (UID 10000). GitHub Actions
+          # creates /tmp/hermes-test root-owned by default, which hermes
+          # can't write to — chown it to match the in-container UID before
+          # bind-mounting. Real users doing `docker run -v ~/.hermes:...`
+          # with their own UID hit the same issue and have their own
+          # remediations (HERMES_UID env var, or chown locally).
+          mkdir -p /tmp/hermes-test
+          sudo chown -R 10000:10000 /tmp/hermes-test
           docker run --rm \
             -v /tmp/hermes-test:/opt/data \
             --entrypoint /opt/hermes/docker/entrypoint.sh \
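A rough Python equivalent of the ownership pre-step above — `prepare_mount_dir` is a hypothetical helper written for illustration, and UID/GID 10000 are this image's values, not a general default:

```python
import os

def prepare_mount_dir(path: str, uid: int = 10000, gid: int = 10000) -> None:
    # Make the bind-mount source writable by the container user before
    # `docker run -v src:/opt/data`. Needs root, like `sudo chown -R` in CI.
    os.makedirs(path, exist_ok=True)
    os.chown(path, uid, gid)

prepare_mount_dir("/tmp/hermes-test")
```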
diff --git a/.github/workflows/nix-lockfile-check.yml b/.github/workflows/nix-lockfile-check.yml
new file mode 100644
index 0000000000..9c9bc734a6
--- /dev/null
+++ b/.github/workflows/nix-lockfile-check.yml
@@ -0,0 +1,68 @@
+name: Nix Lockfile Check
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-check-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Resolve head SHA
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
+      - name: Check lockfile hashes
+        id: check
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      - name: Post sticky PR comment (stale)
+        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
+
+      - name: Clear sticky PR comment (resolved)
+        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Fail if stale
+        if: steps.check.outputs.stale == 'true'
+        run: exit 1
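The sticky-comment steps above consume two step outputs from `--check` (`stale` and `report`). A minimal sketch of that output contract, assuming the emitter writes to `$GITHUB_OUTPUT` — the real emitter lives in the flake's `fix-lockfiles` app, which this diff doesn't show:

```python
import os

def emit_check_outputs(stale: bool, report_md: str) -> None:
    # Append step outputs the way GitHub Actions expects.
    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as fh:
        fh.write(f"stale={'true' if stale else 'false'}\n")
        # Multiline values must use the heredoc form.
        fh.write(f"report<<EOF\n{report_md}\nEOF\n")
```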
diff --git a/.github/workflows/nix-lockfile-fix.yml b/.github/workflows/nix-lockfile-fix.yml
new file mode 100644
index 0000000000..a1c7dd6e5c
--- /dev/null
+++ b/.github/workflows/nix-lockfile-fix.yml
@@ -0,0 +1,149 @@
+name: Nix Lockfile Fix
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to fix (leave empty to run on the selected branch)'
+        required: false
+        type: string
+  issue_comment:
+    types: [edited]
+
+permissions:
+  contents: write
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  fix:
+    # Run on manual dispatch OR when a task-list checkbox in the sticky
+    # lockfile-check comment flips from `[ ]` to `[x]`.
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'issue_comment'
+        && github.event.issue.pull_request != null
+        && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
+        && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - name: Authorize & resolve PR
+        id: resolve
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+        with:
+          script: |
+            // 1. Verify the actor has write access — applies to both checkbox
+            //    clicks and manual dispatch.
+            const { data: perm } =
+              await github.rest.repos.getCollaboratorPermissionLevel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                username: context.actor,
+              });
+            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
+              core.setFailed(
+                `${context.actor} lacks write access (has: ${perm.permission})`
+              );
+              return;
+            }
+
+            // 2. Resolve which ref to check out.
+            let prNumber = '';
+            if (context.eventName === 'issue_comment') {
+              prNumber = String(context.payload.issue.number);
+            } else if (context.eventName === 'workflow_dispatch') {
+              prNumber = context.payload.inputs.pr_number || '';
+            }
+
+            if (!prNumber) {
+              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
+              core.setOutput('repo', context.repo.repo);
+              core.setOutput('owner', context.repo.owner);
+              core.setOutput('pr', '');
+              return;
+            }
+
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: Number(prNumber),
+            });
+            core.setOutput('ref', pr.head.ref);
+            core.setOutput('repo', pr.head.repo.name);
+            core.setOutput('owner', pr.head.repo.owner.login);
+            core.setOutput('pr', String(pr.number));
+
+      # Overwrite the sticky lockfile-check comment with a "running" state as
+      # soon as the job is authorized, so the user sees their click was picked
+      # up before the ~minute of nix build work.
+      - name: Mark sticky as running
+        if: steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### 🔄 Applying lockfile fix…
+
+            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+        with:
+          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
+          ref: ${{ steps.resolve.outputs.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          fetch-depth: 0
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/tui.nix nix/web.nix
+          git commit -m "fix(nix): refresh npm lockfile hashes"
+          git push
+
+      - name: Update sticky (applied)
+        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile fix applied
+
+            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (already current)
+        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile hashes already current
+
+            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (failed)
+        if: failure() && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ❌ Lockfile fix failed
+
+            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
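The `issue_comment` guard in the fix workflow above reduces to a small predicate — restated here in Python for clarity (the workflow evaluates it with `contains()` expressions):

```python
MARK = "[x] **Apply lockfile fix**"

def checkbox_newly_ticked(new_body: str, old_body: str) -> bool:
    # Fire only when the edit *introduces* the checked box: present in the
    # new comment body, absent in the pre-edit body. Re-saving an
    # already-checked comment does not re-trigger the job.
    return MARK in new_body and MARK not in old_body
```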
diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml
index 387c9e5d13..7cae6f8151 100644
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -4,15 +4,6 @@ on:
   push:
     branches: [main]
   pull_request:
-    paths:
-      - 'flake.nix'
-      - 'flake.lock'
-      - 'nix/**'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'hermes_cli/**'
-      - 'run_agent.py'
-      - 'acp_adapter/**'

 permissions:
   contents: read
@@ -29,9 +20,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     timeout-minutes: 30
     steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
-      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: ./.github/actions/nix-setup
       - name: Check flake
         if: runner.os == 'Linux'
         run: nix flake check --print-build-logs
diff --git a/.github/workflows/supply-chain-audit.yml b/.github/workflows/supply-chain-audit.yml
index 4aa0fd321a..417e7b21f8 100644
--- a/.github/workflows/supply-chain-audit.yml
+++ b/.github/workflows/supply-chain-audit.yml
@@ -3,14 +3,31 @@ name: Supply Chain Audit
 on:
   pull_request:
     types: [opened, synchronize, reopened]
+    paths:
+      - '**/*.py'
+      - '**/*.pth'
+      - '**/setup.py'
+      - '**/setup.cfg'
+      - '**/sitecustomize.py'
+      - '**/usercustomize.py'
+      - '**/__init__.pth'

 permissions:
   pull-requests: write
   contents: read

+# Narrow, high-signal scanner. Only fires on critical indicators of supply
+# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
+# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
+# Actions version unpinning, outbound POST/PUT) were intentionally
+# removed — they fired on nearly every PR and trained reviewers to ignore
+# the scanner. Keep this file's checks ruthlessly narrow: if you find
+# yourself adding WARNING-tier patterns here again, make a separate
+# advisory-only workflow instead.
+
 jobs:
   scan:
-    name: Scan PR for supply chain risks
+    name: Scan PR for critical supply chain risks
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -18,7 +35,7 @@ jobs:
         with:
           fetch-depth: 0

-      - name: Scan diff for suspicious patterns
+      - name: Scan diff for critical patterns
         id: scan
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -28,19 +45,19 @@ jobs:
           BASE="${{ github.event.pull_request.base.sha }}"
           HEAD="${{ github.event.pull_request.head.sha }}"

-          # Get the full diff (added lines only)
+          # Added lines only, excluding lockfiles.
           DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)

           FINDINGS=""
-          CRITICAL=false

           # --- .pth files (auto-execute on Python startup) ---
+          # The exact mechanism used in the litellm supply chain attack:
+          # https://github.com/BerriAI/litellm/issues/24512
           PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
           if [ -n "$PTH_FILES" ]; then
-            CRITICAL=true
             FINDINGS="${FINDINGS}
 ### 🚨 CRITICAL: .pth file added or modified
-Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).
+Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required.
 **Files:**
 \`\`\`
@@ -49,13 +66,12 @@ jobs:
 "
           fi

-          # --- base64 + exec/eval combo (the litellm attack pattern) ---
+          # --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
           B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
           if [ -n "$B64_EXEC_HITS" ]; then
-            CRITICAL=true
             FINDINGS="${FINDINGS}
 ### 🚨 CRITICAL: base64 decode + exec/eval combo
-This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.
+Base64-decoded strings passed directly to exec/eval — the signature of hidden credential-stealing payloads.

 **Matches:**
 \`\`\`
@@ -64,41 +80,12 @@ jobs:
 "
           fi

-          # --- base64 decode/encode (alone — legitimate uses exist) ---
-          B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
-          if [ -n "$B64_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: base64 encoding/decoding detected
-Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.
-
-**Matches (first 20):**
-\`\`\`
-${B64_HITS}
-\`\`\`
-"
-          fi
-
-          # --- exec/eval with string arguments ---
-          EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
-          if [ -n "$EXEC_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: exec() or eval() usage
-Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.
-
-**Matches (first 20):**
-\`\`\`
-${EXEC_HITS}
-\`\`\`
-"
-          fi
-
-          # --- subprocess with encoded/obfuscated commands ---
-          PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
+          # --- subprocess with encoded/obfuscated command argument ---
+          PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
           if [ -n "$PROC_HITS" ]; then
-            CRITICAL=true
             FINDINGS="${FINDINGS}
 ### 🚨 CRITICAL: subprocess with encoded/obfuscated command
-Subprocess calls with encoded arguments are a strong indicator of payload execution.
+Subprocess calls whose command strings are base64- or hex-encoded are a strong indicator of payload execution.

 **Matches:**
 \`\`\`
@@ -107,25 +94,12 @@ jobs:
 "
           fi

-          # --- Network calls to non-standard domains ---
-          EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true)
-          if [ -n "$EXFIL_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: Outbound network calls (POST/PUT)
-Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.
-
-**Matches (first 10):**
-\`\`\`
-${EXFIL_HITS}
-\`\`\`
-"
-          fi
-
-          # --- setup.py / setup.cfg install hooks ---
-          SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
+          # --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) ---
+          # These execute during pip install or interpreter startup.
+          SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
           if [ -n "$SETUP_HITS" ]; then
             FINDINGS="${FINDINGS}
-### ⚠️ WARNING: Install hook files modified
+### 🚨 CRITICAL: Install-hook file added or modified
 These files can execute code during package installation or interpreter startup.

 **Files:**
@@ -135,114 +109,31 @@ jobs:
 "
           fi

-          # --- Compile/marshal/pickle (code object injection) ---
-          MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
-          if [ -n "$MARSHAL_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: marshal/pickle/compile usage
-These can deserialize or construct executable code objects.
-
-**Matches:**
-\`\`\`
-${MARSHAL_HITS}
-\`\`\`
-"
-          fi
-
-          # --- CI/CD workflow files modified ---
-          WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true)
-          if [ -n "$WORKFLOW_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: CI/CD workflow files modified
-Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added.
-
-**Files:**
-\`\`\`
-${WORKFLOW_HITS}
-\`\`\`
-"
-          fi
-
-          # --- Dockerfile / container build files modified ---
-          DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true)
-          if [ -n "$DOCKER_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: Container build files modified
-Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands.
-
-**Files:**
-\`\`\`
-${DOCKER_HITS}
-\`\`\`
-"
-          fi
-
-          # --- Dependency manifest files modified ---
-          DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true)
-          if [ -n "$DEP_HITS" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: Dependency manifest files modified
-Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources.
-
-**Files:**
-\`\`\`
-${DEP_HITS}
-\`\`\`
-"
-          fi
-
-          # --- GitHub Actions version unpinning (mutable tags instead of SHAs) ---
-          ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep 'uses:' | grep -v '#' | grep -E '@v[0-9]' | head -10 || true)
-          if [ -n "$ACTIONS_UNPIN" ]; then
-            FINDINGS="${FINDINGS}
-### ⚠️ WARNING: GitHub Actions with mutable version tags
-Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised.
-
-**Matches:**
-\`\`\`
-${ACTIONS_UNPIN}
-\`\`\`
-"
-          fi
-
-          # --- Output results ---
           if [ -n "$FINDINGS" ]; then
             echo "found=true" >> "$GITHUB_OUTPUT"
-            if [ "$CRITICAL" = true ]; then
-              echo "critical=true" >> "$GITHUB_OUTPUT"
-            else
-              echo "critical=false" >> "$GITHUB_OUTPUT"
-            fi
-
             # Write findings to a file (multiline env vars are fragile)
             echo "$FINDINGS" > /tmp/findings.md
           else
             echo "found=false" >> "$GITHUB_OUTPUT"
-            echo "critical=false" >> "$GITHUB_OUTPUT"
           fi

-      - name: Post warning comment
+      - name: Post critical finding comment
        if: steps.scan.outputs.found == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
-          SEVERITY="⚠️ Supply Chain Risk Detected"
-          if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
-            SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
-          fi
+          BODY="## 🚨 CRITICAL Supply Chain Risk Detected

-          BODY="## ${SEVERITY}
-
-          This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.
+          This PR contains a pattern that has been used in real supply chain attacks. A maintainer must review the flagged code carefully before merging.

           $(cat /tmp/findings.md)

           ---
-          *Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"
+          *Scanner only fires on high-signal indicators: .pth files, base64+exec/eval combos, subprocess with encoded commands, or install-hook files. Low-signal warnings were removed intentionally — if you're seeing this comment, the finding is worth inspecting.*"

           gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"

       - name: Fail on critical findings
-        if: steps.scan.outputs.critical == 'true'
+        if: steps.scan.outputs.found == 'true'
        run: |
          echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
          exit 1
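The grep pipelines above amount to a few line-level regex checks. A sketch of the base64+exec detector in Python — `re` stands in for grep here; the real workflow stays in bash:

```python
import re

B64_DECODE = re.compile(r"base64\.(b64decode|decodebytes|urlsafe_b64decode)", re.I)
EXEC_EVAL = re.compile(r"\b(exec|eval)\s*\(", re.I)

def critical_b64_exec_lines(diff_text: str) -> list[str]:
    # Added lines where a base64 decode and exec/eval co-occur — the
    # litellm-style payload signature the workflow treats as CRITICAL.
    return [
        line for line in diff_text.splitlines()
        if line.startswith("+") and B64_DECODE.search(line) and EXEC_EVAL.search(line)
    ]
```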
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7d0822690a..a92afdfa40 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -3,8 +3,14 @@ name: Tests
 on:
   push:
     branches: [main]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'
   pull_request:
     branches: [main]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'

 permissions:
   contents: read
@@ -17,7 +23,7 @@ jobs:
   test:
     runs-on: ubuntu-latest
-    timeout-minutes: 10
+    timeout-minutes: 20
     steps:
       - name: Checkout code
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
diff --git a/.gitignore b/.gitignore
index e516d154f3..8b455cf506 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,6 +54,11 @@ environments/benchmarks/evals/
 # Web UI build output
 hermes_cli/web_dist/

+# Web UI assets — synced from @nous-research/ui at build time via
+# `npm run sync-assets` (see web/package.json).
+web/public/fonts/
+web/public/ds-assets/
+
 # Release script temp files
 .release_notes.md
 mini-swe-agent/
diff --git a/AGENTS.md b/AGENTS.md
index 8bd979b058..0f5ce15f28 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -566,3 +566,52 @@ python -m pytest tests/ -q -n 4

 Worker count above 4 will surface test-ordering flakes that CI never sees.
 Always run the full suite before pushing changes.
+
+### Don't write change-detector tests
+
+A test is a **change-detector** if it fails whenever data that is **expected
+to change** gets updated — model catalogs, config version numbers,
+enumeration counts, hardcoded lists of provider models. These tests add no
+behavioral coverage; they just guarantee that routine source updates break
+CI and cost engineering time to "fix."
+
+**Do not write:**
+
+```python
+# catalog snapshot — breaks every model release
+assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
+assert "MiniMax-M2.7" in models
+
+# config version literal — breaks every schema bump
+assert DEFAULT_CONFIG["_config_version"] == 21
+
+# enumeration count — breaks every time a skill/provider is added
+assert len(_PROVIDER_MODELS["huggingface"]) == 8
+```
+
+**Do write:**
+
+```python
+# behavior: does the catalog plumbing work at all?
+assert "gemini" in _PROVIDER_MODELS
+assert len(_PROVIDER_MODELS["gemini"]) >= 1
+
+# behavior: does migration bump the user's version to current latest?
+assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
+
+# invariant: no plan-only model leaks into the legacy list
+assert not (set(moonshot_models) & coding_plan_only_models)
+
+# invariant: every model in the catalog has a context-length entry
+for m in _PROVIDER_MODELS["huggingface"]:
+    assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
+```
+
+The rule: if the test reads like a snapshot of current data, delete it. If
+it reads like a contract about how two pieces of data must relate, keep it.
+When a PR adds a new provider/model and you want a test, make the test
+assert the relationship (e.g. "catalog entries all have context lengths"),
+not the specific names.
+
+Reviewers should reject new change-detector tests; authors should convert
+them into invariants before re-requesting review.
diff --git a/Dockerfile b/Dockerfile
index 0d3da72eb7..a684f9fb31 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -27,12 +27,10 @@ WORKDIR /opt/hermes
 # Copy only package manifests first so npm install + Playwright are cached
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
-COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
 COPY web/package.json web/package-lock.json web/

 RUN npm install --prefer-offline --no-audit && \
     npx playwright install --with-deps chromium --only-shell && \
-    (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
     (cd web && npm install --prefer-offline --no-audit) && \
     npm cache clean --force
diff --git a/acp_adapter/entry.py b/acp_adapter/entry.py
index 7db5747a4d..3089f78c27 100644
--- a/acp_adapter/entry.py
+++ b/acp_adapter/entry.py
@@ -20,6 +20,46 @@ from pathlib import Path

 from hermes_constants import get_hermes_home

+# Methods clients send as periodic liveness probes. They are not part of the
+# ACP schema, so the acp router correctly returns JSON-RPC -32601 to the
+# caller — but the supervisor task that dispatches the request then surfaces
+# the raised RequestError via ``logging.exception("Background task failed")``,
+# which dumps a traceback to stderr every probe interval. Clients like
+# acp-bridge already treat the -32601 response as "agent alive", so the
+# traceback is pure noise. We keep the protocol response intact and only
+# silence the stderr noise for this specific benign case.
+_BENIGN_PROBE_METHODS = frozenset({"ping", "health", "healthcheck"})
+
+
+class _BenignProbeMethodFilter(logging.Filter):
+    """Suppress acp 'Background task failed' tracebacks caused by unknown
+    liveness-probe methods (e.g. ``ping``) while leaving every other
+    background-task error — including method_not_found for any non-probe
+    method — visible in stderr.
+    """
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        if record.getMessage() != "Background task failed":
+            return True
+        exc_info = record.exc_info
+        if not exc_info:
+            return True
+        exc = exc_info[1]
+        # Imported lazily so this module stays importable when the optional
+        # ``agent-client-protocol`` dependency is not installed.
+        try:
+            from acp.exceptions import RequestError
+        except ImportError:
+            return True
+        if not isinstance(exc, RequestError):
+            return True
+        if getattr(exc, "code", None) != -32601:
+            return True
+        data = getattr(exc, "data", None)
+        method = data.get("method") if isinstance(data, dict) else None
+        return method not in _BENIGN_PROBE_METHODS
+
+
 def _setup_logging() -> None:
     """Route all logging to stderr so stdout stays clean for ACP stdio."""
     handler = logging.StreamHandler(sys.stderr)
@@ -29,6 +69,7 @@ def _setup_logging() -> None:
             datefmt="%Y-%m-%d %H:%M:%S",
         )
     )
+    handler.addFilter(_BenignProbeMethodFilter())
     root = logging.getLogger()
     root.handlers.clear()
     root.addHandler(handler)
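The filter pattern above, reduced to a self-contained toy so the keep/drop behavior is easy to see — `ToyError` stands in for `acp.exceptions.RequestError`, whose constructor this diff doesn't show:

```python
import logging
import sys

class ToyError(Exception):
    def __init__(self, code: int, method: str) -> None:
        super().__init__(method)
        self.code, self.method = code, method

class DropBenignProbe(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        exc = record.exc_info[1] if record.exc_info else None
        # Drop only the one benign shape; keep every other failure visible.
        return not (isinstance(exc, ToyError) and exc.code == -32601 and exc.method == "ping")

log = logging.getLogger("toy")
handler = logging.StreamHandler(sys.stderr)
handler.addFilter(DropBenignProbe())
log.addHandler(handler)
log.propagate = False

for method in ("ping", "fs/read"):
    try:
        raise ToyError(-32601, method)
    except ToyError:
        log.exception("Background task failed")  # only the fs/read case prints
```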
diff --git a/acp_adapter/permissions.py b/acp_adapter/permissions.py
index 68f61e340a..c2e1a59826 100644
--- a/acp_adapter/permissions.py
+++ b/acp_adapter/permissions.py
@@ -63,6 +63,9 @@ def make_approval_callback(
             logger.warning("Permission request timed out or failed: %s", exc)
             return "deny"

+        if response is None:
+            return "deny"
+
         outcome = response.outcome
         if isinstance(outcome, AllowedOutcome):
             option_id = outcome.option_id
diff --git a/acp_adapter/server.py b/acp_adapter/server.py
index 4685a68a8c..d73c71157a 100644
--- a/acp_adapter/server.py
+++ b/acp_adapter/server.py
@@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 import logging
+import os
 from collections import defaultdict, deque
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Deque, Optional
@@ -51,7 +52,7 @@ try:
 except ImportError:
     from acp.schema import AuthMethod as AuthMethodAgent  # type: ignore[attr-defined]

-from acp_adapter.auth import detect_provider, has_provider
+from acp_adapter.auth import detect_provider
 from acp_adapter.events import (
     make_message_cb,
     make_step_cb,
@@ -71,6 +72,11 @@ except Exception:
 # Thread pool for running AIAgent (synchronous) in parallel.
 _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")

+# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
+# does not expose a client-side limit, so this is a fixed cap that clients
+# paginate against using `cursor` / `next_cursor`.
+_LIST_SESSIONS_PAGE_SIZE = 50
+

 def _extract_text(
     prompt: list[
@@ -351,9 +357,18 @@ class HermesACPAgent(acp.Agent):
         )

     async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
-        if has_provider():
-            return AuthenticateResponse()
-        return None
+        # Only accept authenticate() calls whose method_id matches the
+        # provider we advertised in initialize(). Without this check,
+        # authenticate() would acknowledge any method_id as long as the
+        # server has provider credentials configured — harmless under
+        # Hermes' threat model (ACP is stdio-only, local-trust), but poor
+        # API hygiene and confusing if ACP ever grows multi-method auth.
+        provider = detect_provider()
+        if not provider:
+            return None
+        if not isinstance(method_id, str) or method_id.strip().lower() != provider:
+            return None
+        return AuthenticateResponse()

     # ---- Session management -------------------------------------------------
@@ -437,7 +452,28 @@ class HermesACPAgent(acp.Agent):
         cwd: str | None = None,
         **kwargs: Any,
     ) -> ListSessionsResponse:
+        """List ACP sessions with optional ``cwd`` filtering and cursor pagination.
+
+        ``cwd`` is passed through to ``SessionManager.list_sessions`` which already
+        normalizes and filters by working directory. ``cursor`` is a ``session_id``
+        previously returned as ``next_cursor``; results resume after that entry.
+        Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
+        results remain, ``next_cursor`` is set to the last returned ``session_id``.
+        """
         infos = self.session_manager.list_sessions(cwd=cwd)
+
+        if cursor:
+            for idx, s in enumerate(infos):
+                if s["session_id"] == cursor:
+                    infos = infos[idx + 1:]
+                    break
+            else:
+                # Unknown cursor -> empty page (do not fall back to full list).
+                infos = []
+
+        has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
+        infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
+
         sessions = []
         for s in infos:
             updated_at = s.get("updated_at")
@@ -451,7 +487,9 @@ class HermesACPAgent(acp.Agent):
                     updated_at=updated_at,
                 )
             )
-        return ListSessionsResponse(sessions=sessions)
+
+        next_cursor = sessions[-1].session_id if has_more and sessions else None
+        return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)

     # ---- Prompt (core) ------------------------------------------------------
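Client side of the cursor contract, as a hedged sketch — `client.list_sessions` stands in for whatever the ACP client library actually exposes:

```python
async def fetch_all_sessions(client, cwd: str | None = None) -> list:
    sessions, cursor = [], None
    while True:
        resp = await client.list_sessions(cwd=cwd, cursor=cursor)
        sessions.extend(resp.sessions)
        if not resp.next_cursor:   # None means the server returned the last page
            return sessions
        cursor = resp.next_cursor  # resume after the last returned session_id
```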
@@ -517,15 +555,32 @@ class HermesACPAgent(acp.Agent):
         agent.step_callback = step_cb
         agent.message_callback = message_cb

-        if approval_cb:
-            try:
-                from tools import terminal_tool as _terminal_tool
-                previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
-                _terminal_tool.set_approval_callback(approval_cb)
-            except Exception:
-                logger.debug("Could not set ACP approval callback", exc_info=True)
+        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
+        # Set it INSIDE _run_agent so the TLS write happens in the executor
+        # thread — setting it here would write to the event-loop thread's TLS,
+        # not the executor's. Also set HERMES_INTERACTIVE so approval.py
+        # takes the CLI-interactive path (which calls the registered
+        # callback via prompt_dangerous_approval) instead of the
+        # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
+        # ACP's conn.request_permission maps cleanly to the interactive
+        # callback shape — not the gateway-queue HERMES_EXEC_ASK path,
+        # which requires a notify_cb registered in _gateway_notify_cbs.
+        previous_approval_cb = None
+        previous_interactive = None

         def _run_agent() -> dict:
+            nonlocal previous_approval_cb, previous_interactive
+            if approval_cb:
+                try:
+                    from tools import terminal_tool as _terminal_tool
+                    previous_approval_cb = _terminal_tool._get_approval_callback()
+                    _terminal_tool.set_approval_callback(approval_cb)
+                except Exception:
+                    logger.debug("Could not set ACP approval callback", exc_info=True)
+            # Signal to tools.approval that we have an interactive callback
+            # and the non-interactive auto-approve path must not fire.
+            previous_interactive = os.environ.get("HERMES_INTERACTIVE")
+            os.environ["HERMES_INTERACTIVE"] = "1"
             try:
                 result = agent.run_conversation(
                     user_message=user_text,
@@ -537,6 +592,11 @@ class HermesACPAgent(acp.Agent):
                 logger.exception("Agent error in session %s", session_id)
                 return {"final_response": f"Error: {e}", "messages": state.history}
             finally:
+                # Restore HERMES_INTERACTIVE.
+                if previous_interactive is None:
+                    os.environ.pop("HERMES_INTERACTIVE", None)
+                else:
+                    os.environ["HERMES_INTERACTIVE"] = previous_interactive
                 if approval_cb:
                     try:
                         from tools import terminal_tool as _terminal_tool
@@ -613,8 +673,8 @@ class HermesACPAgent(acp.Agent):
             await self._conn.session_update(
                 session_id=session_id,
                 update=AvailableCommandsUpdate(
-                    sessionUpdate="available_commands_update",
-                    availableCommands=self._available_commands(),
+                    session_update="available_commands_update",
+                    available_commands=self._available_commands(),
                 ),
             )
         except Exception:
diff --git a/agent/account_usage.py b/agent/account_usage.py
new file mode 100644
index 0000000000..0e9562dcc9
--- /dev/null
+++ b/agent/account_usage.py
@@ -0,0 +1,326 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+import httpx
+
+from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
+from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
+from hermes_cli.runtime_provider import resolve_runtime_provider
+
+
+def _utc_now() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+@dataclass(frozen=True)
+class AccountUsageWindow:
+    label: str
+    used_percent: Optional[float] = None
+    reset_at: Optional[datetime] = None
+    detail: Optional[str] = None
+
+
+@dataclass(frozen=True)
+class AccountUsageSnapshot:
+    provider: str
+    source: str
+    fetched_at: datetime
+    title: str = "Account limits"
+    plan: Optional[str] = None
+    windows: tuple[AccountUsageWindow, ...] = ()
+    details: tuple[str, ...] = ()
+    unavailable_reason: Optional[str] = None
+
+    @property
+    def available(self) -> bool:
+        return bool(self.windows or self.details) and not self.unavailable_reason
+
+
+def _title_case_slug(value: Optional[str]) -> Optional[str]:
+    cleaned = str(value or "").strip()
+    if not cleaned:
+        return None
+    return cleaned.replace("_", " ").replace("-", " ").title()
+
+
+def _parse_dt(value: Any) -> Optional[datetime]:
+    if value in (None, ""):
+        return None
+    if isinstance(value, (int, float)):
+        return datetime.fromtimestamp(float(value), tz=timezone.utc)
+    if isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return None
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        try:
+            dt = datetime.fromisoformat(text)
+            return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
+        except ValueError:
+            return None
+    return None
+
+
+def _format_reset(dt: Optional[datetime]) -> str:
+    if not dt:
+        return "unknown"
+    local_dt = dt.astimezone()
+    delta = dt - _utc_now()
+    total_seconds = int(delta.total_seconds())
+    if total_seconds <= 0:
+        return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+    hours, rem = divmod(total_seconds, 3600)
+    minutes = rem // 60
+    if hours >= 24:
+        days, hours = divmod(hours, 24)
+        rel = f"in {days}d {hours}h"
+    elif hours > 0:
+        rel = f"in {hours}h {minutes}m"
+    else:
+        rel = f"in {minutes}m"
+    return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+
+
+def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
+    if not snapshot:
+        return []
+    header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
+    lines = [header]
+    if snapshot.plan:
+        lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
+    else:
+        lines.append(f"Provider: {snapshot.provider}")
+    for window in snapshot.windows:
+        if window.used_percent is None:
+            base = f"{window.label}: unavailable"
+        else:
+            remaining = max(0, round(100 - float(window.used_percent)))
+            used = max(0, round(float(window.used_percent)))
+            base = f"{window.label}: {remaining}% remaining ({used}% used)"
+        if window.reset_at:
+            base += f" • resets {_format_reset(window.reset_at)}"
+        elif window.detail:
+            base += f" • {window.detail}"
+        lines.append(base)
+    for detail in snapshot.details:
+        lines.append(detail)
+    if snapshot.unavailable_reason:
+        lines.append(f"Unavailable: {snapshot.unavailable_reason}")
+    return lines
+
+
+def _resolve_codex_usage_url(base_url: str) -> str:
+    normalized = (base_url or "").strip().rstrip("/")
+    if not normalized:
+        normalized = "https://chatgpt.com/backend-api/codex"
+    if normalized.endswith("/codex"):
+        normalized = normalized[: -len("/codex")]
+    if "/backend-api" in normalized:
+        return normalized + "/wham/usage"
+    return normalized + "/api/codex/usage"
+
+
+def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
+    creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
+    token_data = _read_codex_tokens()
+    tokens = token_data.get("tokens") or {}
+    account_id = str(tokens.get("account_id", "") or "").strip() or None
+    headers = {
+        "Authorization": f"Bearer {creds['api_key']}",
+        "Accept": "application/json",
+        "User-Agent": "codex-cli",
+    }
+    if account_id:
+        headers["ChatGPT-Account-Id"] = account_id
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
+        response.raise_for_status()
+        payload = response.json() or {}
+    rate_limit = payload.get("rate_limit") or {}
+    windows: list[AccountUsageWindow] = []
+    for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
+        window = rate_limit.get(key) or {}
+        used = window.get("used_percent")
+        if used is None:
+            continue
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=float(used),
+                reset_at=_parse_dt(window.get("reset_at")),
+            )
+        )
+    details: list[str] = []
+    credits = payload.get("credits") or {}
+    if credits.get("has_credits"):
+        balance = credits.get("balance")
+        if isinstance(balance, (int, float)):
+            details.append(f"Credits balance: ${float(balance):.2f}")
+        elif credits.get("unlimited"):
+            details.append("Credits balance: unlimited")
+    return AccountUsageSnapshot(
+        provider="openai-codex",
+        source="usage_api",
+        fetched_at=_utc_now(),
+        plan=_title_case_slug(payload.get("plan_type")),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
+    token = (resolve_anthropic_token() or "").strip()
+    if not token:
+        return None
+    if not _is_oauth_token(token):
+        return AccountUsageSnapshot(
+            provider="anthropic",
+            source="oauth_usage_api",
+            fetched_at=_utc_now(),
+            unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
+        )
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+        "anthropic-beta": "oauth-2025-04-20",
+        "User-Agent": "claude-code/2.1.0",
+    }
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
+        response.raise_for_status()
+        payload = response.json() or {}
+    windows: list[AccountUsageWindow] = []
+    mapping = (
+        ("five_hour", "Current session"),
+        ("seven_day", "Current week"),
+        ("seven_day_opus", "Opus week"),
+        ("seven_day_sonnet", "Sonnet week"),
+    )
+    for key, label in mapping:
+        window = payload.get(key) or {}
+        util = window.get("utilization")
+        if util is None:
+            continue
+        used = float(util) * 100 if float(util) <= 1 else float(util)
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=used,
+                reset_at=_parse_dt(window.get("resets_at")),
+            )
+        )
+    details: list[str] = []
+    extra = payload.get("extra_usage") or {}
+    if extra.get("is_enabled"):
+        used_credits = extra.get("used_credits")
+        monthly_limit = extra.get("monthly_limit")
+        currency = extra.get("currency") or "USD"
+        if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
+            details.append(
+                f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
+            )
+    return AccountUsageSnapshot(
+        provider="anthropic",
+        source="oauth_usage_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
+    runtime = resolve_runtime_provider(
+        requested="openrouter",
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )
+    token = str(runtime.get("api_key", "") or "").strip()
+    if not token:
+        return None
+    normalized = str(runtime.get("base_url", "") or "").rstrip("/")
+    credits_url = f"{normalized}/credits"
+    key_url = f"{normalized}/key"
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+    }
+    with httpx.Client(timeout=10.0) as client:
+        credits_resp = client.get(credits_url, headers=headers)
+        credits_resp.raise_for_status()
+        credits = (credits_resp.json() or {}).get("data") or {}
+        try:
+            key_resp = client.get(key_url, headers=headers)
+            key_resp.raise_for_status()
+            key_data = (key_resp.json() or {}).get("data") or {}
+        except Exception:
+            key_data = {}
+    total_credits = float(credits.get("total_credits") or 0.0)
+    total_usage = float(credits.get("total_usage") or 0.0)
+    details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
+    windows: list[AccountUsageWindow] = []
+    limit = key_data.get("limit")
+    limit_remaining = key_data.get("limit_remaining")
+    limit_reset = str(key_data.get("limit_reset") or "").strip()
+    usage = key_data.get("usage")
+    if (
+        isinstance(limit, (int, float))
+        and float(limit) > 0
+        and isinstance(limit_remaining, (int, float))
+        and 0 <= float(limit_remaining) <= float(limit)
+    ):
+        limit_value = float(limit)
+        remaining_value = float(limit_remaining)
+        used_percent = ((limit_value - remaining_value) / limit_value) * 100
+        detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
+        if limit_reset:
+            detail_parts.append(f"resets {limit_reset}")
+        windows.append(
+            AccountUsageWindow(
+                label="API key quota",
+                used_percent=used_percent,
+                detail=" • ".join(detail_parts),
+            )
+        )
+    if isinstance(usage, (int, float)):
+        usage_parts = [f"API key usage: ${float(usage):.2f} total"]
+        for value, label in (
+            (key_data.get("usage_daily"), "today"),
+            (key_data.get("usage_weekly"), "this week"),
+            (key_data.get("usage_monthly"), "this month"),
+        ):
+            if isinstance(value, (int, float)) and float(value) > 0:
+                usage_parts.append(f"${float(value):.2f} {label}")
+        details.append(" • ".join(usage_parts))
+    return AccountUsageSnapshot(
+        provider="openrouter",
+        source="credits_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def fetch_account_usage(
+    provider: Optional[str],
+    *,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> Optional[AccountUsageSnapshot]:
+    normalized = str(provider or "").strip().lower()
+    if normalized in {"", "auto", "custom"}:
+        return None
+    try:
+        if normalized == "openai-codex":
+            return _fetch_codex_account_usage()
+        if normalized == "anthropic":
+            return _fetch_anthropic_account_usage()
+        if normalized == "openrouter":
+            return _fetch_openrouter_account_usage(base_url, api_key)
+    except Exception:
+        return None
+    return None
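Example wiring for the new module, using names from this file — it needs live provider credentials, so treat it as illustrative rather than a test:

```python
from agent.account_usage import fetch_account_usage, render_account_usage_lines

snapshot = fetch_account_usage("openrouter")
if snapshot and snapshot.available:
    for line in render_account_usage_lines(snapshot):
        print(line)
elif snapshot and snapshot.unavailable_reason:
    print(snapshot.unavailable_reason)
# fetch_account_usage swallows provider errors and returns None, so callers
# must handle the no-snapshot case themselves.
```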
diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 64b9522517..5e36b1f37e 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -19,6 +19,7 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
+from utils import normalize_proxy_env_vars

 try:
     import anthropic as _anthropic_sdk
@@ -265,6 +266,14 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
     return True  # Any other endpoint is a third-party proxy


+def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
+    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
     """Return True for Anthropic-compatible providers that require Bearer auth.
@@ -292,9 +301,15 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
     return _COMMON_BETAS


-def build_anthropic_client(api_key: str, base_url: str = None):
+def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
     """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

+    If *timeout* is provided it overrides the default 900s read timeout. The
+    connect timeout stays at 10s. Callers pass this from the per-provider /
+    per-model ``request_timeout_seconds`` config so Anthropic-native and
+    Anthropic-compatible providers respect the same knob as OpenAI-wire
+    providers.
+
     Returns an anthropic.Anthropic instance.
     """
     if _anthropic_sdk is None:
@@ -302,19 +317,32 @@ def build_anthropic_client(api_key: str, base_url: str = None):
             "The 'anthropic' package is required for the Anthropic provider. "
             "Install it with: pip install 'anthropic>=0.39.0'"
         )
+
+    normalize_proxy_env_vars()
+
     from httpx import Timeout

     normalized_base_url = _normalize_base_url_text(base_url)
+    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
     kwargs = {
-        "timeout": Timeout(timeout=900.0, connect=10.0),
+        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
     }
     if normalized_base_url:
         kwargs["base_url"] = normalized_base_url

     common_betas = _common_betas_for_base_url(normalized_base_url)
-    if _requires_bearer_auth(normalized_base_url):
+    if _is_kimi_coding_endpoint(base_url):
+        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
+        # to be recognized as a valid Coding Agent. Without it, returns 403.
+        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
+        kwargs["api_key"] = api_key
+        kwargs["default_headers"] = {
+            "User-Agent": "claude-code/0.1.0",
+            **({"anthropic-beta": ",".join(common_betas)} if common_betas else {}),
+        }
+    elif _requires_bearer_auth(normalized_base_url):
         # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer even for regular API keys. Route those endpoints
+        # Authorization: Bearer *** for regular API keys. Route those endpoints
         # through auth_token so the SDK sends Bearer auth instead of x-api-key.
         # Check this before OAuth token shape detection because MiniMax secrets do
         # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1055,6 +1083,31 @@ def convert_messages_to_anthropic(
                     "name": fn.get("name", ""),
                     "input": parsed_args,
                 })
+            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+            # tool-call messages to carry reasoning_content when thinking is
+            # enabled server-side. Preserve it as a thinking block so Kimi
+            # can validate the message history. See hermes-agent#13848.
+            #
+            # Accept empty string "" — _copy_reasoning_content_for_api()
+            # injects "" as a tier-3 fallback for Kimi tool-call messages
+            # that had no reasoning. Kimi requires the field to exist, even
+            # if empty.
+            #
+            # Prepend (not append): Anthropic protocol requires thinking
+            # blocks before text and tool_use blocks.
+            #
+            # Guard: only add when reasoning_details didn't already contribute
+            # thinking blocks. On native Anthropic, reasoning_details produces
+            # signed thinking blocks — adding another unsigned one from
+            # reasoning_content would create a duplicate (same text) that gets
+            # downgraded to a spurious text block on the last assistant message.
+            reasoning_content = m.get("reasoning_content")
+            _already_has_thinking = any(
+                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
+                for b in blocks
+            )
+            if isinstance(reasoning_content, str) and not _already_has_thinking:
+                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
             # Anthropic rejects empty assistant content
             effective = blocks or content
             if not effective or effective == "":
@@ -1210,6 +1263,7 @@ def convert_messages_to_anthropic(
     # cache markers can interfere with signature validation.
     _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
     _is_third_party = _is_third_party_anthropic_endpoint(base_url)
+    _is_kimi = _is_kimi_coding_endpoint(base_url)

     last_assistant_idx = None
     for i in range(len(result) - 1, -1, -1):
@@ -1221,7 +1275,25 @@ def convert_messages_to_anthropic(
         if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
             continue

-        if _is_third_party or idx != last_assistant_idx:
+        if _is_kimi:
+            # Kimi's /coding endpoint enables thinking server-side and
+            # requires unsigned thinking blocks on replayed assistant
+            # tool-call messages. Strip signed Anthropic blocks (Kimi
+            # can't validate signatures) but preserve the unsigned ones
+            # we synthesised from reasoning_content above.
+            new_content = []
+            for b in m["content"]:
+                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
+                    new_content.append(b)
+                    continue
+                if b.get("signature") or b.get("data"):
+                    # Anthropic-signed block — Kimi can't validate, strip
+                    continue
+                # Unsigned thinking (synthesised from reasoning_content) —
+                # keep it: Kimi needs it for message-history validation.
+                new_content.append(b)
+            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
+        elif _is_third_party or idx != last_assistant_idx:
             # Third-party endpoint: strip ALL thinking blocks from every
             # assistant message — signatures are Anthropic-proprietary.
             # Direct Anthropic: strip from non-latest assistant messages only.
@@ -1398,11 +1470,25 @@ def build_anthropic_kwargs(
     # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
     # not adaptive). Haiku does NOT support extended thinking — skip entirely.
     #
+    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
+    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
+    # validates the message history and requires every prior assistant
+    # tool-call message to carry OpenAI-style ``reasoning_content``. The
+    # Anthropic path never populates that field, and
+    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
+    # on third-party endpoints — so the request fails with HTTP 400
+    # "thinking is enabled but reasoning_content is missing in assistant
+    # tool call message at index N". Kimi's reasoning is driven server-side
+    # on the /coding route, so skip Anthropic's thinking parameter entirely
+    # for that host. (Kimi on chat_completions enables thinking via
+    # extra_body in the ChatCompletionsTransport — see #13503.)
+    #
     # On 4.7+ the `thinking.display` field defaults to "omitted", which
     # silently hides reasoning text that Hermes surfaces in its CLI. We
     # request "summarized" so the reasoning blocks stay populated — matching
     # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    if reasoning_config and isinstance(reasoning_config, dict):
+    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
         if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
             effort = str(reasoning_config.get("effort", "medium")).lower()
             budget = THINKING_BUDGET.get(effort, 8000)
@@ -1518,3 +1604,42 @@ def normalize_anthropic_response(
         ),
         finish_reason,
     )
+
+
+def normalize_anthropic_response_v2(
+    response,
+    strip_tool_prefix: bool = False,
+) -> "NormalizedResponse":
+    """Normalize Anthropic response to NormalizedResponse.
+
+    Wraps the existing normalize_anthropic_response() and maps its output
+    to the shared transport types. This allows incremental migration —
+    one call site at a time — without changing the original function.
+    """
+    from agent.transports.types import NormalizedResponse, build_tool_call
+
+    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
+
+    tool_calls = None
+    if assistant_msg.tool_calls:
+        tool_calls = [
+            build_tool_call(
+                id=tc.id,
+                name=tc.function.name,
+                arguments=tc.function.arguments,
+            )
+            for tc in assistant_msg.tool_calls
+        ]
+
+    provider_data = {}
+    if getattr(assistant_msg, "reasoning_details", None):
+        provider_data["reasoning_details"] = assistant_msg.reasoning_details
+
+    return NormalizedResponse(
+        content=assistant_msg.content,
+        tool_calls=tool_calls,
+        finish_reason=finish_reason,
+        reasoning=getattr(assistant_msg, "reasoning", None),
+        usage=None,  # Anthropic usage is on the raw response, not the normaliser
+        provider_data=provider_data or None,
+    )
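How the pieces above compose for Kimi's /coding route, as a sketch using names from this diff — the key is a placeholder, not a real credential:

```python
from agent.anthropic_adapter import build_anthropic_client

# The Kimi branch wins over the generic Bearer branch, so the resulting
# client carries the claude-code User-Agent Kimi's gateway requires.
# `timeout` overrides only the 900s read timeout; connect stays at 10s.
client = build_anthropic_client(
    api_key="sk-placeholder",
    base_url="https://api.kimi.com/coding",
    timeout=300.0,
)
```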
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 19bde946ee..4f8c9a0a46 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -48,6 +48,7 @@ from openai import OpenAI
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars

 logger = logging.getLogger(__name__)
@@ -95,51 +96,37 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
     return _PROVIDER_ALIASES.get(normalized, normalized)


-_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
-    "kimi-for-coding": 0.6,
-}
-
-# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
-# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
-# value 0.6. Any other value will result in an error." The same lock applies
-# to the other k2.* models served on that endpoint. Enumerated explicitly so
-# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
-# the standard chat API and third parties) are NOT clamped.
-# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
-_KIMI_INSTANT_MODELS: frozenset = frozenset({
-    "kimi-k2.5",
-    "kimi-k2-turbo-preview",
-    "kimi-k2-0905-preview",
-})
-_KIMI_THINKING_MODELS: frozenset = frozenset({
-    "kimi-k2-thinking",
-    "kimi-k2-thinking-turbo",
-})
+# Sentinel: when returned by _fixed_temperature_for_model(), callers must
+# strip the ``temperature`` key from API kwargs entirely so the provider's
+# server-side default applies. Kimi/Moonshot models manage temperature
+# internally — sending *any* value (even the "correct" one) can conflict
+# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
+OMIT_TEMPERATURE: object = object()


-def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
-    """Return a required temperature override for models with strict contracts.
-
-    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
-    the k2.5 family. Non-thinking variants require exactly 0.6; thinking
-    variants require 1.0. An optional ``vendor/`` prefix (e.g.
-    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
-    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
-    which is the separate non-coding K2 family with variable temperature.
-    """
-    normalized = (model or "").strip().lower()
-    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
-    if fixed is not None:
-        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
-        return fixed
-    bare = normalized.rsplit("/", 1)[-1]
-    if bare in _KIMI_THINKING_MODELS:
-        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
-        return 1.0
-    if bare in _KIMI_INSTANT_MODELS:
-        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
-        return 0.6
+def _is_kimi_model(model: Optional[str]) -> bool:
+    """True for any Kimi / Moonshot model that manages temperature server-side."""
+    bare = (model or "").strip().lower().rsplit("/", 1)[-1]
+    return bare.startswith("kimi-") or bare == "kimi"
+
+
+def _fixed_temperature_for_model(
+    model: Optional[str],
+    base_url: Optional[str] = None,
+) -> "Optional[float] | object":
+    """Return a temperature directive for models with strict contracts.
+
+    Returns:
+        ``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
+            provider chooses its own default. Used for all Kimi / Moonshot
+            models whose gateway selects temperature server-side.
+        ``float`` — a specific value the caller must use (reserved for future
+            models with fixed-temperature contracts).
+        ``None`` — no override; caller should use its own default.
+    """
+    if _is_kimi_model(model):
+        logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
+        return OMIT_TEMPERATURE
     return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
     "gemini": "gemini-3-flash-preview",
     "zai": "glm-4.5-flash",
     "kimi-coding": "kimi-k2-turbo-preview",
+    "stepfun": "step-3.5-flash",
     "kimi-coding-cn": "kimi-k2-turbo-preview",
     "minimax": "MiniMax-M2.7",
     "minimax-cn": "MiniMax-M2.7",
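Caller-side contract for the sentinel, sketched as a helper (assuming `kwargs` is the dict headed for `chat.completions.create`):

```python
def apply_temperature_directive(kwargs: dict, model: str, base_url: str | None = None) -> None:
    directive = _fixed_temperature_for_model(model, base_url)
    if directive is OMIT_TEMPERATURE:
        kwargs.pop("temperature", None)    # let the gateway pick 1.0 / 0.6 by mode
    elif isinstance(directive, float):
        kwargs["temperature"] = directive  # reserved for fixed-temperature contracts
```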
@@ -185,8 +183,6 @@ auxiliary_is_nous: bool = False # Default auxiliary models per provider _OPENROUTER_MODEL = "google/gemini-3-flash-preview" _NOUS_MODEL = "google/gemini-3-flash-preview" -_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni" -_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _AUTH_JSON_PATH = get_hermes_home() / "auth.json" @@ -200,6 +196,45 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex" _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" +def _codex_cloudflare_headers(access_token: str) -> Dict[str, str]: + """Headers required to avoid Cloudflare 403s on chatgpt.com/backend-api/codex. + + The Cloudflare layer in front of the Codex endpoint whitelists a small set of + first-party originators (``codex_cli_rs``, ``codex_vscode``, ``codex_sdk_ts``, + anything starting with ``Codex``). Requests from non-residential IPs (VPS, + server-hosted agents) that don't advertise an allowed originator are served + a 403 with ``cf-mitigated: challenge`` regardless of auth correctness. + + We pin ``originator: codex_cli_rs`` to match the upstream codex-rs CLI, set + ``User-Agent`` to a codex_cli_rs-shaped string (beats SDK fingerprinting), + and extract ``ChatGPT-Account-ID`` (canonical casing, from codex-rs + ``auth.rs``) out of the OAuth JWT's ``chatgpt_account_id`` claim. + + Malformed tokens are tolerated — we drop the account-ID header rather than + raise, so a bad token still surfaces as an auth error (401) instead of a + crash at client construction. + """ + headers = { + "User-Agent": "codex_cli_rs/0.0.0 (Hermes Agent)", + "originator": "codex_cli_rs", + } + if not isinstance(access_token, str) or not access_token.strip(): + return headers + try: + import base64 + parts = access_token.split(".") + if len(parts) < 2: + return headers + payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4) + claims = json.loads(base64.urlsafe_b64decode(payload_b64)) + acct_id = claims.get("https://api.openai.com/auth", {}).get("chatgpt_account_id") + if isinstance(acct_id, str) and acct_id: + headers["ChatGPT-Account-ID"] = acct_id + except Exception: + pass + return headers + + def _to_openai_base_url(base_url: str) -> str: """Normalize an Anthropic-style base URL to OpenAI-compatible format. @@ -692,6 +727,33 @@ def _nous_base_url() -> str: return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) +def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]: + """Return fresh Nous runtime credentials when available. + + This mirrors the main agent's 401 recovery path and keeps auxiliary + clients aligned with the singleton auth store + mint flow instead of + relying only on whatever raw tokens happen to be sitting in auth.json + or the credential pool. 
+ """ + try: + from hermes_cli.auth import resolve_nous_runtime_credentials + + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + force_mint=force_refresh, + ) + except Exception as exc: + logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc) + return None + + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip().rstrip("/") + if not api_key or not base_url: + return None + return api_key, base_url + + def _read_codex_access_token() -> Optional[str]: """Read a valid, non-expired Codex OAuth access token from Hermes auth store. @@ -775,10 +837,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if model is None: continue # skip provider if we don't know a valid aux model logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model) + if provider_id == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + if is_native_gemini_base_url(base_url): + return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} - if "api.kimi.com" in base_url.lower(): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} - elif "api.githubcopilot.com" in base_url.lower(): + if base_url_host_matches(base_url, "api.kimi.com"): + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() @@ -796,10 +863,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if model is None: continue # skip provider if we don't know a valid aux model logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model) + if provider_id == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + if is_native_gemini_base_url(base_url): + return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} - if "api.kimi.com" in base_url.lower(): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} - elif "api.githubcopilot.com" in base_url.lower(): + if base_url_host_matches(base_url, "api.kimi.com"): + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() @@ -848,29 +920,50 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: pass nous = _read_nous_auth() - if not nous: + runtime = _resolve_nous_runtime_api(force_refresh=False) + if runtime is None and not nous: return None, None global auxiliary_is_nous auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") - if nous.get("source") == "pool": - model = "gemini-3-flash" - else: - model = _NOUS_MODEL - # Free-tier users can't use paid auxiliary models — use the free - # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks. + + # Ask the Portal which model it currently recommends for this task type. + # The /api/nous/recommended-models endpoint is the authoritative source: + # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model + # auto-detects the caller's tier via check_nous_free_tier(). 
Fall back to + # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable + # or returns a null recommendation for this task type. + model = _NOUS_MODEL try: - from hermes_cli.models import check_nous_free_tier - if check_nous_free_tier(): - model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL - logger.debug("Free-tier Nous account — using %s for auxiliary/%s", - model, "vision" if vision else "text") - except Exception: - pass + from hermes_cli.models import get_nous_recommended_aux_model + recommended = get_nous_recommended_aux_model(vision=vision) + if recommended: + model = recommended + logger.debug( + "Auxiliary/%s: using Portal-recommended model %s", + "vision" if vision else "text", model, + ) + else: + logger.debug( + "Auxiliary/%s: no Portal recommendation, falling back to %s", + "vision" if vision else "text", model, + ) + except Exception as exc: + logger.debug( + "Auxiliary/%s: recommended-models lookup failed (%s); " + "falling back to %s", + "vision" if vision else "text", exc, model, + ) + + if runtime is not None: + api_key, base_url = runtime + else: + api_key = _nous_api_key(nous or {}) + base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/") return ( OpenAI( - api_key=_nous_api_key(nous), - base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"), + api_key=api_key, + base_url=base_url, ), model, ) @@ -948,7 +1041,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st return None, None, None custom_base = custom_base.strip().rstrip("/") - if "openrouter.ai" in custom_base.lower(): + if base_url_host_matches(custom_base, "openrouter.ai"): # requested='custom' falls back to OpenRouter when no custom endpoint is # configured. Treat that as "no custom endpoint" for auxiliary routing. return None, None, None @@ -982,6 +1075,8 @@ def _validate_proxy_env_urls() -> None: """ from urllib.parse import urlparse + normalize_proxy_env_vars() + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = str(os.environ.get(key) or "").strip() @@ -1016,7 +1111,7 @@ def _validate_base_url(base_url: str) -> None: ) from exc -def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: +def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]: runtime = _resolve_custom_runtime() if len(runtime) == 2: custom_base, custom_key = runtime @@ -1032,6 +1127,23 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: if custom_mode == "codex_responses": real_client = OpenAI(api_key=custom_key, base_url=custom_base) return CodexAuxiliaryClient(real_client, model), model + if custom_mode == "anthropic_messages": + # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM, + # LiteLLM proxies, etc.). Must NEVER be treated as OAuth — + # Anthropic OAuth claims only apply to api.anthropic.com. + try: + from agent.anthropic_adapter import build_anthropic_client + real_client = build_anthropic_client(custom_key, custom_base) + except ImportError: + logger.warning( + "Custom endpoint declares api_mode=anthropic_messages but the " + "anthropic SDK is not installed — falling back to OpenAI-wire." 
+ ) + return OpenAI(api_key=custom_key, base_url=custom_base), model + return ( + AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False), + model, + ) return OpenAI(api_key=custom_key, base_url=custom_base), model @@ -1052,7 +1164,11 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]: return None, None base_url = _CODEX_AUX_BASE_URL logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) - real_client = OpenAI(api_key=codex_token, base_url=base_url) + real_client = OpenAI( + api_key=codex_token, + base_url=base_url, + default_headers=_codex_cloudflare_headers(codex_token), + ) return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL @@ -1191,6 +1307,15 @@ def _is_connection_error(exc: Exception) -> bool: return False +def _is_auth_error(exc: Exception) -> bool: + """Detect auth failures that should trigger provider-specific refresh.""" + status = getattr(exc, "status_code", None) + if status == 401: + return True + err_lower = str(exc).lower() + return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower() + + def _try_payment_fallback( failed_provider: str, task: str = None, @@ -1348,6 +1473,13 @@ def _to_async_client(sync_client, model: str): return AsyncCodexAuxiliaryClient(sync_client), model if isinstance(sync_client, AnthropicAuxiliaryClient): return AsyncAnthropicAuxiliaryClient(sync_client), model + try: + from agent.gemini_native_adapter import GeminiNativeClient, AsyncGeminiNativeClient + + if isinstance(sync_client, GeminiNativeClient): + return AsyncGeminiNativeClient(sync_client), model + except ImportError: + pass try: from agent.copilot_acp_client import CopilotACPClient if isinstance(sync_client, CopilotACPClient): @@ -1359,15 +1491,15 @@ def _to_async_client(sync_client, model: str): "api_key": sync_client.api_key, "base_url": str(sync_client.base_url), } - base_lower = str(sync_client.base_url).lower() - if "openrouter" in base_lower: + sync_base_url = str(sync_client.base_url) + if base_url_host_matches(sync_base_url, "openrouter.ai"): async_kwargs["default_headers"] = dict(_OR_HEADERS) - elif "api.githubcopilot.com" in base_lower: + elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers async_kwargs["default_headers"] = copilot_default_headers() - elif "api.kimi.com" in base_lower: - async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + elif base_url_host_matches(sync_base_url, "api.kimi.com"): + async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} return AsyncOpenAI(**async_kwargs), model @@ -1443,8 +1575,7 @@ def resolve_provider_client( # Auto-detect: api.openai.com + codex model name pattern if api_mode and api_mode != "codex_responses": return False # explicit non-codex mode - normalized_base = (base_url_str or "").strip().lower() - if "api.openai.com" in normalized_base and "openrouter" not in normalized_base: + if base_url_hostname(base_url_str) == "api.openai.com": model_lower = (model_str or "").lower() if "codex" in model_lower: return True @@ -1492,7 +1623,13 @@ def resolve_provider_client( # ── Nous Portal (OAuth) ────────────────────────────────────────── if provider == "nous": - client, default = _try_nous() + # Detect vision tasks: either explicit model override from + # _PROVIDER_VISION_MODELS, or caller passed a known vision model. 
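+        # Illustrative outcomes, using the override examples documented for
+        # _PROVIDER_VISION_MODELS elsewhere in this file (hypothetical inputs):
+        #   model="glm-5v-turbo"    → vision (a _PROVIDER_VISION_MODELS value)
+        #   model="mimo-v2-omni"    → vision (known Nous vision model)
+        #   model="gemini-3-flash"  → text route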
+ _is_vision = ( + model in _PROVIDER_VISION_MODELS.values() + or (model or "").strip().lower() == "mimo-v2-omni" + ) + client, default = _try_nous(vision=_is_vision) if client is None: logger.warning("resolve_provider_client: nous requested " "but Nous Portal not configured (run: hermes auth)") @@ -1512,7 +1649,11 @@ def resolve_provider_client( "but no Codex OAuth token found (run: hermes model)") return None, None final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider) - raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + raw_client = OpenAI( + api_key=codex_token, + base_url=_CODEX_AUX_BASE_URL, + default_headers=_codex_cloudflare_headers(codex_token), + ) return (raw_client, final_model) # Standard path: wrap in CodexAuxiliaryClient adapter client, default = _try_codex() @@ -1544,9 +1685,9 @@ def resolve_provider_client( provider, ) extra = {} - if "api.kimi.com" in custom_base.lower(): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} - elif "api.githubcopilot.com" in custom_base.lower(): + if base_url_host_matches(custom_base, "api.kimi.com"): + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(custom_base, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() client = OpenAI(api_key=custom_key, base_url=custom_base, **extra) @@ -1640,11 +1781,20 @@ def resolve_provider_client( default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "") final_model = _normalize_resolved_model(model or default_model, provider) + if provider == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + if is_native_gemini_base_url(base_url): + client = GeminiNativeClient(api_key=api_key, base_url=base_url) + logger.debug("resolve_provider_client: %s (%s)", provider, final_model) + return (_to_async_client(client, final_model) if async_mode + else (client, final_model)) + # Provider-specific headers headers = {} - if "api.kimi.com" in base_url.lower(): - headers["User-Agent"] = "KimiCLI/1.30.0" - elif "api.githubcopilot.com" in base_url.lower(): + if base_url_host_matches(base_url, "api.kimi.com"): + headers["User-Agent"] = "claude-code/0.1.0" + elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers headers.update(copilot_default_headers()) @@ -1875,24 +2025,35 @@ def resolve_vision_provider_client( # _PROVIDER_VISION_MODELS provides per-provider vision model # overrides when the provider has a dedicated multimodal model # that differs from the chat model (e.g. xiaomi → mimo-v2-omni, - # zai → glm-5v-turbo). + # zai → glm-5v-turbo). Nous is the exception: it has a dedicated + # strict vision backend with tier-aware defaults, so it must not + # fall through to the user's text chat model here. # 2. OpenRouter (vision-capable aggregator fallback) # 3. Nous Portal (vision-capable aggregator fallback) # 4. 
Stop main_provider = _read_main_provider() main_model = _read_main_model() if main_provider and main_provider not in ("auto", ""): - vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) - rpc_client, rpc_model = resolve_provider_client( - main_provider, vision_model, - api_mode=resolved_api_mode) - if rpc_client is not None: - logger.info( - "Vision auto-detect: using main provider %s (%s)", - main_provider, rpc_model or vision_model, - ) - return _finalize( - main_provider, rpc_client, rpc_model or vision_model) + if main_provider == "nous": + sync_client, default_model = _resolve_strict_vision_backend(main_provider) + if sync_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, default_model or resolved_model or main_model, + ) + return _finalize(main_provider, sync_client, default_model) + else: + vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) + rpc_client, rpc_model = resolve_provider_client( + main_provider, vision_model, + api_mode=resolved_api_mode) + if rpc_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, rpc_model or vision_model, + ) + return _finalize( + main_provider, rpc_client, rpc_model or vision_model) # Fall back through aggregators (uses their dedicated vision model, # not the user's main model) when main provider has no client. @@ -1939,7 +2100,7 @@ def auxiliary_max_tokens_param(value: int) -> dict: # Only use max_completion_tokens for direct OpenAI custom endpoints if (not or_key and _read_nous_auth() is None - and "api.openai.com" in custom_base.lower()): + and base_url_hostname(custom_base) == "api.openai.com"): return {"max_completion_tokens": value} return {"max_tokens": value} @@ -1967,6 +2128,76 @@ _client_cache_lock = threading.Lock() _CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded +def _client_cache_key( + provider: str, + *, + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> tuple: + runtime = _normalize_main_runtime(main_runtime) + runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () + return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) + + +def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None: + with _client_cache_lock: + old_entry = _client_cache.get(cache_key) + if old_entry is not None and old_entry[0] is not client: + _force_close_async_httpx(old_entry[0]) + try: + close_fn = getattr(old_entry[0], "close", None) + if callable(close_fn): + close_fn() + except Exception: + pass + _client_cache[cache_key] = (client, default_model, bound_loop) + + +def _refresh_nous_auxiliary_client( + *, + cache_provider: str, + model: Optional[str], + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> Tuple[Optional[Any], Optional[str]]: + """Refresh Nous runtime creds, rebuild the client, and replace the cache entry.""" + runtime = _resolve_nous_runtime_api(force_refresh=True) + if runtime is None: + return None, model + + fresh_key, fresh_base_url = runtime + sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url) + final_model = model + + current_loop = None + if async_mode: + try: + import 
asyncio as _aio + current_loop = _aio.get_event_loop() + except RuntimeError: + pass + client, final_model = _to_async_client(sync_client, final_model or "") + else: + client = sync_client + + cache_key = _client_cache_key( + cache_provider, + async_mode=async_mode, + base_url=base_url, + api_key=api_key, + api_mode=api_mode, + main_runtime=main_runtime, + ) + _store_cached_client(cache_key, client, final_model, bound_loop=current_loop) + return client, final_model + + def neuter_async_httpx_del() -> None: """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op. @@ -2068,7 +2299,7 @@ def cleanup_stale_async_clients() -> None: def _is_openrouter_client(client: Any) -> bool: for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)): - if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower(): + if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"): return True return False @@ -2120,8 +2351,14 @@ def _get_cached_client( except RuntimeError: pass runtime = _normalize_main_runtime(main_runtime) - runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () - cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) + cache_key = _client_cache_key( + provider, + async_mode=async_mode, + base_url=base_url, + api_key=api_key, + api_mode=api_mode, + main_runtime=main_runtime, + ) with _client_cache_lock: if cache_key in _client_cache: cached_client, cached_default, cached_loop = _client_cache[cache_key] @@ -2190,7 +2427,6 @@ def _resolve_task_provider_model( to "custom" and the task uses that direct endpoint. api_mode is one of "chat_completions", "codex_responses", or None (auto-detect). 
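    For illustration, the config shape this function reads via
    _get_auxiliary_task_config() looks like the following (values here are
    hypothetical):

        {"auxiliary": {"vision": {"provider": "custom",
                                  "model": "glm-5v-turbo",
                                  "base_url": "https://example.invalid/v1",
                                  "api_mode": "chat_completions"}}}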
""" - config = {} cfg_provider = None cfg_model = None cfg_base_url = None @@ -2198,16 +2434,7 @@ def _resolve_task_provider_model( cfg_api_mode = None if task: - try: - from hermes_cli.config import load_config - config = load_config() - except ImportError: - config = {} - - aux = config.get("auxiliary", {}) if isinstance(config, dict) else {} - task_config = aux.get(task, {}) if isinstance(aux, dict) else {} - if not isinstance(task_config, dict): - task_config = {} + task_config = _get_auxiliary_task_config(task) cfg_provider = str(task_config.get("provider", "")).strip() or None cfg_model = str(task_config.get("model", "")).strip() or None cfg_base_url = str(task_config.get("base_url", "")).strip() or None @@ -2237,17 +2464,25 @@ def _resolve_task_provider_model( _DEFAULT_AUX_TIMEOUT = 30.0 -def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float: - """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*.""" +def _get_auxiliary_task_config(task: str) -> Dict[str, Any]: + """Return the config dict for auxiliary., or {} when unavailable.""" if not task: - return default + return {} try: from hermes_cli.config import load_config config = load_config() except ImportError: - return default + return {} aux = config.get("auxiliary", {}) if isinstance(config, dict) else {} task_config = aux.get(task, {}) if isinstance(aux, dict) else {} + return task_config if isinstance(task_config, dict) else {} + + +def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float: + """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*.""" + if not task: + return default + task_config = _get_auxiliary_task_config(task) raw = task_config.get("timeout") if raw is not None: try: @@ -2257,6 +2492,15 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float return default +def _get_task_extra_body(task: str) -> Dict[str, Any]: + """Read auxiliary..extra_body and return a shallow copy when valid.""" + task_config = _get_auxiliary_task_config(task) + raw = task_config.get("extra_body") + if isinstance(raw, dict): + return dict(raw) + return {} + + # --------------------------------------------------------------------------- # Anthropic-compatible endpoint detection + image block conversion # --------------------------------------------------------------------------- @@ -2344,8 +2588,10 @@ def _build_call_kwargs( "timeout": timeout, } - fixed_temperature = _fixed_temperature_for_model(model) - if fixed_temperature is not None: + fixed_temperature = _fixed_temperature_for_model(model, base_url) + if fixed_temperature is OMIT_TEMPERATURE: + temperature = None # strip — let server choose + elif fixed_temperature is not None: temperature = fixed_temperature # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently @@ -2365,7 +2611,7 @@ def _build_call_kwargs( # Direct OpenAI api.openai.com with newer models needs max_completion_tokens. 
if provider == "custom": custom_base = base_url or _current_custom_base_url() - if "api.openai.com" in custom_base.lower(): + if base_url_hostname(custom_base) == "api.openai.com": kwargs["max_completion_tokens"] = max_tokens else: kwargs["max_tokens"] = max_tokens @@ -2457,6 +2703,8 @@ def call_llm( """ resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model( task, provider, model, base_url, api_key) + effective_extra_body = _get_task_extra_body(task) + effective_extra_body.update(extra_body or {}) if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( @@ -2525,11 +2773,14 @@ def call_llm( task, resolved_provider or "auto", final_model or "default", f" at {_base_info}" if _base_info and "openrouter" not in _base_info else "") + # Pass the client's actual base_url (not just resolved_base_url) so + # endpoint-specific temperature overrides can distinguish + # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes. kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, - tools=tools, timeout=effective_timeout, extra_body=extra_body, - base_url=resolved_base_url) + tools=tools, timeout=effective_timeout, extra_body=effective_extra_body, + base_url=_base_info or resolved_base_url) # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax) _client_base = str(getattr(client, "base_url", "") or "") @@ -2555,6 +2806,29 @@ def call_llm( raise first_err = retry_err + # ── Nous auth refresh parity with main agent ────────────────── + client_is_nous = ( + resolved_provider == "nous" + or base_url_host_matches(_base_info, "inference-api.nousresearch.com") + ) + if _is_auth_error(first_err) and client_is_nous: + refreshed_client, refreshed_model = _refresh_nous_auxiliary_client( + cache_provider=resolved_provider or "nous", + model=final_model, + async_mode=False, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + main_runtime=main_runtime, + ) + if refreshed_client is not None: + logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying", + task or "call") + if refreshed_model and refreshed_model != kwargs.get("model"): + kwargs["model"] = refreshed_model + return _validate_llm_response( + refreshed_client.chat.completions.create(**kwargs), task) + # ── Payment / credit exhaustion fallback ────────────────────── # When the resolved provider returns 402 or a credit-related error, # try alternative providers instead of giving up. 
This handles the @@ -2583,7 +2857,8 @@ def call_llm( fb_label, fb_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, - extra_body=extra_body) + extra_body=effective_extra_body, + base_url=str(getattr(fb_client, "base_url", "") or "")) return _validate_llm_response( fb_client.chat.completions.create(**fb_kwargs), task) raise @@ -2665,6 +2940,8 @@ async def async_call_llm( """ resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model( task, provider, model, base_url, api_key) + effective_extra_body = _get_task_extra_body(task) + effective_extra_body.update(extra_body or {}) if task == "vision": effective_provider, client, final_model = resolve_vision_provider_client( @@ -2718,14 +2995,17 @@ async def async_call_llm( effective_timeout = timeout if timeout is not None else _get_task_timeout(task) + # Pass the client's actual base_url (not just resolved_base_url) so + # endpoint-specific temperature overrides can distinguish + # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes. + _client_base = str(getattr(client, "base_url", "") or "") kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, - tools=tools, timeout=effective_timeout, extra_body=extra_body, - base_url=resolved_base_url) + tools=tools, timeout=effective_timeout, extra_body=effective_extra_body, + base_url=_client_base or resolved_base_url) # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax) - _client_base = str(getattr(client, "base_url", "") or "") if _is_anthropic_compat_endpoint(resolved_provider, _client_base): kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"]) @@ -2747,6 +3027,28 @@ async def async_call_llm( raise first_err = retry_err + # ── Nous auth refresh parity with main agent ────────────────── + client_is_nous = ( + resolved_provider == "nous" + or base_url_host_matches(_client_base, "inference-api.nousresearch.com") + ) + if _is_auth_error(first_err) and client_is_nous: + refreshed_client, refreshed_model = _refresh_nous_auxiliary_client( + cache_provider=resolved_provider or "nous", + model=final_model, + async_mode=True, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + ) + if refreshed_client is not None: + logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying", + task or "call") + if refreshed_model and refreshed_model != kwargs.get("model"): + kwargs["model"] = refreshed_model + return _validate_llm_response( + await refreshed_client.chat.completions.create(**kwargs), task) + # ── Payment / connection fallback (mirrors sync call_llm) ───── should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err) is_auto = resolved_provider in ("auto", "", None) @@ -2761,7 +3063,8 @@ async def async_call_llm( fb_label, fb_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, - extra_body=extra_body) + extra_body=effective_extra_body, + base_url=str(getattr(fb_client, "base_url", "") or "")) # Convert sync fallback client to async async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "") if async_fb_model and async_fb_model != fb_kwargs.get("model"): diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py new file mode 100644 index 0000000000..4d3e5590be --- /dev/null +++ 
b/agent/codex_responses_adapter.py @@ -0,0 +1,813 @@ +"""Codex Responses API adapter. + +Pure format-conversion and normalization logic for the OpenAI Responses API +(used by OpenAI Codex, xAI, GitHub Models, and other Responses-compatible endpoints). + +Extracted from run_agent.py to isolate Responses API-specific logic from the +core agent loop. All functions are stateless — they operate on the data passed +in and return transformed results. +""" + +from __future__ import annotations + +import hashlib +import json +import logging +import re +import uuid +from types import SimpleNamespace +from typing import Any, Dict, List, Optional + +from agent.prompt_builder import DEFAULT_AGENT_IDENTITY + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Multimodal content helpers +# --------------------------------------------------------------------------- + +def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]: + """Convert chat-style multimodal content to Responses API input parts. + + Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format) + Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format) + + Returns an empty list when ``content`` is not a list or contains no + recognized parts — callers fall back to the string path. + """ + if not isinstance(content, list): + return [] + converted: List[Dict[str, Any]] = [] + for part in content: + if isinstance(part, str): + if part: + converted.append({"type": "input_text", "text": part}) + continue + if not isinstance(part, dict): + continue + ptype = str(part.get("type") or "").strip().lower() + if ptype in {"text", "input_text", "output_text"}: + text = part.get("text") + if isinstance(text, str) and text: + converted.append({"type": "input_text", "text": text}) + continue + if ptype in {"image_url", "input_image"}: + image_ref = part.get("image_url") + detail = part.get("detail") + if isinstance(image_ref, dict): + url = image_ref.get("url") + detail = image_ref.get("detail", detail) + else: + url = image_ref + if not isinstance(url, str) or not url: + continue + image_part: Dict[str, Any] = {"type": "input_image", "image_url": url} + if isinstance(detail, str) and detail.strip(): + image_part["detail"] = detail.strip() + converted.append(image_part) + return converted + + +def _summarize_user_message_for_log(content: Any) -> str: + """Return a short text summary of a user message for logging/trajectory. + + Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}`` + parts from the API server. Logging, spinner previews, and trajectory + files all want a plain string — this helper extracts the first chunk of + text and notes any attached images. Returns an empty string for empty + lists and ``str(content)`` for unexpected scalar types. 
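+    Illustrative call (hypothetical payload):
+
+        >>> _summarize_user_message_for_log(
+        ...     [{"type": "text", "text": "describe this"},
+        ...      {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}])
+        '[1 image] describe this'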
+ """ + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + text_bits: List[str] = [] + image_count = 0 + for part in content: + if isinstance(part, str): + if part: + text_bits.append(part) + continue + if not isinstance(part, dict): + continue + ptype = str(part.get("type") or "").strip().lower() + if ptype in {"text", "input_text", "output_text"}: + text = part.get("text") + if isinstance(text, str) and text: + text_bits.append(text) + elif ptype in {"image_url", "input_image"}: + image_count += 1 + summary = " ".join(text_bits).strip() + if image_count: + note = f"[{image_count} image{'s' if image_count != 1 else ''}]" + summary = f"{note} {summary}" if summary else note + return summary + try: + return str(content) + except Exception: + return "" + + +# --------------------------------------------------------------------------- +# ID helpers +# --------------------------------------------------------------------------- + +def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str: + """Generate a deterministic call_id from tool call content. + + Used as a fallback when the API doesn't provide a call_id. + Deterministic IDs prevent cache invalidation — random UUIDs would + make every API call's prefix unique, breaking OpenAI's prompt cache. + """ + seed = f"{fn_name}:{arguments}:{index}" + digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12] + return f"call_{digest}" + + +def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]: + """Split a stored tool id into (call_id, response_item_id).""" + if not isinstance(raw_id, str): + return None, None + value = raw_id.strip() + if not value: + return None, None + if "|" in value: + call_id, response_item_id = value.split("|", 1) + call_id = call_id.strip() or None + response_item_id = response_item_id.strip() or None + return call_id, response_item_id + if value.startswith("fc_"): + return None, value + return value, None + + +def _derive_responses_function_call_id( + call_id: str, + response_item_id: Optional[str] = None, +) -> str: + """Build a valid Responses `function_call.id` (must start with `fc_`).""" + if isinstance(response_item_id, str): + candidate = response_item_id.strip() + if candidate.startswith("fc_"): + return candidate + + source = (call_id or "").strip() + if source.startswith("fc_"): + return source + if source.startswith("call_") and len(source) > len("call_"): + return f"fc_{source[len('call_'):]}" + + sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source) + if sanitized.startswith("fc_"): + return sanitized + if sanitized.startswith("call_") and len(sanitized) > len("call_"): + return f"fc_{sanitized[len('call_'):]}" + if sanitized: + return f"fc_{sanitized[:48]}" + + seed = source or str(response_item_id or "") or uuid.uuid4().hex + digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24] + return f"fc_{digest}" + + +# --------------------------------------------------------------------------- +# Schema conversion +# --------------------------------------------------------------------------- + +def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: + """Convert chat-completions tool schemas to Responses function-tool schemas.""" + if not tools: + return None + + converted: List[Dict[str, Any]] = [] + for item in tools: + fn = item.get("function", {}) if isinstance(item, dict) else {} + name = fn.get("name") + if not isinstance(name, str) 
or not name.strip(): + continue + converted.append({ + "type": "function", + "name": name, + "description": fn.get("description", ""), + "strict": False, + "parameters": fn.get("parameters", {"type": "object", "properties": {}}), + }) + return converted or None + + +# --------------------------------------------------------------------------- +# Message format conversion +# --------------------------------------------------------------------------- + +def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Convert internal chat-style messages to Responses input items.""" + items: List[Dict[str, Any]] = [] + seen_item_ids: set = set() + + for msg in messages: + if not isinstance(msg, dict): + continue + role = msg.get("role") + if role == "system": + continue + + if role in {"user", "assistant"}: + content = msg.get("content", "") + if isinstance(content, list): + content_parts = _chat_content_to_responses_parts(content) + content_text = "".join( + p.get("text", "") for p in content_parts if p.get("type") == "input_text" + ) + else: + content_parts = [] + content_text = str(content) if content is not None else "" + + if role == "assistant": + # Replay encrypted reasoning items from previous turns + # so the API can maintain coherent reasoning chains. + codex_reasoning = msg.get("codex_reasoning_items") + has_codex_reasoning = False + if isinstance(codex_reasoning, list): + for ri in codex_reasoning: + if isinstance(ri, dict) and ri.get("encrypted_content"): + item_id = ri.get("id") + if item_id and item_id in seen_item_ids: + continue + # Strip the "id" field — with store=False the + # Responses API cannot look up items by ID and + # returns 404. The encrypted_content blob is + # self-contained for reasoning chain continuity. + replay_item = {k: v for k, v in ri.items() if k != "id"} + items.append(replay_item) + if item_id: + seen_item_ids.add(item_id) + has_codex_reasoning = True + + if content_parts: + items.append({"role": "assistant", "content": content_parts}) + elif content_text.strip(): + items.append({"role": "assistant", "content": content_text}) + elif has_codex_reasoning: + # The Responses API requires a following item after each + # reasoning item (otherwise: missing_following_item error). + # When the assistant produced only reasoning with no visible + # content, emit an empty assistant message as the required + # following item. 
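+                    # Illustrative replay shape (values elided):
+                    #   {"type": "reasoning", "encrypted_content": "..."}
+                    #   {"role": "assistant", "content": ""}   # required follower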
+ items.append({"role": "assistant", "content": ""}) + + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if not isinstance(tc, dict): + continue + fn = tc.get("function", {}) + fn_name = fn.get("name") + if not isinstance(fn_name, str) or not fn_name.strip(): + continue + + embedded_call_id, embedded_response_item_id = _split_responses_tool_id( + tc.get("id") + ) + call_id = tc.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if ( + isinstance(embedded_response_item_id, str) + and embedded_response_item_id.startswith("fc_") + and len(embedded_response_item_id) > len("fc_") + ): + call_id = f"call_{embedded_response_item_id[len('fc_'):]}" + else: + _raw_args = str(fn.get("arguments", "{}")) + call_id = _deterministic_call_id(fn_name, _raw_args, len(items)) + call_id = call_id.strip() + + arguments = fn.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + items.append({ + "type": "function_call", + "call_id": call_id, + "name": fn_name, + "arguments": arguments, + }) + continue + + # Non-assistant (user) role: emit multimodal parts when present, + # otherwise fall back to the text payload. + if content_parts: + items.append({"role": role, "content": content_parts}) + else: + items.append({"role": role, "content": content_text}) + continue + + if role == "tool": + raw_tool_call_id = msg.get("tool_call_id") + call_id, _ = _split_responses_tool_id(raw_tool_call_id) + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip(): + call_id = raw_tool_call_id.strip() + if not isinstance(call_id, str) or not call_id.strip(): + continue + items.append({ + "type": "function_call_output", + "call_id": call_id, + "output": str(msg.get("content", "") or ""), + }) + + return items + + +# --------------------------------------------------------------------------- +# Input preflight / validation +# --------------------------------------------------------------------------- + +def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]: + if not isinstance(raw_items, list): + raise ValueError("Codex Responses input must be a list of input items.") + + normalized: List[Dict[str, Any]] = [] + seen_ids: set = set() + for idx, item in enumerate(raw_items): + if not isinstance(item, dict): + raise ValueError(f"Codex Responses input[{idx}] must be an object.") + + item_type = item.get("type") + if item_type == "function_call": + call_id = item.get("call_id") + name = item.get("name") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.") + + arguments = item.get("arguments", "{}") + if isinstance(arguments, dict): + arguments = json.dumps(arguments, ensure_ascii=False) + elif not isinstance(arguments, str): + arguments = str(arguments) + arguments = arguments.strip() or "{}" + + normalized.append( + { + "type": "function_call", + "call_id": call_id.strip(), + "name": name.strip(), + "arguments": arguments, + } + ) + continue + + if item_type == "function_call_output": + call_id = 
item.get("call_id") + if not isinstance(call_id, str) or not call_id.strip(): + raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.") + output = item.get("output", "") + if output is None: + output = "" + if not isinstance(output, str): + output = str(output) + + normalized.append( + { + "type": "function_call_output", + "call_id": call_id.strip(), + "output": output, + } + ) + continue + + if item_type == "reasoning": + encrypted = item.get("encrypted_content") + if isinstance(encrypted, str) and encrypted: + item_id = item.get("id") + if isinstance(item_id, str) and item_id: + if item_id in seen_ids: + continue + seen_ids.add(item_id) + reasoning_item = {"type": "reasoning", "encrypted_content": encrypted} + # Do NOT include the "id" in the outgoing item — with + # store=False (our default) the API tries to resolve the + # id server-side and returns 404. The id is still used + # above for local deduplication via seen_ids. + summary = item.get("summary") + if isinstance(summary, list): + reasoning_item["summary"] = summary + else: + reasoning_item["summary"] = [] + normalized.append(reasoning_item) + continue + + role = item.get("role") + if role in {"user", "assistant"}: + content = item.get("content", "") + if content is None: + content = "" + if isinstance(content, list): + # Multimodal content from ``_chat_messages_to_responses_input`` + # is already in Responses format (``input_text`` / ``input_image``). + # Validate each part and pass through. + validated: List[Dict[str, Any]] = [] + for part_idx, part in enumerate(content): + if isinstance(part, str): + if part: + validated.append({"type": "input_text", "text": part}) + continue + if not isinstance(part, dict): + raise ValueError( + f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string." + ) + ptype = str(part.get("type") or "").strip().lower() + if ptype in {"input_text", "text", "output_text"}: + text = part.get("text", "") + if not isinstance(text, str): + text = str(text or "") + validated.append({"type": "input_text", "text": text}) + elif ptype in {"input_image", "image_url"}: + image_ref = part.get("image_url", "") + detail = part.get("detail") + if isinstance(image_ref, dict): + url = image_ref.get("url", "") + detail = image_ref.get("detail", detail) + else: + url = image_ref + if not isinstance(url, str): + url = str(url or "") + image_part: Dict[str, Any] = {"type": "input_image", "image_url": url} + if isinstance(detail, str) and detail.strip(): + image_part["detail"] = detail.strip() + validated.append(image_part) + else: + raise ValueError( + f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}." + ) + normalized.append({"role": role, "content": validated}) + continue + if not isinstance(content, str): + content = str(content) + + normalized.append({"role": role, "content": content}) + continue + + raise ValueError( + f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})." 
+ ) + + return normalized + + +def _preflight_codex_api_kwargs( + api_kwargs: Any, + *, + allow_stream: bool = False, +) -> Dict[str, Any]: + if not isinstance(api_kwargs, dict): + raise ValueError("Codex Responses request must be a dict.") + + required = {"model", "instructions", "input"} + missing = [key for key in required if key not in api_kwargs] + if missing: + raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.") + + model = api_kwargs.get("model") + if not isinstance(model, str) or not model.strip(): + raise ValueError("Codex Responses request 'model' must be a non-empty string.") + model = model.strip() + + instructions = api_kwargs.get("instructions") + if instructions is None: + instructions = "" + if not isinstance(instructions, str): + instructions = str(instructions) + instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY + + normalized_input = _preflight_codex_input_items(api_kwargs.get("input")) + + tools = api_kwargs.get("tools") + normalized_tools = None + if tools is not None: + if not isinstance(tools, list): + raise ValueError("Codex Responses request 'tools' must be a list when provided.") + normalized_tools = [] + for idx, tool in enumerate(tools): + if not isinstance(tool, dict): + raise ValueError(f"Codex Responses tools[{idx}] must be an object.") + if tool.get("type") != "function": + raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.") + + name = tool.get("name") + parameters = tool.get("parameters") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.") + if not isinstance(parameters, dict): + raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.") + + description = tool.get("description", "") + if description is None: + description = "" + if not isinstance(description, str): + description = str(description) + + strict = tool.get("strict", False) + if not isinstance(strict, bool): + strict = bool(strict) + + normalized_tools.append( + { + "type": "function", + "name": name.strip(), + "description": description, + "strict": strict, + "parameters": parameters, + } + ) + + store = api_kwargs.get("store", False) + if store is not False: + raise ValueError("Codex Responses contract requires 'store' to be false.") + + allowed_keys = { + "model", "instructions", "input", "tools", "store", + "reasoning", "include", "max_output_tokens", "temperature", + "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", + "extra_headers", + } + normalized: Dict[str, Any] = { + "model": model, + "instructions": instructions, + "input": normalized_input, + "store": False, + } + if normalized_tools is not None: + normalized["tools"] = normalized_tools + + # Pass through reasoning config + reasoning = api_kwargs.get("reasoning") + if isinstance(reasoning, dict): + normalized["reasoning"] = reasoning + include = api_kwargs.get("include") + if isinstance(include, list): + normalized["include"] = include + service_tier = api_kwargs.get("service_tier") + if isinstance(service_tier, str) and service_tier.strip(): + normalized["service_tier"] = service_tier.strip() + + # Pass through max_output_tokens and temperature + max_output_tokens = api_kwargs.get("max_output_tokens") + if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0: + normalized["max_output_tokens"] = int(max_output_tokens) + temperature = api_kwargs.get("temperature") + if isinstance(temperature, 
(int, float)): + normalized["temperature"] = float(temperature) + + # Pass through tool_choice, parallel_tool_calls, prompt_cache_key + for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"): + val = api_kwargs.get(passthrough_key) + if val is not None: + normalized[passthrough_key] = val + + extra_headers = api_kwargs.get("extra_headers") + if extra_headers is not None: + if not isinstance(extra_headers, dict): + raise ValueError("Codex Responses request 'extra_headers' must be an object.") + normalized_headers: Dict[str, str] = {} + for key, value in extra_headers.items(): + if not isinstance(key, str) or not key.strip(): + raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.") + if value is None: + continue + normalized_headers[key.strip()] = str(value) + if normalized_headers: + normalized["extra_headers"] = normalized_headers + + if allow_stream: + stream = api_kwargs.get("stream") + if stream is not None and stream is not True: + raise ValueError("Codex Responses 'stream' must be true when set.") + if stream is True: + normalized["stream"] = True + allowed_keys.add("stream") + elif "stream" in api_kwargs: + raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.") + + unexpected = sorted(key for key in api_kwargs if key not in allowed_keys) + if unexpected: + raise ValueError( + f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}." + ) + + return normalized + + +# --------------------------------------------------------------------------- +# Response extraction helpers +# --------------------------------------------------------------------------- + +def _extract_responses_message_text(item: Any) -> str: + """Extract assistant text from a Responses message output item.""" + content = getattr(item, "content", None) + if not isinstance(content, list): + return "" + + chunks: List[str] = [] + for part in content: + ptype = getattr(part, "type", None) + if ptype not in {"output_text", "text"}: + continue + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + return "".join(chunks).strip() + + +def _extract_responses_reasoning_text(item: Any) -> str: + """Extract a compact reasoning text from a Responses reasoning item.""" + summary = getattr(item, "summary", None) + if isinstance(summary, list): + chunks: List[str] = [] + for part in summary: + text = getattr(part, "text", None) + if isinstance(text, str) and text: + chunks.append(text) + if chunks: + return "\n".join(chunks).strip() + text = getattr(item, "text", None) + if isinstance(text, str) and text: + return text.strip() + return "" + + +# --------------------------------------------------------------------------- +# Full response normalization +# --------------------------------------------------------------------------- + +def _normalize_codex_response(response: Any) -> tuple[Any, str]: + """Normalize a Responses API object to an assistant_message-like object.""" + output = getattr(response, "output", None) + if not isinstance(output, list) or not output: + # The Codex backend can return empty output when the answer was + # delivered entirely via stream events. Check output_text as a + # last-resort fallback before raising. 
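+        # (output_text is the SDK's convenience aggregation of output_text
+        # parts; if even that is empty, the RuntimeError below still fires.)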
+ out_text = getattr(response, "output_text", None) + if isinstance(out_text, str) and out_text.strip(): + logger.debug( + "Codex response has empty output but output_text is present (%d chars); " + "synthesizing output item.", len(out_text.strip()), + ) + output = [SimpleNamespace( + type="message", role="assistant", status="completed", + content=[SimpleNamespace(type="output_text", text=out_text.strip())], + )] + response.output = output + else: + raise RuntimeError("Responses API returned no output items") + + response_status = getattr(response, "status", None) + if isinstance(response_status, str): + response_status = response_status.strip().lower() + else: + response_status = None + + if response_status in {"failed", "cancelled"}: + error_obj = getattr(response, "error", None) + if isinstance(error_obj, dict): + error_msg = error_obj.get("message") or str(error_obj) + else: + error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'" + raise RuntimeError(error_msg) + + content_parts: List[str] = [] + reasoning_parts: List[str] = [] + reasoning_items_raw: List[Dict[str, Any]] = [] + tool_calls: List[Any] = [] + has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} + saw_commentary_phase = False + saw_final_answer_phase = False + + for item in output: + item_type = getattr(item, "type", None) + item_status = getattr(item, "status", None) + if isinstance(item_status, str): + item_status = item_status.strip().lower() + else: + item_status = None + + if item_status in {"queued", "in_progress", "incomplete"}: + has_incomplete_items = True + + if item_type == "message": + item_phase = getattr(item, "phase", None) + if isinstance(item_phase, str): + normalized_phase = item_phase.strip().lower() + if normalized_phase in {"commentary", "analysis"}: + saw_commentary_phase = True + elif normalized_phase in {"final_answer", "final"}: + saw_final_answer_phase = True + message_text = _extract_responses_message_text(item) + if message_text: + content_parts.append(message_text) + elif item_type == "reasoning": + reasoning_text = _extract_responses_reasoning_text(item) + if reasoning_text: + reasoning_parts.append(reasoning_text) + # Capture the full reasoning item for multi-turn continuity. + # encrypted_content is an opaque blob the API needs back on + # subsequent turns to maintain coherent reasoning chains. 
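+            # Captured shape (the "id" is kept only for local dedup and is
+            # stripped again before replay; see _preflight_codex_input_items):
+            #   {"type": "reasoning", "encrypted_content": <blob>,
+            #    "id": <item id>, "summary": [{"type": "summary_text", ...}]}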
+ encrypted = getattr(item, "encrypted_content", None) + if isinstance(encrypted, str) and encrypted: + raw_item = {"type": "reasoning", "encrypted_content": encrypted} + item_id = getattr(item, "id", None) + if isinstance(item_id, str) and item_id: + raw_item["id"] = item_id + # Capture summary — required by the API when replaying reasoning items + summary = getattr(item, "summary", None) + if isinstance(summary, list): + raw_summary = [] + for part in summary: + text = getattr(part, "text", None) + if isinstance(text, str): + raw_summary.append({"type": "summary_text", "text": text}) + raw_item["summary"] = raw_summary + reasoning_items_raw.append(raw_item) + elif item_type == "function_call": + if item_status in {"queued", "in_progress", "incomplete"}: + continue + fn_name = getattr(item, "name", "") or "" + arguments = getattr(item, "arguments", "{}") + if not isinstance(arguments, str): + arguments = json.dumps(arguments, ensure_ascii=False) + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = _split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls)) + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = _derive_responses_function_call_id(call_id, response_item_id) + tool_calls.append(SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + elif item_type == "custom_tool_call": + fn_name = getattr(item, "name", "") or "" + arguments = getattr(item, "input", "{}") + if not isinstance(arguments, str): + arguments = json.dumps(arguments, ensure_ascii=False) + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = _split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls)) + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = _derive_responses_function_call_id(call_id, response_item_id) + tool_calls.append(SimpleNamespace( + id=call_id, + call_id=call_id, + response_item_id=response_item_id, + type="function", + function=SimpleNamespace(name=fn_name, arguments=arguments), + )) + + final_text = "\n".join([p for p in content_parts if p]).strip() + if not final_text and hasattr(response, "output_text"): + out_text = getattr(response, "output_text", "") + if isinstance(out_text, str): + final_text = out_text.strip() + + assistant_message = SimpleNamespace( + content=final_text, + tool_calls=tool_calls, + reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None, + reasoning_content=None, + reasoning_details=None, + codex_reasoning_items=reasoning_items_raw or None, + ) + + if tool_calls: + finish_reason = "tool_calls" + elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): + finish_reason = "incomplete" + elif reasoning_items_raw and not final_text: + # Response contains only reasoning (encrypted thinking state) with + # no visible content or tool calls. 
The model is still thinking and + # needs another turn to produce the actual answer. Marking this as + # "stop" would send it into the empty-content retry loop which burns + # 3 retries then fails — treat it as incomplete instead so the Codex + # continuation path handles it correctly. + finish_reason = "incomplete" + else: + finish_reason = "stop" + return assistant_message, finish_reason diff --git a/agent/context_compressor.py b/agent/context_compressor.py index ae8c2c0bd3..254ac0ac5e 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -31,6 +31,7 @@ from agent.model_metadata import ( get_model_context_length, estimate_messages_tokens_rough, ) +from agent.redact import redact_sensitive_text logger = logging.getLogger(__name__) @@ -550,11 +551,15 @@ class ContextCompressor(ContextEngine): Includes tool call arguments and result content (up to ``_CONTENT_MAX`` chars per message) so the summarizer can preserve specific details like file paths, commands, and outputs. + + All content is redacted before serialization to prevent secrets + (API keys, tokens, passwords) from leaking into the summary that + gets sent to the auxiliary model and persisted across compactions. """ parts = [] for msg in turns: role = msg.get("role", "unknown") - content = msg.get("content") or "" + content = redact_sensitive_text(msg.get("content") or "") # Tool results: keep enough content for the summarizer if role == "tool": @@ -575,7 +580,7 @@ class ContextCompressor(ContextEngine): if isinstance(tc, dict): fn = tc.get("function", {}) name = fn.get("name", "?") - args = fn.get("arguments", "") + args = redact_sensitive_text(fn.get("arguments", "")) # Truncate long arguments but keep enough for context if len(args) > self._TOOL_ARGS_MAX: args = args[:self._TOOL_ARGS_HEAD] + "..." @@ -633,7 +638,13 @@ class ContextCompressor(ContextEngine): "assistant that continues the conversation. " "Do NOT respond to any questions or requests in the conversation — " "only output the structured summary. " - "Do NOT include any preamble, greeting, or prefix." + "Do NOT include any preamble, greeting, or prefix. " + "Write the summary in the same language the user was using in the " + "conversation — do not translate or switch to English. " + "NEVER include API keys, tokens, passwords, secrets, credentials, " + "or connection strings in the summary — replace any that appear " + "with [REDACTED]. Note that the user had credentials present, but " + "do not preserve their values." ) # Shared structured template (used by both paths). @@ -690,7 +701,7 @@ Be specific with file paths, commands, line numbers, and results.] [What remains to be done — framed as context, not instructions] ## Critical Context -[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation] +[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.] Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed. @@ -730,7 +741,7 @@ Use this exact structure: prompt += f""" FOCUS TOPIC: "{focus_topic}" -The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. 
For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget.""" +The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED].""" try: call_kwargs = { @@ -753,7 +764,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Handle cases where content is not a string (e.g., dict from llama.cpp) if not isinstance(content, str): content = str(content) if content else "" - summary = content.strip() + # Redact the summary output as well — the summarizer LLM may + # ignore prompt instructions and echo back secrets verbatim. + summary = redact_sensitive_text(content.strip()) # Store for iterative updates on next compaction self._previous_summary = summary self._summary_failure_cooldown_until = 0.0 @@ -794,7 +807,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio ) self.summary_model = "" # empty = use main model self._summary_failure_cooldown_until = 0.0 # no cooldown - return self._generate_summary(messages, summary_budget) # retry immediately + return self._generate_summary(turns_to_summarize) # retry immediately # Transient errors (timeout, rate limit, network) — shorter cooldown _transient_cooldown = 60 diff --git a/agent/context_references.py b/agent/context_references.py index 7ecb90c497..50a33a1d75 100644 --- a/agent/context_references.py +++ b/agent/context_references.py @@ -483,9 +483,7 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None: text=True, timeout=10, ) - except FileNotFoundError: - return None - except subprocess.TimeoutExpired: + except (FileNotFoundError, OSError, subprocess.TimeoutExpired): return None if result.returncode != 0: return None diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 031c58d705..783f949567 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -21,6 +21,9 @@ from pathlib import Path from types import SimpleNamespace from typing import Any +from agent.file_safety import get_read_block_error, is_write_denied +from agent.redact import redact_sensitive_text + ACP_MARKER_BASE_URL = "acp://copilot" _DEFAULT_TIMEOUT_SECONDS = 900.0 @@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]: } +def _permission_denied(message_id: Any) -> dict[str, Any]: + return { + "jsonrpc": "2.0", + "id": message_id, + "result": { + "outcome": { + "outcome": "cancelled", + } + }, + } + + def _format_messages_as_prompt( messages: list[dict[str, Any]], model: str | None = None, @@ -386,6 +401,8 @@ class CopilotACPClient: stderr_tail: deque[str] = deque(maxlen=40) def _stdout_reader() -> None: + if proc.stdout is None: + return for line in proc.stdout: try: inbox.put(json.loads(line)) @@ -533,18 +550,13 @@ class CopilotACPClient: params = 
msg.get("params") or {} if method == "session/request_permission": - response = { - "jsonrpc": "2.0", - "id": message_id, - "result": { - "outcome": { - "outcome": "allow_once", - } - }, - } + response = _permission_denied(message_id) elif method == "fs/read_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + block_error = get_read_block_error(str(path)) + if block_error: + raise PermissionError(block_error) content = path.read_text() if path.exists() else "" line = params.get("line") limit = params.get("limit") @@ -553,6 +565,8 @@ class CopilotACPClient: start = line - 1 end = start + limit if isinstance(limit, int) and limit > 0 else None content = "".join(lines[start:end]) + if content: + content = redact_sensitive_text(content) response = { "jsonrpc": "2.0", "id": message_id, @@ -565,6 +579,10 @@ class CopilotACPClient: elif method == "fs/write_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + if is_write_denied(str(path)): + raise PermissionError( + f"Write denied: '{path}' is a protected system/credential file." + ) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(str(params.get("content") or "")) response = { diff --git a/agent/credential_pool.py b/agent/credential_pool.py index b02514e990..de8d03185a 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -983,6 +983,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup active_sources: Set[str] = set() auth_store = _load_auth_store() + # Shared suppression gate — used at every upsert site so + # `hermes auth remove ` is stable across all source types. + try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + if provider == "anthropic": # Only auto-discover external credentials (Claude Code, Hermes PKCE) # when the user has explicitly configured anthropic as their provider. @@ -1002,13 +1010,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup ("claude_code", read_claude_code_credentials()), ): if creds and creds.get("accessToken"): - # Check if user explicitly removed this source - try: - from hermes_cli.auth import is_source_suppressed - if is_source_suppressed(provider, source_name): - continue - except ImportError: - pass + if _is_suppressed(provider, source_name): + continue active_sources.add(source_name) changed |= _upsert_entry( entries, @@ -1026,7 +1029,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup elif provider == "nous": state = _load_provider_state(auth_store, "nous") - if state: + if state and not _is_suppressed(provider, "device_code"): active_sources.add("device_code") # Prefer a user-supplied label embedded in the singleton state # (set by persist_nous_credentials(label=...) 
when the user ran @@ -1067,20 +1070,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup token, source = resolve_copilot_token() if token: source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}" - active_sources.add(source_name) - pconfig = PROVIDER_REGISTRY.get(provider) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": token, - "base_url": pconfig.inference_base_url if pconfig else "", - "label": source, - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + pconfig = PROVIDER_REGISTRY.get(provider) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": token, + "base_url": pconfig.inference_base_url if pconfig else "", + "label": source, + }, + ) except Exception as exc: logger.debug("Copilot token seed failed: %s", exc) @@ -1096,20 +1100,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup token = creds.get("api_key", "") if token: source_name = creds.get("source", "qwen-cli") - active_sources.add(source_name) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_OAUTH, - "access_token": token, - "expires_at_ms": creds.get("expires_at_ms"), - "base_url": creds.get("base_url", ""), - "label": creds.get("auth_file", source_name), - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_OAUTH, + "access_token": token, + "expires_at_ms": creds.get("expires_at_ms"), + "base_url": creds.get("base_url", ""), + "label": creds.get("auth_file", source_name), + }, + ) except Exception as exc: logger.debug("Qwen OAuth token seed failed: %s", exc) @@ -1118,13 +1123,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup # the device_code source as suppressed so it won't be re-seeded from # the Hermes auth store. Without this gate the removal is instantly # undone on the next load_pool() call. - codex_suppressed = False - try: - from hermes_cli.auth import is_source_suppressed - codex_suppressed = is_source_suppressed(provider, "device_code") - except ImportError: - pass - if codex_suppressed: + if _is_suppressed(provider, "device_code"): return changed, active_sources state = _load_provider_state(auth_store, "openai-codex") @@ -1158,10 +1157,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]: changed = False active_sources: Set[str] = set() + # Honour user suppression — `hermes auth remove ` for an + # env-seeded credential marks the env: source as suppressed so it + # won't be re-seeded from the user's shell environment or ~/.hermes/.env. + # Without this gate the removal is silently undone on the next + # load_pool() call whenever the var is still exported by the shell. 
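+    # e.g. after removing env:OPENROUTER_API_KEY, _is_source_suppressed(
+    # "openrouter", "env:OPENROUTER_API_KEY") returns True and the seeding
+    # below skips the upsert even while the variable is still exported.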
+ try: + from hermes_cli.auth import is_source_suppressed as _is_source_suppressed + except ImportError: + def _is_source_suppressed(_p, _s): # type: ignore[misc] + return False if provider == "openrouter": token = os.getenv("OPENROUTER_API_KEY", "").strip() if token: source = "env:OPENROUTER_API_KEY" + if _is_source_suppressed(provider, source): + return changed, active_sources active_sources.add(source) changed |= _upsert_entry( entries, @@ -1198,6 +1209,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool if not token: continue source = f"env:{env_var}" + if _is_source_suppressed(provider, source): + continue active_sources.add(source) auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY base_url = env_url or pconfig.inference_base_url @@ -1242,6 +1255,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b changed = False active_sources: Set[str] = set() + # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons. + try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + # Seed from the custom_providers config entry's api_key field cp_config = _get_custom_provider_config(pool_key) if cp_config: @@ -1250,19 +1270,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b name = str(cp_config.get("name") or "").strip() if api_key: source = f"config:{name}" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": api_key, - "base_url": base_url, - "label": name or source, - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": api_key, + "base_url": base_url, + "label": name or source, + }, + ) # Seed from model.api_key if model.provider=='custom' and model.base_url matches try: @@ -1282,19 +1303,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b matched_key = get_custom_provider_pool_key(model_base_url) if matched_key == pool_key: source = "model_config" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": model_api_key, - "base_url": model_base_url, - "label": "model_config", - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": model_api_key, + "base_url": model_base_url, + "label": "model_config", + }, + ) except Exception: pass diff --git a/agent/credential_sources.py b/agent/credential_sources.py new file mode 100644 index 0000000000..8ad2fade0b --- /dev/null +++ b/agent/credential_sources.py @@ -0,0 +1,401 @@ +"""Unified removal contract for every credential source Hermes reads from. + +Hermes seeds its credential pool from many places: + + env: — os.environ / ~/.hermes/.env + claude_code — ~/.claude/.credentials.json + hermes_pkce — ~/.hermes/.anthropic_oauth.json + device_code — auth.json providers. (nous, openai-codex, ...) 
+ qwen-cli — ~/.qwen/oauth_creds.json + gh_cli — gh auth token + config: — custom_providers config entry + model_config — model.api_key when model.provider == "custom" + manual — user ran `hermes auth add` + +Each source has its own reader inside ``agent.credential_pool._seed_from_*`` +(which keep their existing shape — we haven't restructured them). What we +unify here is **removal**: + + ``hermes auth remove `` must make the pool entry stay gone. + +Before this module, every source had an ad-hoc removal branch in +``auth_remove_command``, and several sources had no branch at all — so +``auth remove`` silently reverted on the next ``load_pool()`` call for +qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and +custom-config sources. + +Now every source registers a ``RemovalStep`` that does exactly three things +in the same shape: + + 1. Clean up whatever externally-readable state the source reads from + (.env line, auth.json block, OAuth file, etc.) + 2. Suppress the ``(provider, source_id)`` in auth.json so the + corresponding ``_seed_from_*`` branch skips the upsert on re-load + 3. Return ``RemovalResult`` describing what was cleaned and any + diagnostic hints the user should see (shell-exported env vars, + external credential files we deliberately don't delete, etc.) + +Adding a new credential source is: + - wire up a reader branch in ``_seed_from_*`` (existing pattern) + - gate that reader behind ``is_source_suppressed(provider, source_id)`` + - register a ``RemovalStep`` here + +No more per-source if/elif chain in ``auth_remove_command``. +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Callable, List, Optional + + +@dataclass +class RemovalResult: + """Outcome of removing a credential source. + + Attributes: + cleaned: Short strings describing external state that was actually + mutated (``"Cleared XAI_API_KEY from .env"``, + ``"Cleared openai-codex OAuth tokens from auth store"``). + Printed as plain lines to the user. + hints: Diagnostic lines ABOUT state the user may need to clean up + themselves or is deliberately left intact (shell-exported env + var, Claude Code credential file we don't delete, etc.). + Printed as plain lines to the user. Always non-destructive. + suppress: Whether to call ``suppress_credential_source`` after + cleanup so future ``load_pool`` calls skip this source. + Default True — almost every source needs this to stay sticky. + The only legitimate False is ``manual`` entries, which aren't + seeded from anywhere external. + """ + + cleaned: List[str] = field(default_factory=list) + hints: List[str] = field(default_factory=list) + suppress: bool = True + + +@dataclass +class RemovalStep: + """How to remove one specific credential source cleanly. + + Attributes: + provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...). + Special value ``"*"`` means "matches any provider" — used for + sources like ``manual`` that aren't provider-specific. + source_id: Source identifier as it appears in + ``PooledCredential.source``. May be a literal (``"claude_code"``) + or a prefix pattern matched via ``match_fn``. + match_fn: Optional predicate overriding literal ``source_id`` + matching. Gets the removed entry's source string. Used for + ``env:*`` (any env-seeded key), ``config:*`` (any custom + pool), and ``manual:*`` (any manual-source variant). + remove_fn: ``(provider, removed_entry) -> RemovalResult``. 
Does the + actual cleanup and returns what happened for the user. + description: One-line human-readable description for docs / tests. + """ + + provider: str + source_id: str + remove_fn: Callable[..., RemovalResult] + match_fn: Optional[Callable[[str], bool]] = None + description: str = "" + + def matches(self, provider: str, source: str) -> bool: + if self.provider != "*" and self.provider != provider: + return False + if self.match_fn is not None: + return self.match_fn(source) + return source == self.source_id + + +_REGISTRY: List[RemovalStep] = [] + + +def register(step: RemovalStep) -> RemovalStep: + _REGISTRY.append(step) + return step + + +def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]: + """Return the first matching RemovalStep, or None if unregistered. + + Unregistered sources fall through to the default remove path in + ``auth_remove_command``: the pool entry is already gone (that happens + before dispatch), no external cleanup, no suppression. This is the + correct behaviour for ``manual`` entries — they were only ever stored + in the pool, nothing external to clean up. + """ + for step in _REGISTRY: + if step.matches(provider, source): + return step + return None + + +# --------------------------------------------------------------------------- +# Individual RemovalStep implementations — one per source. +# --------------------------------------------------------------------------- +# Each remove_fn is intentionally small and single-purpose. Adding a new +# credential source means adding ONE entry here — no other changes to +# auth_remove_command. + + +def _remove_env_source(provider: str, removed) -> RemovalResult: + """env: — the most common case. + + Handles three user situations: + 1. Var lives only in ~/.hermes/.env → clear it + 2. Var lives only in the user's shell (shell profile, systemd + EnvironmentFile, launchd plist) → hint them where to unset it + 3. Var lives in both → clear from .env, hint about shell + """ + from hermes_cli.config import get_env_path, remove_env_value + + result = RemovalResult() + env_var = removed.source[len("env:"):] + if not env_var: + return result + + # Detect shell vs .env BEFORE remove_env_value pops os.environ. + env_in_process = bool(os.getenv(env_var)) + env_in_dotenv = False + try: + env_path = get_env_path() + if env_path.exists(): + env_in_dotenv = any( + line.strip().startswith(f"{env_var}=") + for line in env_path.read_text(errors="replace").splitlines() + ) + except OSError: + pass + shell_exported = env_in_process and not env_in_dotenv + + cleared = remove_env_value(env_var) + if cleared: + result.cleaned.append(f"Cleared {env_var} from .env") + + if shell_exported: + result.hints.extend([ + f"Note: {env_var} is still set in your shell environment " + f"(not in ~/.hermes/.env).", + " Unset it there (shell profile, systemd EnvironmentFile, " + "launchd plist, etc.) or it will keep being visible to Hermes.", + f" The pool entry is now suppressed — Hermes will ignore " + f"{env_var} until you run `hermes auth add {provider}`.", + ]) + else: + result.hints.append( + f"Suppressed env:{env_var} — it will not be re-seeded even " + f"if the variable is re-exported later." + ) + return result + + +def _remove_claude_code(provider: str, removed) -> RemovalResult: + """~/.claude/.credentials.json is owned by Claude Code itself. + + We don't delete it — the user's Claude Code install still needs to + work. We just suppress it so Hermes stops reading it. 
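+
+    The suppression key is ``("anthropic", "claude_code")``, the same
+    ``(provider, source_name)`` pair that ``_seed_from_singletons`` checks
+    before each upsert, so the skip holds across future ``load_pool()`` calls.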
+ """ + return RemovalResult(hints=[ + "Suppressed claude_code credential — it will not be re-seeded.", + "Note: Claude Code credentials still live in ~/.claude/.credentials.json", + "Run `hermes auth add anthropic` to re-enable if needed.", + ]) + + +def _remove_hermes_pkce(provider: str, removed) -> RemovalResult: + """~/.hermes/.anthropic_oauth.json is ours — delete it outright.""" + from hermes_constants import get_hermes_home + + result = RemovalResult() + oauth_file = get_hermes_home() / ".anthropic_oauth.json" + if oauth_file.exists(): + try: + oauth_file.unlink() + result.cleaned.append("Cleared Hermes Anthropic OAuth credentials") + except OSError as exc: + result.hints.append(f"Could not delete {oauth_file}: {exc}") + return result + + +def _clear_auth_store_provider(provider: str) -> bool: + """Delete auth_store.providers[provider]. Returns True if deleted.""" + from hermes_cli.auth import ( + _auth_store_lock, + _load_auth_store, + _save_auth_store, + ) + + with _auth_store_lock(): + auth_store = _load_auth_store() + providers_dict = auth_store.get("providers") + if isinstance(providers_dict, dict) and provider in providers_dict: + del providers_dict[provider] + _save_auth_store(auth_store) + return True + return False + + +def _remove_nous_device_code(provider: str, removed) -> RemovalResult: + """Nous OAuth lives in auth.json providers.nous — clear it and suppress. + + We suppress in addition to clearing because nothing else stops the + user's next `hermes login` run from writing providers.nous again + before they decide to. Suppression forces them to go through + `hermes auth add nous` to re-engage, which is the documented re-add + path and clears the suppression atomically. + """ + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + return result + + +def _remove_codex_device_code(provider: str, removed) -> RemovalResult: + """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json. + + refresh_codex_oauth_pure() writes both every time, so clearing only + the Hermes auth store is not enough — _seed_from_singletons() would + re-import from ~/.codex/auth.json on the next load_pool() call and + the removal would be instantly undone. We suppress instead of + deleting Codex CLI's file, so the Codex CLI itself keeps working. + + The canonical source name in ``_seed_from_singletons`` is + ``"device_code"`` (no prefix). Entries may show up in the pool as + either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added + via ``hermes auth add openai-codex``), but in both cases the re-seed + gate lives at the ``"device_code"`` suppression key. We suppress + that canonical key here; the central dispatcher also suppresses + ``removed.source`` which is fine — belt-and-suspenders, idempotent. + """ + from hermes_cli.auth import suppress_credential_source + + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + # Suppress the canonical re-seed source, not just whatever source the + # removed entry had. Otherwise `manual:device_code` removals wouldn't + # block the `device_code` re-seed path. 
+ suppress_credential_source(provider, "device_code") + result.hints.extend([ + "Suppressed openai-codex device_code source — it will not be re-seeded.", + "Note: Codex CLI credentials still live in ~/.codex/auth.json", + "Run `hermes auth add openai-codex` to re-enable if needed.", + ]) + return result + + +def _remove_qwen_cli(provider: str, removed) -> RemovalResult: + """~/.qwen/oauth_creds.json is owned by the Qwen CLI. + + Same pattern as claude_code — suppress, don't delete. The user's + Qwen CLI install still reads from that file. + """ + return RemovalResult(hints=[ + "Suppressed qwen-cli credential — it will not be re-seeded.", + "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json", + "Run `hermes auth add qwen-oauth` to re-enable if needed.", + ]) + + +def _remove_copilot_gh(provider: str, removed) -> RemovalResult: + """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN. + + Copilot is special: the same token can be seeded as multiple source + entries (gh_cli from ``_seed_from_singletons`` plus env: from + ``_seed_from_env``), so removing one entry without suppressing the + others lets the duplicates resurrect. We suppress ALL known copilot + sources here so removal is stable regardless of which entry the + user clicked. + + We don't touch the user's gh CLI or shell state — just suppress so + Hermes stops picking the token up. + """ + # Suppress ALL copilot source variants up-front so no path resurrects + # the pool entry. The central dispatcher in auth_remove_command will + # ALSO suppress removed.source, but it's idempotent so double-calling + # is harmless. + from hermes_cli.auth import suppress_credential_source + suppress_credential_source(provider, "gh_cli") + for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"): + suppress_credential_source(provider, f"env:{env_var}") + + return RemovalResult(hints=[ + "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.", + "Note: Your gh CLI / shell environment is unchanged.", + "Run `hermes auth add copilot` to re-enable if needed.", + ]) + + +def _remove_custom_config(provider: str, removed) -> RemovalResult: + """Custom provider pools are seeded from custom_providers config or + model.api_key. Both are in config.yaml — modifying that from here + is more invasive than suppression. We suppress; the user can edit + config.yaml if they want to remove the key from disk entirely. + """ + source_label = removed.source + return RemovalResult(hints=[ + f"Suppressed {source_label} — it will not be re-seeded.", + "Note: The underlying value in config.yaml is unchanged. Edit it " + "directly if you want to remove the credential from disk.", + ]) + + +def _register_all_sources() -> None: + """Called once on module import. + + ORDER MATTERS — ``find_removal_step`` returns the first match. Put + provider-specific steps before the generic ``env:*`` step so that e.g. + copilot's ``env:GH_TOKEN`` goes through the copilot removal (which + doesn't touch the user's shell), not the generic env-var removal + (which would try to clear .env). 
+ """ + register(RemovalStep( + provider="copilot", source_id="gh_cli", + match_fn=lambda src: src == "gh_cli" or src.startswith("env:"), + remove_fn=_remove_copilot_gh, + description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN", + )) + register(RemovalStep( + provider="*", source_id="env:", + match_fn=lambda src: src.startswith("env:"), + remove_fn=_remove_env_source, + description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)", + )) + register(RemovalStep( + provider="anthropic", source_id="claude_code", + remove_fn=_remove_claude_code, + description="~/.claude/.credentials.json", + )) + register(RemovalStep( + provider="anthropic", source_id="hermes_pkce", + remove_fn=_remove_hermes_pkce, + description="~/.hermes/.anthropic_oauth.json", + )) + register(RemovalStep( + provider="nous", source_id="device_code", + remove_fn=_remove_nous_device_code, + description="auth.json providers.nous", + )) + register(RemovalStep( + provider="openai-codex", source_id="device_code", + match_fn=lambda src: src == "device_code" or src.endswith(":device_code"), + remove_fn=_remove_codex_device_code, + description="auth.json providers.openai-codex + ~/.codex/auth.json", + )) + register(RemovalStep( + provider="qwen-oauth", source_id="qwen-cli", + remove_fn=_remove_qwen_cli, + description="~/.qwen/oauth_creds.json", + )) + register(RemovalStep( + provider="*", source_id="config:", + match_fn=lambda src: src.startswith("config:") or src == "model_config", + remove_fn=_remove_custom_config, + description="Custom provider config.yaml api_key field", + )) + + +_register_all_sources() diff --git a/agent/display.py b/agent/display.py index 3f1341485e..474595d76c 100644 --- a/agent/display.py +++ b/agent/display.py @@ -225,9 +225,11 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) - content = _oneline(args.get("content", "")) return f"+{target}: \"{content[:25]}{'...' 
if len(content) > 25 else ''}\""
         elif action == "replace":
-            return f"~{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
+            old = _oneline(args.get("old_text") or "") or ""
+            return f"~{target}: \"{old[:20]}\""
         elif action == "remove":
-            return f"-{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
+            old = _oneline(args.get("old_text") or "") or ""
+            return f"-{target}: \"{old[:20]}\""
         return action
 
     if tool_name == "send_message":
@@ -939,9 +941,11 @@ def get_cute_tool_message(
         if action == "add":
             return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
         elif action == "replace":
-            return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
+            old = args.get("old_text") or ""
+            return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(old, 20)}\" {dur}")
         elif action == "remove":
-            return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
+            old = args.get("old_text") or ""
+            return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(old, 20)}\" {dur}")
         return _wrap(f"┊ 🧠 memory {action} {dur}")
     if tool_name == "skills_list":
         return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index fa6a985041..14a2609d83 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -290,7 +290,7 @@ def classify_api_error(
     if isinstance(body, dict):
         _err_obj = body.get("error", {})
         if isinstance(_err_obj, dict):
-            _body_msg = (_err_obj.get("message") or "").lower()
+            _body_msg = str(_err_obj.get("message") or "").lower()
             # Parse metadata.raw for wrapped provider errors
             _metadata = _err_obj.get("metadata", {})
             if isinstance(_metadata, dict):
@@ -302,11 +302,11 @@
                 if isinstance(_inner, dict):
                     _inner_err = _inner.get("error", {})
                     if isinstance(_inner_err, dict):
-                        _metadata_msg = (_inner_err.get("message") or "").lower()
+                        _metadata_msg = str(_inner_err.get("message") or "").lower()
             except (json.JSONDecodeError, TypeError):
                 pass
         if not _body_msg:
-            _body_msg = (body.get("message") or "").lower()
+            _body_msg = str(body.get("message") or "").lower()
     # Combine all message sources for pattern matching
     parts = [_raw_msg]
     if _body_msg and _body_msg not in _raw_msg:
@@ -470,11 +470,16 @@
             retryable=False,
             should_fallback=True,
         )
-        # Generic 404 — could be model or endpoint
+        # Generic 404 with no "model not found" signal — could be a wrong
+        # endpoint path (common with local llama.cpp / Ollama / vLLM when
+        # the URL is slightly misconfigured), a proxy routing glitch, or
+        # a transient backend issue. Classifying these as model_not_found
+        # silently falls back to a different provider and tells the model
+        # the model is missing, which is wrong and wastes a turn. Treat
+        # as unknown so the retry loop surfaces the real error instead.
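+        # (404s whose error body does name a missing model are classified
+        # as model_not_found by the earlier body-text matching; only
+        # signal-free 404s reach this branch.)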
return result_fn( - FailoverReason.model_not_found, - retryable=False, - should_fallback=True, + FailoverReason.unknown, + retryable=True, ) if status_code == 413: @@ -606,10 +611,10 @@ def _classify_400( if isinstance(body, dict): err_obj = body.get("error", {}) if isinstance(err_obj, dict): - err_body_msg = (err_obj.get("message") or "").strip().lower() + err_body_msg = str(err_obj.get("message") or "").strip().lower() # Responses API (and some providers) use flat body: {"message": "..."} if not err_body_msg: - err_body_msg = (body.get("message") or "").strip().lower() + err_body_msg = str(body.get("message") or "").strip().lower() is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "") is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80 diff --git a/agent/file_safety.py b/agent/file_safety.py new file mode 100644 index 0000000000..09da46cafd --- /dev/null +++ b/agent/file_safety.py @@ -0,0 +1,111 @@ +"""Shared file safety rules used by both tools and ACP shims.""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Optional + + +def _hermes_home_path() -> Path: + """Resolve the active HERMES_HOME (profile-aware) without circular imports.""" + try: + from hermes_constants import get_hermes_home # local import to avoid cycles + return get_hermes_home() + except Exception: + return Path(os.path.expanduser("~/.hermes")) + + +def build_write_denied_paths(home: str) -> set[str]: + """Return exact sensitive paths that must never be written.""" + hermes_home = _hermes_home_path() + return { + os.path.realpath(p) + for p in [ + os.path.join(home, ".ssh", "authorized_keys"), + os.path.join(home, ".ssh", "id_rsa"), + os.path.join(home, ".ssh", "id_ed25519"), + os.path.join(home, ".ssh", "config"), + str(hermes_home / ".env"), + os.path.join(home, ".bashrc"), + os.path.join(home, ".zshrc"), + os.path.join(home, ".profile"), + os.path.join(home, ".bash_profile"), + os.path.join(home, ".zprofile"), + os.path.join(home, ".netrc"), + os.path.join(home, ".pgpass"), + os.path.join(home, ".npmrc"), + os.path.join(home, ".pypirc"), + "/etc/sudoers", + "/etc/passwd", + "/etc/shadow", + ] + } + + +def build_write_denied_prefixes(home: str) -> list[str]: + """Return sensitive directory prefixes that must never be written.""" + return [ + os.path.realpath(p) + os.sep + for p in [ + os.path.join(home, ".ssh"), + os.path.join(home, ".aws"), + os.path.join(home, ".gnupg"), + os.path.join(home, ".kube"), + "/etc/sudoers.d", + "/etc/systemd", + os.path.join(home, ".docker"), + os.path.join(home, ".azure"), + os.path.join(home, ".config", "gh"), + ] + ] + + +def get_safe_write_root() -> Optional[str]: + """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset.""" + root = os.getenv("HERMES_WRITE_SAFE_ROOT", "") + if not root: + return None + try: + return os.path.realpath(os.path.expanduser(root)) + except Exception: + return None + + +def is_write_denied(path: str) -> bool: + """Return True if path is blocked by the write denylist or safe root.""" + home = os.path.realpath(os.path.expanduser("~")) + resolved = os.path.realpath(os.path.expanduser(str(path))) + + if resolved in build_write_denied_paths(home): + return True + for prefix in build_write_denied_prefixes(home): + if resolved.startswith(prefix): + return True + + safe_root = get_safe_write_root() + if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)): + return True + + return False + + +def 
get_read_block_error(path: str) -> Optional[str]: + """Return an error message when a read targets internal Hermes cache files.""" + resolved = Path(path).expanduser().resolve() + hermes_home = _hermes_home_path().resolve() + blocked_dirs = [ + hermes_home / "skills" / ".hub" / "index-cache", + hermes_home / "skills" / ".hub", + ] + for blocked in blocked_dirs: + try: + resolved.relative_to(blocked) + except ValueError: + continue + return ( + f"Access denied: {path} is an internal Hermes cache file " + "and cannot be read directly to prevent prompt injection. " + "Use the skills_list or skill_view tools instead." + ) + return None diff --git a/agent/gemini_cloudcode_adapter.py b/agent/gemini_cloudcode_adapter.py index ed687bffd6..24866c3a53 100644 --- a/agent/gemini_cloudcode_adapter.py +++ b/agent/gemini_cloudcode_adapter.py @@ -39,6 +39,7 @@ from typing import Any, Dict, Iterator, List, Optional import httpx from agent import google_oauth +from agent.gemini_schema import sanitize_gemini_tool_parameters from agent.google_code_assist import ( CODE_ASSIST_ENDPOINT, FREE_TIER_ID, @@ -205,7 +206,7 @@ def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]: decl["description"] = str(fn["description"]) params = fn.get("parameters") if isinstance(params, dict): - decl["parameters"] = params + decl["parameters"] = sanitize_gemini_tool_parameters(params) declarations.append(decl) if not declarations: return [] @@ -504,9 +505,16 @@ def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]: def _translate_stream_event( event: Dict[str, Any], model: str, - tool_call_indices: Dict[str, int], + tool_call_counter: List[int], ) -> List[_GeminiStreamChunk]: - """Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s).""" + """Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s). + + ``tool_call_counter`` is a single-element list used as a mutable counter + across events in the same stream. Each ``functionCall`` part gets a + fresh, unique OpenAI ``index`` — keying by function name would collide + whenever the model issues parallel calls to the same tool (e.g. reading + three files in one turn). 
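+
+    E.g. a stream emitting three parallel calls to the same tool yields
+    tool_call indices 0, 1, 2; with the old ``{name: index}`` dict all
+    three shared index 0 and downstream accumulation merged their arguments.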
+ """ inner = event.get("response") if isinstance(event.get("response"), dict) else event candidates = inner.get("candidates") or [] if not candidates: @@ -532,7 +540,8 @@ def _translate_stream_event( fc = part.get("functionCall") if isinstance(fc, dict) and fc.get("name"): name = str(fc["name"]) - idx = tool_call_indices.setdefault(name, len(tool_call_indices)) + idx = tool_call_counter[0] + tool_call_counter[0] += 1 try: args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False) except (TypeError, ValueError): @@ -549,7 +558,7 @@ def _translate_stream_event( finish_reason_raw = str(cand.get("finishReason") or "") if finish_reason_raw: mapped = _map_gemini_finish_reason(finish_reason_raw) - if tool_call_indices: + if tool_call_counter[0] > 0: mapped = "tool_calls" chunks.append(_make_stream_chunk(model=model, finish_reason=mapped)) return chunks @@ -733,9 +742,9 @@ class GeminiCloudCodeClient: # Materialize error body for better diagnostics response.read() raise _gemini_http_error(response) - tool_call_indices: Dict[str, int] = {} + tool_call_counter: List[int] = [0] for event in _iter_sse_events(response): - for chunk in _translate_stream_event(event, model, tool_call_indices): + for chunk in _translate_stream_event(event, model, tool_call_counter): yield chunk except httpx.HTTPError as exc: raise CodeAssistError( @@ -790,7 +799,8 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError: err_obj = {} err_status = str(err_obj.get("status") or "").strip() err_message = str(err_obj.get("message") or "").strip() - err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else [] + _raw_details = err_obj.get("details") + err_details_list = _raw_details if isinstance(_raw_details, list) else [] # Extract google.rpc.ErrorInfo reason + metadata. There may be more # than one ErrorInfo (rare), so we pick the first one with a reason. diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py new file mode 100644 index 0000000000..406e4a19b7 --- /dev/null +++ b/agent/gemini_native_adapter.py @@ -0,0 +1,847 @@ +"""OpenAI-compatible facade over Google AI Studio's native Gemini API. + +Hermes keeps ``api_mode='chat_completions'`` for the ``gemini`` provider so the +main agent loop can keep using its existing OpenAI-shaped message flow. +This adapter is the transport shim that converts those OpenAI-style +``messages[]`` / ``tools[]`` requests into Gemini's native +``models/{model}:generateContent`` schema and converts the responses back. + +Why this exists +--------------- +Google's OpenAI-compatible endpoint has been brittle for Hermes's multi-turn +agent/tool loop (auth churn, tool-call replay quirks, thought-signature +requirements). The native Gemini API is the canonical path and avoids the +OpenAI-compat layer entirely. 
+""" + +from __future__ import annotations + +import asyncio +import base64 +import json +import logging +import time +import uuid +from types import SimpleNamespace +from typing import Any, Dict, Iterator, List, Optional + +import httpx + +from agent.gemini_schema import sanitize_gemini_tool_parameters + +logger = logging.getLogger(__name__) + +DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta" + + +def is_native_gemini_base_url(base_url: str) -> bool: + """Return True when the endpoint speaks Gemini's native REST API.""" + normalized = str(base_url or "").strip().rstrip("/").lower() + if not normalized: + return False + if "generativelanguage.googleapis.com" not in normalized: + return False + return not normalized.endswith("/openai") + + +class GeminiAPIError(Exception): + """Error shape compatible with Hermes retry/error classification.""" + + def __init__( + self, + message: str, + *, + code: str = "gemini_api_error", + status_code: Optional[int] = None, + response: Optional[httpx.Response] = None, + retry_after: Optional[float] = None, + details: Optional[Dict[str, Any]] = None, + ) -> None: + super().__init__(message) + self.code = code + self.status_code = status_code + self.response = response + self.retry_after = retry_after + self.details = details or {} + + +def _coerce_content_to_text(content: Any) -> str: + if content is None: + return "" + if isinstance(content, str): + return content + if isinstance(content, list): + pieces: List[str] = [] + for part in content: + if isinstance(part, str): + pieces.append(part) + elif isinstance(part, dict) and part.get("type") == "text": + text = part.get("text") + if isinstance(text, str): + pieces.append(text) + return "\n".join(pieces) + return str(content) + + +def _extract_multimodal_parts(content: Any) -> List[Dict[str, Any]]: + if not isinstance(content, list): + text = _coerce_content_to_text(content) + return [{"text": text}] if text else [] + + parts: List[Dict[str, Any]] = [] + for item in content: + if isinstance(item, str): + parts.append({"text": item}) + continue + if not isinstance(item, dict): + continue + ptype = item.get("type") + if ptype == "text": + text = item.get("text") + if isinstance(text, str) and text: + parts.append({"text": text}) + elif ptype == "image_url": + url = ((item.get("image_url") or {}).get("url") or "") + if not isinstance(url, str) or not url.startswith("data:"): + continue + try: + header, encoded = url.split(",", 1) + mime = header.split(":", 1)[1].split(";", 1)[0] + raw = base64.b64decode(encoded) + except Exception: + continue + parts.append( + { + "inlineData": { + "mimeType": mime, + "data": base64.b64encode(raw).decode("ascii"), + } + } + ) + return parts + + +def _tool_call_extra_signature(tool_call: Dict[str, Any]) -> Optional[str]: + extra = tool_call.get("extra_content") or {} + if not isinstance(extra, dict): + return None + google = extra.get("google") or extra.get("thought_signature") + if isinstance(google, dict): + sig = google.get("thought_signature") or google.get("thoughtSignature") + return str(sig) if isinstance(sig, str) and sig else None + if isinstance(google, str) and google: + return google + return None + + +def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]: + fn = tool_call.get("function") or {} + args_raw = fn.get("arguments", "") + try: + args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {} + except json.JSONDecodeError: + args = {"_raw": args_raw} + if not isinstance(args, dict): + args 
= {"_value": args} + + part: Dict[str, Any] = { + "functionCall": { + "name": str(fn.get("name") or ""), + "args": args, + } + } + thought_signature = _tool_call_extra_signature(tool_call) + if thought_signature: + part["thoughtSignature"] = thought_signature + return part + + +def _translate_tool_result_to_gemini( + message: Dict[str, Any], + tool_name_by_call_id: Optional[Dict[str, str]] = None, +) -> Dict[str, Any]: + tool_name_by_call_id = tool_name_by_call_id or {} + tool_call_id = str(message.get("tool_call_id") or "") + name = str( + message.get("name") + or tool_name_by_call_id.get(tool_call_id) + or tool_call_id + or "tool" + ) + content = _coerce_content_to_text(message.get("content")) + try: + parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None + except json.JSONDecodeError: + parsed = None + response = parsed if isinstance(parsed, dict) else {"output": content} + return { + "functionResponse": { + "name": name, + "response": response, + } + } + + +def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]: + system_text_parts: List[str] = [] + contents: List[Dict[str, Any]] = [] + tool_name_by_call_id: Dict[str, str] = {} + + for msg in messages: + if not isinstance(msg, dict): + continue + role = str(msg.get("role") or "user") + + if role == "system": + system_text_parts.append(_coerce_content_to_text(msg.get("content"))) + continue + + if role in {"tool", "function"}: + contents.append( + { + "role": "user", + "parts": [ + _translate_tool_result_to_gemini( + msg, + tool_name_by_call_id=tool_name_by_call_id, + ) + ], + } + ) + continue + + gemini_role = "model" if role == "assistant" else "user" + parts: List[Dict[str, Any]] = [] + + content_parts = _extract_multimodal_parts(msg.get("content")) + parts.extend(content_parts) + + tool_calls = msg.get("tool_calls") or [] + if isinstance(tool_calls, list): + for tool_call in tool_calls: + if isinstance(tool_call, dict): + tool_call_id = str(tool_call.get("id") or tool_call.get("call_id") or "") + tool_name = str(((tool_call.get("function") or {}).get("name") or "")) + if tool_call_id and tool_name: + tool_name_by_call_id[tool_call_id] = tool_name + parts.append(_translate_tool_call_to_gemini(tool_call)) + + if parts: + contents.append({"role": gemini_role, "parts": parts}) + + system_instruction = None + joined_system = "\n".join(part for part in system_text_parts if part).strip() + if joined_system: + system_instruction = {"parts": [{"text": joined_system}]} + return contents, system_instruction + + +def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]: + if not isinstance(tools, list): + return [] + declarations: List[Dict[str, Any]] = [] + for tool in tools: + if not isinstance(tool, dict): + continue + fn = tool.get("function") or {} + if not isinstance(fn, dict): + continue + name = fn.get("name") + if not isinstance(name, str) or not name: + continue + decl: Dict[str, Any] = {"name": name} + description = fn.get("description") + if isinstance(description, str) and description: + decl["description"] = description + parameters = fn.get("parameters") + if isinstance(parameters, dict): + decl["parameters"] = sanitize_gemini_tool_parameters(parameters) + declarations.append(decl) + return [{"functionDeclarations": declarations}] if declarations else [] + + +def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]: + if tool_choice is None: + return None + if isinstance(tool_choice, str): + if tool_choice 
== "auto": + return {"functionCallingConfig": {"mode": "AUTO"}} + if tool_choice == "required": + return {"functionCallingConfig": {"mode": "ANY"}} + if tool_choice == "none": + return {"functionCallingConfig": {"mode": "NONE"}} + if isinstance(tool_choice, dict): + fn = tool_choice.get("function") or {} + name = fn.get("name") + if isinstance(name, str) and name: + return {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": [name]}} + return None + + +def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]: + if not isinstance(config, dict) or not config: + return None + budget = config.get("thinkingBudget", config.get("thinking_budget")) + include = config.get("includeThoughts", config.get("include_thoughts")) + level = config.get("thinkingLevel", config.get("thinking_level")) + normalized: Dict[str, Any] = {} + if isinstance(budget, (int, float)): + normalized["thinkingBudget"] = int(budget) + if isinstance(include, bool): + normalized["includeThoughts"] = include + if isinstance(level, str) and level.strip(): + normalized["thinkingLevel"] = level.strip().lower() + return normalized or None + + +def build_gemini_request( + *, + messages: List[Dict[str, Any]], + tools: Any = None, + tool_choice: Any = None, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_p: Optional[float] = None, + stop: Any = None, + thinking_config: Any = None, +) -> Dict[str, Any]: + contents, system_instruction = _build_gemini_contents(messages) + request: Dict[str, Any] = {"contents": contents} + if system_instruction: + request["systemInstruction"] = system_instruction + + gemini_tools = _translate_tools_to_gemini(tools) + if gemini_tools: + request["tools"] = gemini_tools + + tool_config = _translate_tool_choice_to_gemini(tool_choice) + if tool_config: + request["toolConfig"] = tool_config + + generation_config: Dict[str, Any] = {} + if temperature is not None: + generation_config["temperature"] = temperature + if max_tokens is not None: + generation_config["maxOutputTokens"] = max_tokens + if top_p is not None: + generation_config["topP"] = top_p + if stop: + generation_config["stopSequences"] = stop if isinstance(stop, list) else [str(stop)] + normalized_thinking = _normalize_thinking_config(thinking_config) + if normalized_thinking: + generation_config["thinkingConfig"] = normalized_thinking + if generation_config: + request["generationConfig"] = generation_config + + return request + + +def _map_gemini_finish_reason(reason: str) -> str: + mapping = { + "STOP": "stop", + "MAX_TOKENS": "length", + "SAFETY": "content_filter", + "RECITATION": "content_filter", + "OTHER": "stop", + } + return mapping.get(str(reason or "").upper(), "stop") + + +def _tool_call_extra_from_part(part: Dict[str, Any]) -> Optional[Dict[str, Any]]: + sig = part.get("thoughtSignature") + if isinstance(sig, str) and sig: + return {"google": {"thought_signature": sig}} + return None + + +def _empty_response(model: str) -> SimpleNamespace: + message = SimpleNamespace( + role="assistant", + content="", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=None, + ) + choice = SimpleNamespace(index=0, message=message, finish_reason="stop") + usage = SimpleNamespace( + prompt_tokens=0, + completion_tokens=0, + total_tokens=0, + prompt_tokens_details=SimpleNamespace(cached_tokens=0), + ) + return SimpleNamespace( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion", + created=int(time.time()), + model=model, + choices=[choice], + 
usage=usage, + ) + + +def translate_gemini_response(resp: Dict[str, Any], model: str) -> SimpleNamespace: + candidates = resp.get("candidates") or [] + if not isinstance(candidates, list) or not candidates: + return _empty_response(model) + + cand = candidates[0] if isinstance(candidates[0], dict) else {} + content_obj = cand.get("content") if isinstance(cand, dict) else {} + parts = content_obj.get("parts") if isinstance(content_obj, dict) else [] + + text_pieces: List[str] = [] + reasoning_pieces: List[str] = [] + tool_calls: List[SimpleNamespace] = [] + + for index, part in enumerate(parts or []): + if not isinstance(part, dict): + continue + if part.get("thought") is True and isinstance(part.get("text"), str): + reasoning_pieces.append(part["text"]) + continue + if isinstance(part.get("text"), str): + text_pieces.append(part["text"]) + continue + fc = part.get("functionCall") + if isinstance(fc, dict) and fc.get("name"): + try: + args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False) + except (TypeError, ValueError): + args_str = "{}" + tool_call = SimpleNamespace( + id=f"call_{uuid.uuid4().hex[:12]}", + type="function", + index=index, + function=SimpleNamespace(name=str(fc["name"]), arguments=args_str), + ) + extra_content = _tool_call_extra_from_part(part) + if extra_content: + tool_call.extra_content = extra_content + tool_calls.append(tool_call) + + finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(str(cand.get("finishReason") or "")) + usage_meta = resp.get("usageMetadata") or {} + usage = SimpleNamespace( + prompt_tokens=int(usage_meta.get("promptTokenCount") or 0), + completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0), + total_tokens=int(usage_meta.get("totalTokenCount") or 0), + prompt_tokens_details=SimpleNamespace( + cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0), + ), + ) + reasoning = "".join(reasoning_pieces) or None + message = SimpleNamespace( + role="assistant", + content="".join(text_pieces) if text_pieces else None, + tool_calls=tool_calls or None, + reasoning=reasoning, + reasoning_content=reasoning, + reasoning_details=None, + ) + choice = SimpleNamespace(index=0, message=message, finish_reason=finish_reason) + return SimpleNamespace( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion", + created=int(time.time()), + model=model, + choices=[choice], + usage=usage, + ) + + +class _GeminiStreamChunk(SimpleNamespace): + pass + + +def _make_stream_chunk( + *, + model: str, + content: str = "", + tool_call_delta: Optional[Dict[str, Any]] = None, + finish_reason: Optional[str] = None, + reasoning: str = "", +) -> _GeminiStreamChunk: + delta_kwargs: Dict[str, Any] = { + "role": "assistant", + "content": None, + "tool_calls": None, + "reasoning": None, + "reasoning_content": None, + } + if content: + delta_kwargs["content"] = content + if tool_call_delta is not None: + tool_delta = SimpleNamespace( + index=tool_call_delta.get("index", 0), + id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}", + type="function", + function=SimpleNamespace( + name=tool_call_delta.get("name") or "", + arguments=tool_call_delta.get("arguments") or "", + ), + ) + extra_content = tool_call_delta.get("extra_content") + if isinstance(extra_content, dict): + tool_delta.extra_content = extra_content + delta_kwargs["tool_calls"] = [tool_delta] + if reasoning: + delta_kwargs["reasoning"] = reasoning + delta_kwargs["reasoning_content"] = reasoning + delta = SimpleNamespace(**delta_kwargs) + choice = 
SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason) + return _GeminiStreamChunk( + id=f"chatcmpl-{uuid.uuid4().hex[:12]}", + object="chat.completion.chunk", + created=int(time.time()), + model=model, + choices=[choice], + usage=None, + ) + + +def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]: + buffer = "" + for chunk in response.iter_text(): + if not chunk: + continue + buffer += chunk + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.rstrip("\r") + if not line: + continue + if not line.startswith("data: "): + continue + data = line[6:] + if data == "[DONE]": + return + try: + payload = json.loads(data) + except json.JSONDecodeError: + logger.debug("Non-JSON Gemini SSE line: %s", data[:200]) + continue + if isinstance(payload, dict): + yield payload + + +def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: Dict[str, Dict[str, Any]]) -> List[_GeminiStreamChunk]: + candidates = event.get("candidates") or [] + if not candidates: + return [] + cand = candidates[0] if isinstance(candidates[0], dict) else {} + parts = ((cand.get("content") or {}).get("parts") or []) if isinstance(cand, dict) else [] + chunks: List[_GeminiStreamChunk] = [] + + for part_index, part in enumerate(parts): + if not isinstance(part, dict): + continue + if part.get("thought") is True and isinstance(part.get("text"), str): + chunks.append(_make_stream_chunk(model=model, reasoning=part["text"])) + continue + if isinstance(part.get("text"), str) and part["text"]: + chunks.append(_make_stream_chunk(model=model, content=part["text"])) + fc = part.get("functionCall") + if isinstance(fc, dict) and fc.get("name"): + name = str(fc["name"]) + try: + args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False, sort_keys=True) + except (TypeError, ValueError): + args_str = "{}" + thought_signature = part.get("thoughtSignature") if isinstance(part.get("thoughtSignature"), str) else "" + call_key = json.dumps( + { + "part_index": part_index, + "name": name, + "thought_signature": thought_signature, + }, + sort_keys=True, + ) + slot = tool_call_indices.get(call_key) + if slot is None: + slot = { + "index": len(tool_call_indices), + "id": f"call_{uuid.uuid4().hex[:12]}", + "last_arguments": "", + } + tool_call_indices[call_key] = slot + emitted_arguments = args_str + last_arguments = str(slot.get("last_arguments") or "") + if last_arguments: + if args_str == last_arguments: + emitted_arguments = "" + elif args_str.startswith(last_arguments): + emitted_arguments = args_str[len(last_arguments):] + slot["last_arguments"] = args_str + chunks.append( + _make_stream_chunk( + model=model, + tool_call_delta={ + "index": slot["index"], + "id": slot["id"], + "name": name, + "arguments": emitted_arguments, + "extra_content": _tool_call_extra_from_part(part), + }, + ) + ) + + finish_reason_raw = str(cand.get("finishReason") or "") + if finish_reason_raw: + mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw) + chunks.append(_make_stream_chunk(model=model, finish_reason=mapped)) + return chunks + + +def gemini_http_error(response: httpx.Response) -> GeminiAPIError: + status = response.status_code + body_text = "" + body_json: Dict[str, Any] = {} + try: + body_text = response.text + except Exception: + body_text = "" + if body_text: + try: + parsed = json.loads(body_text) + if isinstance(parsed, dict): + body_json = parsed + except (ValueError, TypeError): + body_json = {} + + err_obj = body_json.get("error") if 
isinstance(body_json, dict) else None + if not isinstance(err_obj, dict): + err_obj = {} + err_status = str(err_obj.get("status") or "").strip() + err_message = str(err_obj.get("message") or "").strip() + _raw_details = err_obj.get("details") + details_list = _raw_details if isinstance(_raw_details, list) else [] + + reason = "" + retry_after: Optional[float] = None + metadata: Dict[str, Any] = {} + for detail in details_list: + if not isinstance(detail, dict): + continue + type_url = str(detail.get("@type") or "") + if not reason and type_url.endswith("/google.rpc.ErrorInfo"): + reason_value = detail.get("reason") + if isinstance(reason_value, str): + reason = reason_value + md = detail.get("metadata") + if isinstance(md, dict): + metadata = md + header_retry = response.headers.get("Retry-After") or response.headers.get("retry-after") + if header_retry: + try: + retry_after = float(header_retry) + except (TypeError, ValueError): + retry_after = None + + code = f"gemini_http_{status}" + if status == 401: + code = "gemini_unauthorized" + elif status == 429: + code = "gemini_rate_limited" + elif status == 404: + code = "gemini_model_not_found" + + if err_message: + message = f"Gemini HTTP {status} ({err_status or 'error'}): {err_message}" + else: + message = f"Gemini returned HTTP {status}: {body_text[:500]}" + + return GeminiAPIError( + message, + code=code, + status_code=status, + response=response, + retry_after=retry_after, + details={ + "status": err_status, + "reason": reason, + "metadata": metadata, + "message": err_message, + }, + ) + + +class _GeminiChatCompletions: + def __init__(self, client: "GeminiNativeClient"): + self._client = client + + def create(self, **kwargs: Any) -> Any: + return self._client._create_chat_completion(**kwargs) + + +class _AsyncGeminiChatCompletions: + def __init__(self, client: "AsyncGeminiNativeClient"): + self._client = client + + async def create(self, **kwargs: Any) -> Any: + return await self._client._create_chat_completion(**kwargs) + + +class _GeminiChatNamespace: + def __init__(self, client: "GeminiNativeClient"): + self.completions = _GeminiChatCompletions(client) + + +class _AsyncGeminiChatNamespace: + def __init__(self, client: "AsyncGeminiNativeClient"): + self.completions = _AsyncGeminiChatCompletions(client) + + +class GeminiNativeClient: + """Minimal OpenAI-SDK-compatible facade over Gemini's native REST API.""" + + def __init__( + self, + *, + api_key: str, + base_url: Optional[str] = None, + default_headers: Optional[Dict[str, str]] = None, + timeout: Any = None, + http_client: Optional[httpx.Client] = None, + **_: Any, + ) -> None: + self.api_key = api_key + normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/") + if normalized_base.endswith("/openai"): + normalized_base = normalized_base[: -len("/openai")] + self.base_url = normalized_base + self._default_headers = dict(default_headers or {}) + self.chat = _GeminiChatNamespace(self) + self.is_closed = False + self._http = http_client or httpx.Client( + timeout=timeout or httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0) + ) + + def close(self) -> None: + self.is_closed = True + try: + self._http.close() + except Exception: + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def _headers(self) -> Dict[str, str]: + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + "x-goog-api-key": self.api_key, + "User-Agent": "hermes-agent (gemini-native)", + } + 
headers.update(self._default_headers) + return headers + + @staticmethod + def _advance_stream_iterator(iterator: Iterator[_GeminiStreamChunk]) -> tuple[bool, Optional[_GeminiStreamChunk]]: + try: + return False, next(iterator) + except StopIteration: + return True, None + + def _create_chat_completion( + self, + *, + model: str = "gemini-2.5-flash", + messages: Optional[List[Dict[str, Any]]] = None, + stream: bool = False, + tools: Any = None, + tool_choice: Any = None, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + top_p: Optional[float] = None, + stop: Any = None, + extra_body: Optional[Dict[str, Any]] = None, + timeout: Any = None, + **_: Any, + ) -> Any: + thinking_config = None + if isinstance(extra_body, dict): + thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig") + + request = build_gemini_request( + messages=messages or [], + tools=tools, + tool_choice=tool_choice, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + stop=stop, + thinking_config=thinking_config, + ) + + if stream: + return self._stream_completion(model=model, request=request, timeout=timeout) + + url = f"{self.base_url}/models/{model}:generateContent" + response = self._http.post(url, json=request, headers=self._headers(), timeout=timeout) + if response.status_code != 200: + raise gemini_http_error(response) + try: + payload = response.json() + except ValueError as exc: + raise GeminiAPIError( + f"Invalid JSON from Gemini native API: {exc}", + code="gemini_invalid_json", + status_code=response.status_code, + response=response, + ) from exc + return translate_gemini_response(payload, model=model) + + def _stream_completion(self, *, model: str, request: Dict[str, Any], timeout: Any = None) -> Iterator[_GeminiStreamChunk]: + url = f"{self.base_url}/models/{model}:streamGenerateContent?alt=sse" + stream_headers = dict(self._headers()) + stream_headers["Accept"] = "text/event-stream" + + def _generator() -> Iterator[_GeminiStreamChunk]: + try: + with self._http.stream("POST", url, json=request, headers=stream_headers, timeout=timeout) as response: + if response.status_code != 200: + response.read() + raise gemini_http_error(response) + tool_call_indices: Dict[str, Dict[str, Any]] = {} + for event in _iter_sse_events(response): + for chunk in translate_stream_event(event, model, tool_call_indices): + yield chunk + except httpx.HTTPError as exc: + raise GeminiAPIError( + f"Gemini streaming request failed: {exc}", + code="gemini_stream_error", + ) from exc + + return _generator() + + +class AsyncGeminiNativeClient: + """Async wrapper used by auxiliary_client for native Gemini calls.""" + + def __init__(self, sync_client: GeminiNativeClient): + self._sync = sync_client + self.api_key = sync_client.api_key + self.base_url = sync_client.base_url + self.chat = _AsyncGeminiChatNamespace(self) + + async def _create_chat_completion(self, **kwargs: Any) -> Any: + stream = bool(kwargs.get("stream")) + result = await asyncio.to_thread(self._sync.chat.completions.create, **kwargs) + if not stream: + return result + + async def _async_stream() -> Any: + while True: + done, chunk = await asyncio.to_thread(self._sync._advance_stream_iterator, result) + if done: + break + yield chunk + + return _async_stream() + + async def close(self) -> None: + await asyncio.to_thread(self._sync.close) diff --git a/agent/gemini_schema.py b/agent/gemini_schema.py new file mode 100644 index 0000000000..904c99d31b --- /dev/null +++ b/agent/gemini_schema.py @@ -0,0 +1,85 @@ 
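# Usage sketch for the GeminiNativeClient facade above (editor's
# illustration, not part of the patch): the class mirrors the OpenAI SDK's
# `client.chat.completions.create(...)` surface, so existing call sites can
# swap clients without changes. The import path is an assumption; the diff
# does not name the module that defines the client.
import os

from agent.gemini_native import GeminiNativeClient  # assumed module path

client = GeminiNativeClient(api_key=os.environ["GEMINI_API_KEY"])
try:
    resp = client.chat.completions.create(
        model="gemini-2.5-flash",
        messages=[{"role": "user", "content": "Reply with one word: ping"}],
    )
    print(resp.choices[0].message.content)

    # stream=True yields chat.completion.chunk-shaped objects instead:
    for chunk in client.chat.completions.create(
        model="gemini-2.5-flash",
        messages=[{"role": "user", "content": "Count to three."}],
        stream=True,
    ):
        delta = chunk.choices[0].delta
        if delta.content:
            print(delta.content, end="")
finally:
    client.close()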
+"""Helpers for translating OpenAI-style tool schemas to Gemini's schema subset.""" + +from __future__ import annotations + +from typing import Any, Dict, List + +# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema`` +# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields +# outside that subset before sending Hermes tool schemas to Google. +_GEMINI_SCHEMA_ALLOWED_KEYS = { + "type", + "format", + "title", + "description", + "nullable", + "enum", + "maxItems", + "minItems", + "properties", + "required", + "minProperties", + "maxProperties", + "minLength", + "maxLength", + "pattern", + "example", + "anyOf", + "propertyOrdering", + "default", + "items", + "minimum", + "maximum", +} + + +def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]: + """Return a Gemini-compatible copy of a tool parameter schema. + + Hermes tool schemas are OpenAI-flavored JSON Schema and may contain keys + such as ``$schema`` or ``additionalProperties`` that Google's Gemini + ``Schema`` object rejects. This helper preserves the documented Gemini + subset and recursively sanitizes nested ``properties`` / ``items`` / + ``anyOf`` definitions. + """ + + if not isinstance(schema, dict): + return {} + + cleaned: Dict[str, Any] = {} + for key, value in schema.items(): + if key not in _GEMINI_SCHEMA_ALLOWED_KEYS: + continue + if key == "properties": + if not isinstance(value, dict): + continue + props: Dict[str, Any] = {} + for prop_name, prop_schema in value.items(): + if not isinstance(prop_name, str): + continue + props[prop_name] = sanitize_gemini_schema(prop_schema) + cleaned[key] = props + continue + if key == "items": + cleaned[key] = sanitize_gemini_schema(value) + continue + if key == "anyOf": + if not isinstance(value, list): + continue + cleaned[key] = [ + sanitize_gemini_schema(item) + for item in value + if isinstance(item, dict) + ] + continue + cleaned[key] = value + return cleaned + + +def sanitize_gemini_tool_parameters(parameters: Any) -> Dict[str, Any]: + """Normalize tool parameters to a valid Gemini object schema.""" + + cleaned = sanitize_gemini_schema(parameters) + if not cleaned: + return {"type": "object", "properties": {}} + return cleaned diff --git a/agent/image_gen_provider.py b/agent/image_gen_provider.py new file mode 100644 index 0000000000..47f65c1b34 --- /dev/null +++ b/agent/image_gen_provider.py @@ -0,0 +1,242 @@ +""" +Image Generation Provider ABC +============================= + +Defines the pluggable-backend interface for image generation. Providers register +instances via ``PluginContext.register_image_gen_provider()``; the active one +(selected via ``image_gen.provider`` in ``config.yaml``) services every +``image_generate`` tool call. + +Providers live in ``/plugins/image_gen//`` (built-in, auto-loaded +as ``kind: backend``) or ``~/.hermes/plugins/image_gen//`` (user, opt-in +via ``plugins.enabled``). + +Response shape +-------------- +All providers return a dict that :func:`success_response` / :func:`error_response` +produce. The tool wrapper JSON-serializes it. 
Keys: + + success bool + image str | None URL or absolute file path + model str provider-specific model identifier + prompt str echoed prompt + aspect_ratio str "landscape" | "square" | "portrait" + provider str provider name (for diagnostics) + error str only when success=False + error_type str only when success=False +""" + +from __future__ import annotations + +import abc +import base64 +import datetime +import logging +import uuid +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait") +DEFAULT_ASPECT_RATIO = "landscape" + + +# --------------------------------------------------------------------------- +# ABC +# --------------------------------------------------------------------------- + + +class ImageGenProvider(abc.ABC): + """Abstract base class for an image generation backend. + + Subclasses must implement :meth:`generate`. Everything else has sane + defaults — override only what your provider needs. + """ + + @property + @abc.abstractmethod + def name(self) -> str: + """Stable short identifier used in ``image_gen.provider`` config. + + Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``. + """ + + @property + def display_name(self) -> str: + """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``.""" + return self.name.title() + + def is_available(self) -> bool: + """Return True when this provider can service calls. + + Typically checks for a required API key. Default: True + (providers with no external dependencies are always available). + """ + return True + + def list_models(self) -> List[Dict[str, Any]]: + """Return catalog entries for ``hermes tools`` model picker. + + Each entry:: + + { + "id": "gpt-image-1.5", # required + "display": "GPT Image 1.5", # optional; defaults to id + "speed": "~10s", # optional + "strengths": "...", # optional + "price": "$...", # optional + } + + Default: empty list (provider has no user-selectable models). + """ + return [] + + def get_setup_schema(self) -> Dict[str, Any]: + """Return provider metadata for the ``hermes tools`` picker. + + Used by ``tools_config.py`` to inject this provider as a row in + the Image Generation provider list. Shape:: + + { + "name": "OpenAI", # picker label + "badge": "paid", # optional short tag + "tag": "One-line description...", # optional subtitle + "env_vars": [ # keys to prompt for + {"key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys"}, + ], + } + + Default: minimal entry derived from ``display_name``. Override to + expose API key prompts and custom badges. + """ + return { + "name": self.display_name, + "badge": "", + "tag": "", + "env_vars": [], + } + + def default_model(self) -> Optional[str]: + """Return the default model id, or None if not applicable.""" + models = self.list_models() + if models: + return models[0].get("id") + return None + + @abc.abstractmethod + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + """Generate an image. + + Implementations should return the dict from :func:`success_response` + or :func:`error_response`. ``kwargs`` may contain forward-compat + parameters future versions of the schema will expose — implementations + should ignore unknown keys. 
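+
+        A minimal implementation sketch (editor's illustration; the backend
+        call is a placeholder, not a real API)::
+
+            def generate(self, prompt, aspect_ratio=DEFAULT_ASPECT_RATIO, **kwargs):
+                ratio = resolve_aspect_ratio(aspect_ratio)
+                try:
+                    url = my_backend_call(prompt, ratio)  # hypothetical helper
+                except Exception as exc:
+                    return error_response(error=str(exc), provider=self.name,
+                                          prompt=prompt, aspect_ratio=ratio)
+                return success_response(image=url, model="example-model",
+                                        prompt=prompt, aspect_ratio=ratio,
+                                        provider=self.name)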
+ """ + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def resolve_aspect_ratio(value: Optional[str]) -> str: + """Clamp an aspect_ratio value to the valid set, defaulting to landscape. + + Invalid values are coerced rather than rejected so the tool surface is + forgiving of agent mistakes. + """ + if not isinstance(value, str): + return DEFAULT_ASPECT_RATIO + v = value.strip().lower() + if v in VALID_ASPECT_RATIOS: + return v + return DEFAULT_ASPECT_RATIO + + +def _images_cache_dir() -> Path: + """Return ``$HERMES_HOME/cache/images/``, creating parents as needed.""" + from hermes_constants import get_hermes_home + + path = get_hermes_home() / "cache" / "images" + path.mkdir(parents=True, exist_ok=True) + return path + + +def save_b64_image( + b64_data: str, + *, + prefix: str = "image", + extension: str = "png", +) -> Path: + """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``. + + Returns the absolute :class:`Path` to the saved file. + + Filename format: ``__.``. + """ + raw = base64.b64decode(b64_data) + ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + short = uuid.uuid4().hex[:8] + path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}" + path.write_bytes(raw) + return path + + +def success_response( + *, + image: str, + model: str, + prompt: str, + aspect_ratio: str, + provider: str, + extra: Optional[Dict[str, Any]] = None, +) -> Dict[str, Any]: + """Build a uniform success response dict. + + ``image`` may be an HTTP URL or an absolute filesystem path (for b64 + providers like OpenAI). Callers that need to pass through additional + backend-specific fields can supply ``extra``. + """ + payload: Dict[str, Any] = { + "success": True, + "image": image, + "model": model, + "prompt": prompt, + "aspect_ratio": aspect_ratio, + "provider": provider, + } + if extra: + for k, v in extra.items(): + payload.setdefault(k, v) + return payload + + +def error_response( + *, + error: str, + error_type: str = "provider_error", + provider: str = "", + model: str = "", + prompt: str = "", + aspect_ratio: str = DEFAULT_ASPECT_RATIO, +) -> Dict[str, Any]: + """Build a uniform error response dict.""" + return { + "success": False, + "image": None, + "error": error, + "error_type": error_type, + "model": model, + "prompt": prompt, + "aspect_ratio": aspect_ratio, + "provider": provider, + } diff --git a/agent/image_gen_registry.py b/agent/image_gen_registry.py new file mode 100644 index 0000000000..715133231c --- /dev/null +++ b/agent/image_gen_registry.py @@ -0,0 +1,120 @@ +""" +Image Generation Provider Registry +================================== + +Central map of registered providers. Populated by plugins at import-time via +``PluginContext.register_image_gen_provider()``; consumed by the +``image_generate`` tool to dispatch each call to the active backend. + +Active selection +---------------- +The active provider is chosen by ``image_gen.provider`` in ``config.yaml``. +If unset, :func:`get_active_provider` applies fallback logic: + +1. If exactly one provider is registered, use it. +2. Otherwise if a provider named ``fal`` is registered, use it (legacy + default — matches pre-plugin behavior). +3. Otherwise return ``None`` (the tool surfaces a helpful error pointing + the user at ``hermes tools``). 
+""" + +from __future__ import annotations + +import logging +import threading +from typing import Dict, List, Optional + +from agent.image_gen_provider import ImageGenProvider + +logger = logging.getLogger(__name__) + + +_providers: Dict[str, ImageGenProvider] = {} +_lock = threading.Lock() + + +def register_provider(provider: ImageGenProvider) -> None: + """Register an image generation provider. + + Re-registration (same ``name``) overwrites the previous entry and logs + a debug message — this makes hot-reload scenarios (tests, dev loops) + behave predictably. + """ + if not isinstance(provider, ImageGenProvider): + raise TypeError( + f"register_provider() expects an ImageGenProvider instance, " + f"got {type(provider).__name__}" + ) + name = provider.name + if not isinstance(name, str) or not name.strip(): + raise ValueError("Image gen provider .name must be a non-empty string") + with _lock: + existing = _providers.get(name) + _providers[name] = provider + if existing is not None: + logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__) + else: + logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__) + + +def list_providers() -> List[ImageGenProvider]: + """Return all registered providers, sorted by name.""" + with _lock: + items = list(_providers.values()) + return sorted(items, key=lambda p: p.name) + + +def get_provider(name: str) -> Optional[ImageGenProvider]: + """Return the provider registered under *name*, or None.""" + if not isinstance(name, str): + return None + with _lock: + return _providers.get(name.strip()) + + +def get_active_provider() -> Optional[ImageGenProvider]: + """Resolve the currently-active provider. + + Reads ``image_gen.provider`` from config.yaml; falls back per the + module docstring. + """ + configured: Optional[str] = None + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + if isinstance(section, dict): + raw = section.get("provider") + if isinstance(raw, str) and raw.strip(): + configured = raw.strip() + except Exception as exc: + logger.debug("Could not read image_gen.provider from config: %s", exc) + + with _lock: + snapshot = dict(_providers) + + if configured: + provider = snapshot.get(configured) + if provider is not None: + return provider + logger.debug( + "image_gen.provider='%s' configured but not registered; falling back", + configured, + ) + + # Fallback: single-provider case + if len(snapshot) == 1: + return next(iter(snapshot.values())) + + # Fallback: prefer legacy FAL for backward compat + if "fal" in snapshot: + return snapshot["fal"] + + return None + + +def _reset_for_tests() -> None: + """Clear the registry. 
**Test-only.**""" + with _lock: + _providers.clear() diff --git a/agent/insights.py b/agent/insights.py index 4dafb74876..70907b4f3d 100644 --- a/agent/insights.py +++ b/agent/insights.py @@ -124,6 +124,7 @@ class InsightsEngine: # Gather raw data sessions = self._get_sessions(cutoff, source) tool_usage = self._get_tool_usage(cutoff, source) + skill_usage = self._get_skill_usage(cutoff, source) message_stats = self._get_message_stats(cutoff, source) if not sessions: @@ -135,6 +136,15 @@ class InsightsEngine: "models": [], "platforms": [], "tools": [], + "skills": { + "summary": { + "total_skill_loads": 0, + "total_skill_edits": 0, + "total_skill_actions": 0, + "distinct_skills_used": 0, + }, + "top_skills": [], + }, "activity": {}, "top_sessions": [], } @@ -144,6 +154,7 @@ class InsightsEngine: models = self._compute_model_breakdown(sessions) platforms = self._compute_platform_breakdown(sessions) tools = self._compute_tool_breakdown(tool_usage) + skills = self._compute_skill_breakdown(skill_usage) activity = self._compute_activity_patterns(sessions) top_sessions = self._compute_top_sessions(sessions) @@ -156,6 +167,7 @@ class InsightsEngine: "models": models, "platforms": platforms, "tools": tools, + "skills": skills, "activity": activity, "top_sessions": top_sessions, } @@ -284,6 +296,82 @@ class InsightsEngine: for name, count in tool_counts.most_common() ] + def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]: + """Extract per-skill usage from assistant tool calls.""" + skill_counts: Dict[str, Dict[str, Any]] = {} + + if source: + cursor = self._conn.execute( + """SELECT m.tool_calls, m.timestamp + FROM messages m + JOIN sessions s ON s.id = m.session_id + WHERE s.started_at >= ? AND s.source = ? + AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""", + (cutoff, source), + ) + else: + cursor = self._conn.execute( + """SELECT m.tool_calls, m.timestamp + FROM messages m + JOIN sessions s ON s.id = m.session_id + WHERE s.started_at >= ? 
+ AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""", + (cutoff,), + ) + + for row in cursor.fetchall(): + try: + calls = row["tool_calls"] + if isinstance(calls, str): + calls = json.loads(calls) + if not isinstance(calls, list): + continue + except (json.JSONDecodeError, TypeError): + continue + + timestamp = row["timestamp"] + for call in calls: + if not isinstance(call, dict): + continue + func = call.get("function", {}) + tool_name = func.get("name") + if tool_name not in {"skill_view", "skill_manage"}: + continue + + args = func.get("arguments") + if isinstance(args, str): + try: + args = json.loads(args) + except (json.JSONDecodeError, TypeError): + continue + if not isinstance(args, dict): + continue + + skill_name = args.get("name") + if not isinstance(skill_name, str) or not skill_name.strip(): + continue + + entry = skill_counts.setdefault( + skill_name, + { + "skill": skill_name, + "view_count": 0, + "manage_count": 0, + "last_used_at": None, + }, + ) + if tool_name == "skill_view": + entry["view_count"] += 1 + else: + entry["manage_count"] += 1 + + if timestamp is not None and ( + entry["last_used_at"] is None or timestamp > entry["last_used_at"] + ): + entry["last_used_at"] = timestamp + + return list(skill_counts.values()) + def _get_message_stats(self, cutoff: float, source: str = None) -> Dict: """Get aggregate message statistics.""" if source: @@ -475,6 +563,46 @@ class InsightsEngine: }) return result + def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]: + """Process per-skill usage into summary + ranked list.""" + total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0 + total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0 + total_skill_actions = total_skill_loads + total_skill_edits + + top_skills = [] + for skill in skill_usage: + total_count = skill["view_count"] + skill["manage_count"] + percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0 + top_skills.append({ + "skill": skill["skill"], + "view_count": skill["view_count"], + "manage_count": skill["manage_count"], + "total_count": total_count, + "percentage": percentage, + "last_used_at": skill.get("last_used_at"), + }) + + top_skills.sort( + key=lambda s: ( + s["total_count"], + s["view_count"], + s["manage_count"], + s["last_used_at"] or 0, + s["skill"], + ), + reverse=True, + ) + + return { + "summary": { + "total_skill_loads": total_skill_loads, + "total_skill_edits": total_skill_edits, + "total_skill_actions": total_skill_actions, + "distinct_skills_used": len(skill_usage), + }, + "top_skills": top_skills, + } + def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict: """Analyze activity patterns by day of week and hour.""" day_counts = Counter() # 0=Monday ... 6=Sunday @@ -670,6 +798,28 @@ class InsightsEngine: lines.append(f" ... 
and {len(report['tools']) - 15} more tools") lines.append("") + # Skill usage + skills = report.get("skills", {}) + top_skills = skills.get("top_skills", []) + if top_skills: + lines.append(" 🧠 Top Skills") + lines.append(" " + "─" * 56) + lines.append(f" {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}") + for skill in top_skills[:10]: + last_used = "—" + if skill.get("last_used_at"): + last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d") + lines.append( + f" {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}" + ) + summary = skills.get("summary", {}) + lines.append( + f" Distinct skills: {summary.get('distinct_skills_used', 0)} " + f"Loads: {summary.get('total_skill_loads', 0):,} " + f"Edits: {summary.get('total_skill_edits', 0):,}" + ) + lines.append("") + # Activity patterns act = report.get("activity", {}) if act.get("by_day"): @@ -753,6 +903,18 @@ class InsightsEngine: lines.append(f" {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)") lines.append("") + skills = report.get("skills", {}) + if skills.get("top_skills"): + lines.append("**🧠 Top Skills:**") + for skill in skills["top_skills"][:5]: + suffix = "" + if skill.get("last_used_at"): + suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}" + lines.append( + f" {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}" + ) + lines.append("") + # Activity summary act = report.get("activity", {}) if act.get("busiest_day") and act.get("busiest_hour"): diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 81bac6c92f..152e536fdb 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -14,6 +14,8 @@ from urllib.parse import urlparse import requests import yaml +from utils import base_url_host_matches, base_url_hostname + from hermes_constants import OPENROUTER_MODELS_URL logger = logging.getLogger(__name__) @@ -23,7 +25,7 @@ logger = logging.getLogger(__name__) # are preserved so the full model name reaches cache lookups and server queries. 
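# Editor's illustration of the skills report computed by the insights code
# above (values are invented; the sort key mirrors _compute_skill_breakdown):
rows = [
    {"skill": "git-flow", "view_count": 4, "manage_count": 1, "last_used_at": 1700000000.0},
    {"skill": "pytest", "view_count": 5, "manage_count": 0, "last_used_at": 1690000000.0},
]
ranked = sorted(
    rows,
    key=lambda s: (
        s["view_count"] + s["manage_count"],  # total actions first
        s["view_count"],                      # then loads
        s["manage_count"],                    # then edits
        s["last_used_at"] or 0,               # then recency
        s["skill"],                           # then name
    ),
    reverse=True,
)
# Both rows total 5 actions; "pytest" wins the tie on view_count.
assert [r["skill"] for r in ranked] == ["pytest", "git-flow"]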
_PROVIDER_PREFIXES: frozenset[str] = frozenset({ "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", - "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek", + "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek", "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", "qwen-oauth", "xiaomi", @@ -34,7 +36,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot", "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek", "ollama", - "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", + "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", "mimo", "xiaomi-mimo", "arcee-ai", "arceeai", "xai", "x-ai", "x.ai", "grok", @@ -116,7 +118,6 @@ DEFAULT_CONTEXT_LENGTHS = { "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context) - "gpt-5.3-codex-spark": 128000, # Spark variant has reduced 128k context "gpt-5.1-chat": 128000, # Chat variant has 128k context "gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k) "gpt-4.1": 1047576, @@ -169,6 +170,7 @@ DEFAULT_CONTEXT_LENGTHS = { "Qwen/Qwen3.5-35B-A3B": 131072, "deepseek-ai/DeepSeek-V3.2": 65536, "moonshotai/Kimi-K2.5": 262144, + "moonshotai/Kimi-K2.6": 262144, "moonshotai/Kimi-K2-Thinking": 262144, "MiniMaxAI/MiniMax-M2.5": 204800, "XiaomiMiMo/MiMo-V2-Flash": 256000, @@ -211,8 +213,15 @@ def _normalize_base_url(base_url: str) -> str: return (base_url or "").strip().rstrip("/") +def _auth_headers(api_key: str = "") -> Dict[str, str]: + token = str(api_key or "").strip() + if not token: + return {} + return {"Authorization": f"Bearer {token}"} + + def _is_openrouter_base_url(base_url: str) -> bool: - return "openrouter.ai" in _normalize_base_url(base_url).lower() + return base_url_host_matches(base_url, "openrouter.ai") def _is_custom_endpoint(base_url: str) -> bool: @@ -228,6 +237,8 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.moonshot.ai": "kimi-coding", "api.moonshot.cn": "kimi-coding-cn", "api.kimi.com": "kimi-coding", + "api.stepfun.ai": "stepfun", + "api.stepfun.com": "stepfun", "api.arcee.ai": "arcee", "api.minimax": "minimax", "dashscope.aliyuncs.com": "alibaba", @@ -310,7 +321,7 @@ def is_local_endpoint(base_url: str) -> bool: return False -def detect_local_server_type(base_url: str) -> Optional[str]: +def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]: """Detect which local server is running at base_url by probing known endpoints. Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None. 
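# Rough sketch of the probe order detect_local_server_type() relies on
# (editor's simplification; the real function also strips /v1 suffixes and
# probes more endpoints). The api_key threaded through above becomes a
# Bearer header so auth-protected local servers no longer fail detection:
import httpx

def sketch_detect(server_url: str, api_key: str = "") -> str | None:
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    with httpx.Client(timeout=2.0, headers=headers) as client:
        # LM Studio answers /api/v1/models; Ollama answers /api/tags.
        for probe, name in (("/api/v1/models", "lm-studio"), ("/api/tags", "ollama")):
            try:
                if client.get(f"{server_url}{probe}").status_code == 200:
                    return name
            except httpx.HTTPError:
                continue
    return None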
@@ -322,8 +333,10 @@ def detect_local_server_type(base_url: str) -> Optional[str]: if server_url.endswith("/v1"): server_url = server_url[:-3] + headers = _auth_headers(api_key) + try: - with httpx.Client(timeout=2.0) as client: + with httpx.Client(timeout=2.0, headers=headers) as client: # LM Studio exposes /api/v1/models — check first (most specific) try: r = client.get(f"{server_url}/api/v1/models") @@ -510,6 +523,59 @@ def fetch_endpoint_model_metadata( headers = {"Authorization": f"Bearer {api_key}"} if api_key else {} last_error: Optional[Exception] = None + if is_local_endpoint(normalized): + try: + if detect_local_server_type(normalized, api_key=api_key) == "lm-studio": + server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized + response = requests.get( + server_url.rstrip("/") + "/api/v1/models", + headers=headers, + timeout=10, + ) + response.raise_for_status() + payload = response.json() + cache: Dict[str, Dict[str, Any]] = {} + for model in payload.get("models", []): + if not isinstance(model, dict): + continue + model_id = model.get("key") or model.get("id") + if not model_id: + continue + entry: Dict[str, Any] = {"name": model.get("name", model_id)} + + context_length = None + for inst in model.get("loaded_instances", []) or []: + if not isinstance(inst, dict): + continue + cfg = inst.get("config", {}) + ctx = cfg.get("context_length") if isinstance(cfg, dict) else None + if isinstance(ctx, int) and ctx > 0: + context_length = ctx + break + if context_length is None: + context_length = _extract_context_length(model) + if context_length is not None: + entry["context_length"] = context_length + + max_completion_tokens = _extract_max_completion_tokens(model) + if max_completion_tokens is not None: + entry["max_completion_tokens"] = max_completion_tokens + + pricing = _extract_pricing(model) + if pricing: + entry["pricing"] = pricing + + _add_model_aliases(cache, model_id, entry) + alt_id = model.get("id") + if isinstance(alt_id, str) and alt_id and alt_id != model_id: + _add_model_aliases(cache, alt_id, entry) + + _endpoint_model_metadata_cache[normalized] = cache + _endpoint_model_metadata_cache_time[normalized] = time.time() + return cache + except Exception as exc: + last_error = exc + for candidate in candidates: url = candidate.rstrip("/") + "/models" try: @@ -716,7 +782,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool: return False -def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]: +def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]: """Query an Ollama server for the model's context length. 
Returns the model's maximum context from GGUF metadata via ``/api/show``, @@ -734,14 +800,16 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]: server_url = server_url[:-3] try: - server_type = detect_local_server_type(base_url) + server_type = detect_local_server_type(base_url, api_key=api_key) except Exception: return None if server_type != "ollama": return None + headers = _auth_headers(api_key) + try: - with httpx.Client(timeout=3.0) as client: + with httpx.Client(timeout=3.0, headers=headers) as client: resp = client.post(f"{server_url}/api/show", json={"name": bare_model}) if resp.status_code != 200: return None @@ -769,7 +837,7 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]: return None -def _query_local_context_length(model: str, base_url: str) -> Optional[int]: +def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]: """Query a local server for the model's context length.""" import httpx @@ -782,13 +850,15 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]: if server_url.endswith("/v1"): server_url = server_url[:-3] + headers = _auth_headers(api_key) + try: - server_type = detect_local_server_type(base_url) + server_type = detect_local_server_type(base_url, api_key=api_key) except Exception: server_type = None try: - with httpx.Client(timeout=3.0) as client: + with httpx.Client(timeout=3.0, headers=headers) as client: # Ollama: /api/show returns model details with context info if server_type == "ollama": resp = client.post(f"{server_url}/api/show", json={"name": model}) @@ -999,7 +1069,7 @@ def get_model_context_length( if not _is_known_provider_base_url(base_url): # 3. Try querying local server directly if is_local_endpoint(base_url): - local_ctx = _query_local_context_length(model, base_url) + local_ctx = _query_local_context_length(model, base_url, api_key=api_key) if local_ctx and local_ctx > 0: save_context_length(model, base_url, local_ctx) return local_ctx @@ -1013,7 +1083,7 @@ def get_model_context_length( # 4. Anthropic /v1/models API (only for regular API keys, not OAuth) if provider == "anthropic" or ( - base_url and "api.anthropic.com" in base_url + base_url and base_url_hostname(base_url) == "api.anthropic.com" ): ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key) if ctx: @@ -1022,7 +1092,11 @@ def get_model_context_length( # 4b. AWS Bedrock — use static context length table. # Bedrock's ListFoundationModels doesn't expose context window sizes, # so we maintain a curated table in bedrock_adapter.py. - if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url): + if provider == "bedrock" or ( + base_url + and base_url_hostname(base_url).startswith("bedrock-runtime.") + and base_url_host_matches(base_url, "amazonaws.com") + ): try: from agent.bedrock_adapter import get_bedrock_context_length return get_bedrock_context_length(model) @@ -1069,7 +1143,7 @@ def get_model_context_length( # 9. 
Query local server as last resort if base_url and is_local_endpoint(base_url): - local_ctx = _query_local_context_length(model, base_url) + local_ctx = _query_local_context_length(model, base_url, api_key=api_key) if local_ctx and local_ctx > 0: save_context_length(model, base_url, local_ctx) return local_ctx diff --git a/agent/models_dev.py b/agent/models_dev.py index 3e5c911e7e..2f06a75d89 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -146,6 +146,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = { "openai-codex": "openai", "zai": "zai", "kimi-coding": "kimi-for-coding", + "stepfun": "stepfun", "kimi-coding-cn": "kimi-for-coding", "minimax": "minimax", "minimax-cn": "minimax-cn", diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 3e042f65df..8e061f831b 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -152,7 +152,13 @@ MEMORY_GUIDANCE = ( "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO " "state to memory; use session_search to recall those from past transcripts. " "If you've discovered a new way to do something, solved a problem that could be " - "necessary later, save it as a skill with the skill tool." + "necessary later, save it as a skill with the skill tool.\n" + "Write memories as declarative facts, not instructions to yourself. " + "'User prefers concise responses' ✓ — 'Always respond concisely' ✗. " + "'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. " + "Imperative phrasing gets re-read as a directive in later sessions and can " + "cause repeated work or override the user's current request. Procedures and " + "workflows belong in skills, not memory." ) SESSION_SEARCH_GUIDANCE = ( @@ -344,7 +350,13 @@ PLATFORM_HINTS = { ), "cli": ( "You are a CLI AI Agent. Try not to use markdown but simple text " - "renderable inside a terminal." + "renderable inside a terminal. " + "File delivery: there is no attachment channel — the user reads your " + "response directly in their terminal. Do NOT emit MEDIA:/path tags " + "(those are only intercepted on messaging platforms like Telegram, " + "Discord, Slack, etc.; on the CLI they render as literal text). " + "When referring to a file you created or changed, just state its " + "absolute path in plain text; the user can open it from there." ), "sms": ( "You are communicating via SMS. Keep responses concise and use plain text " @@ -613,12 +625,14 @@ def build_skills_system_prompt( or get_session_env("HERMES_SESSION_PLATFORM") or "" ) + disabled = get_disabled_skill_names() cache_key = ( str(skills_dir.resolve()), tuple(str(d) for d in external_dirs), tuple(sorted(str(t) for t in (available_tools or set()))), tuple(sorted(str(ts) for ts in (available_toolsets or set()))), _platform_hint, + tuple(sorted(disabled)), ) with _SKILLS_PROMPT_CACHE_LOCK: cached = _SKILLS_PROMPT_CACHE.get(cache_key) @@ -626,8 +640,6 @@ def build_skills_system_prompt( _SKILLS_PROMPT_CACHE.move_to_end(cache_key) return cached - disabled = get_disabled_skill_names() - # ── Layer 2: disk snapshot ──────────────────────────────────────── snapshot = _load_skills_snapshot(skills_dir) diff --git a/agent/redact.py b/agent/redact.py index af3b7bb93c..3679b73236 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -13,6 +13,48 @@ import re logger = logging.getLogger(__name__) +# Sensitive query-string parameter names (case-insensitive exact match). +# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match +# any known vendor prefix regex (e.g. 
opaque tokens, short OAuth codes). +_SENSITIVE_QUERY_PARAMS = frozenset({ + "access_token", + "refresh_token", + "id_token", + "token", + "api_key", + "apikey", + "client_secret", + "password", + "auth", + "jwt", + "session", + "secret", + "key", + "code", # OAuth authorization codes + "signature", # pre-signed URL signatures + "x-amz-signature", +}) + +# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match). +# Exact match, NOT substring — "token_count" and "session_id" must NOT match. +# Ported from nearai/ironclaw#2529. +_SENSITIVE_BODY_KEYS = frozenset({ + "access_token", + "refresh_token", + "id_token", + "token", + "api_key", + "apikey", + "client_secret", + "password", + "auth", + "jwt", + "secret", + "private_key", + "authorization", + "key", +}) + # Snapshot at import time so runtime env mutations (e.g. LLM-generated # `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session. _REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off") @@ -108,6 +150,30 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>") # Negative lookahead prevents matching hex strings or identifiers _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])") +# URLs containing query strings — matches `scheme://...?...[# or end]`. +# Used to scan text for URLs whose query params may contain secrets. +# Ported from nearai/ironclaw#2529. +_URL_WITH_QUERY_RE = re.compile( + r"(https?|wss?|ftp)://" # scheme + r"([^\s/?#]+)" # authority (may include userinfo) + r"([^\s?#]*)" # path + r"\?([^\s#]+)" # query (required) + r"(#\S*)?", # optional fragment +) + +# URLs containing userinfo — `scheme://user:password@host` for ANY scheme +# (not just DB protocols already covered by _DB_CONNSTR_RE above). +# Catches things like `https://user:token@api.example.com/v1/foo`. +_URL_USERINFO_RE = re.compile( + r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@", +) + +# Form-urlencoded body detection: conservative — only applies when the entire +# text looks like a query string (k=v&k=v pattern with no newlines). +_FORM_BODY_RE = re.compile( + r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$" +) + # Compile known prefix patterns into one alternation _PREFIX_RE = re.compile( r"(? str: return f"{token[:6]}...{token[-4:]}" +def _redact_query_string(query: str) -> str: + """Redact sensitive parameter values in a URL query string. + + Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values + replaced with `***`. Non-sensitive keys pass through unchanged. + Empty or malformed pairs are preserved as-is. + """ + if not query: + return query + parts = [] + for pair in query.split("&"): + if "=" not in pair: + parts.append(pair) + continue + key, _, value = pair.partition("=") + if key.lower() in _SENSITIVE_QUERY_PARAMS: + parts.append(f"{key}=***") + else: + parts.append(pair) + return "&".join(parts) + + +def _redact_url_query_params(text: str) -> str: + """Scan text for URLs with query strings and redact sensitive params. + + Catches opaque tokens that don't match vendor prefix regexes, e.g. + `https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`. 
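+
+    A worked example (editor's illustration; the token value is invented)::
+
+        >>> _redact_url_query_params("https://api.example.com/v1?api_key=abc123&limit=5")
+        'https://api.example.com/v1?api_key=***&limit=5'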
+ """ + def _sub(m: re.Match) -> str: + scheme = m.group(1) + authority = m.group(2) + path = m.group(3) + query = _redact_query_string(m.group(4)) + fragment = m.group(5) or "" + return f"{scheme}://{authority}{path}?{query}{fragment}" + return _URL_WITH_QUERY_RE.sub(_sub, text) + + +def _redact_url_userinfo(text: str) -> str: + """Strip `user:password@` from HTTP/WS/FTP URLs. + + DB protocols (postgres, mysql, mongodb, redis, amqp) are handled + separately by `_DB_CONNSTR_RE`. + """ + return _URL_USERINFO_RE.sub( + lambda m: f"{m.group(1)}://{m.group(2)}:***@", + text, + ) + + +def _redact_form_body(text: str) -> str: + """Redact sensitive values in a form-urlencoded body. + + Only applies when the entire input looks like a pure form body + (k=v&k=v with no newlines, no other text). Single-line non-form + text passes through unchanged. This is a conservative pass — the + `_redact_url_query_params` function handles embedded query strings. + """ + if not text or "\n" in text or "&" not in text: + return text + # The body-body form check is strict: only trigger on clean k=v&k=v. + if not _FORM_BODY_RE.match(text.strip()): + return text + return _redact_query_string(text.strip()) + + def redact_sensitive_text(text: str) -> str: """Apply all redaction patterns to a block of text. @@ -173,6 +305,16 @@ def redact_sensitive_text(text: str) -> str: # JWT tokens (eyJ... — base64-encoded JSON headers) text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text) + # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes. + # DB schemes are handled above by _DB_CONNSTR_RE. + text = _redact_url_userinfo(text) + + # URL query params containing opaque tokens (?access_token=…&code=…) + text = _redact_url_query_params(text) + + # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs). + text = _redact_form_body(text) + # Discord user/role mentions (<@snowflake_id>) text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text) diff --git a/agent/shell_hooks.py b/agent/shell_hooks.py new file mode 100644 index 0000000000..b579ad5b87 --- /dev/null +++ b/agent/shell_hooks.py @@ -0,0 +1,831 @@ +""" +Shell-script hooks bridge. + +Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for +consent on first use of each ``(event, command)`` pair, and registers +callbacks on the existing plugin hook manager so every existing +``invoke_hook()`` site dispatches to the configured shell scripts — with +zero changes to call sites. + +Design notes +------------ +* Python plugins and shell hooks compose naturally: both flow through + :func:`hermes_cli.plugins.invoke_hook` and its aggregators. Python + plugins are registered first (via ``discover_and_load()``) so their + block decisions win ties over shell-hook blocks. +* Subprocess execution uses ``shlex.split(os.path.expanduser(command))`` + with ``shell=False`` — no shell injection footguns. Users that need + pipes/redirection wrap their logic in a script. +* First-use consent is gated by the allowlist under + ``~/.hermes/shell-hooks-allowlist.json``. Non-TTY callers must pass + ``accept_hooks=True`` (resolved from ``--accept-hooks``, + ``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config) + for registration to succeed without a prompt. +* Registration is idempotent — safe to invoke from both the CLI entry + point (``hermes_cli/main.py``) and the gateway entry point + (``gateway/run.py``). 
+ +Wire protocol +------------- +**stdin** (JSON, piped to the script):: + + { + "hook_event_name": "pre_tool_call", + "tool_name": "terminal", + "tool_input": {"command": "rm -rf /"}, + "session_id": "sess_abc123", + "cwd": "/home/user/project", + "extra": {...} # event-specific kwargs + } + +**stdout** (JSON, optional — anything else is ignored):: + + # Block a pre_tool_call (either shape accepted; normalised internally): + {"decision": "block", "reason": "Forbidden command"} # Claude-Code-style + {"action": "block", "message": "Forbidden command"} # Hermes-canonical + + # Inject context for pre_llm_call: + {"context": "Today is Friday"} + + # Silent no-op: + +""" + +from __future__ import annotations + +import difflib +import json +import logging +import os +import re +import shlex +import subprocess +import sys +import tempfile +import threading +import time +from contextlib import contextmanager +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple + +try: + import fcntl # POSIX only; Windows falls back to best-effort without flock. +except ImportError: # pragma: no cover + fcntl = None # type: ignore[assignment] + +from hermes_constants import get_hermes_home + +logger = logging.getLogger(__name__) + +DEFAULT_TIMEOUT_SECONDS = 60 +MAX_TIMEOUT_SECONDS = 300 +ALLOWLIST_FILENAME = "shell-hooks-allowlist.json" + +# (event, matcher, command) triples that have been wired to the plugin +# manager in the current process. Matcher is part of the key because +# the same script can legitimately register for different matchers under +# the same event (e.g. one entry per tool the user wants to gate). +# Second registration attempts for the exact same triple become no-ops +# so the CLI and gateway can both call register_from_config() safely. +_registered: Set[Tuple[str, Optional[str], str]] = set() +_registered_lock = threading.Lock() + +# Intra-process lock for allowlist read-modify-write on platforms that +# lack ``fcntl`` (non-POSIX). Kept separate from ``_registered_lock`` +# because ``register_from_config`` already holds ``_registered_lock`` when +# it triggers ``_record_approval`` — reusing it here would self-deadlock +# (``threading.Lock`` is non-reentrant). POSIX callers use the sibling +# ``.lock`` file via ``fcntl.flock`` and bypass this. +_allowlist_write_lock = threading.Lock() + + +@dataclass +class ShellHookSpec: + """Parsed and validated representation of a single ``hooks:`` entry.""" + + event: str + command: str + matcher: Optional[str] = None + timeout: int = DEFAULT_TIMEOUT_SECONDS + compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False) + + def __post_init__(self) -> None: + # Strip whitespace introduced by YAML quirks (e.g. multi-line string + # folding) — a matcher of " terminal" would otherwise silently fail + # to match "terminal" without any diagnostic. 
+ if isinstance(self.matcher, str): + stripped = self.matcher.strip() + self.matcher = stripped if stripped else None + if self.matcher: + try: + self.compiled_matcher = re.compile(self.matcher) + except re.error as exc: + logger.warning( + "shell hook matcher %r is invalid (%s) — treating as " + "literal equality", self.matcher, exc, + ) + self.compiled_matcher = None + + def matches_tool(self, tool_name: Optional[str]) -> bool: + if not self.matcher: + return True + if tool_name is None: + return False + if self.compiled_matcher is not None: + return self.compiled_matcher.fullmatch(tool_name) is not None + # compiled_matcher is None only when the regex failed to compile, + # in which case we already warned and fall back to literal equality. + return tool_name == self.matcher + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def register_from_config( + cfg: Optional[Dict[str, Any]], + *, + accept_hooks: bool = False, +) -> List[ShellHookSpec]: + """Register every configured shell hook on the plugin manager. + + ``cfg`` is the full parsed config dict (``hermes_cli.config.load_config`` + output). The ``hooks:`` key is read out of it. Missing, empty, or + non-dict ``hooks`` is treated as zero configured hooks. + + ``accept_hooks=True`` skips the TTY consent prompt — the caller is + promising that the user has opted in via a flag, env var, or config + setting. ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are + also honored inside this function so either CLI or gateway call sites + pick them up. + + Returns the list of :class:`ShellHookSpec` entries that ended up wired + up on the plugin manager. Skipped entries (unknown events, malformed, + not allowlisted, already registered) are logged but not returned. + """ + if not isinstance(cfg, dict): + return [] + + effective_accept = _resolve_effective_accept(cfg, accept_hooks) + + specs = _parse_hooks_block(cfg.get("hooks")) + if not specs: + return [] + + registered: List[ShellHookSpec] = [] + + # Import lazily — avoids circular imports at module-load time. + from hermes_cli.plugins import get_plugin_manager + + manager = get_plugin_manager() + + # Idempotence + allowlist read happen under the lock; the TTY + # prompt runs outside so other threads aren't parked on a blocking + # input(). Mutation re-takes the lock with a defensive idempotence + # re-check in case two callers ever race through the prompt. + for spec in specs: + key = (spec.event, spec.matcher, spec.command) + with _registered_lock: + if key in _registered: + continue + already_allowlisted = _is_allowlisted(spec.event, spec.command) + + if not already_allowlisted: + if not _prompt_and_record( + spec.event, spec.command, accept_hooks=effective_accept, + ): + logger.warning( + "shell hook for %s (%s) not allowlisted — skipped. 
" + "Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / " + "hooks_auto_accept: true, or approve at the TTY " + "prompt next run.", + spec.event, spec.command, + ) + continue + + with _registered_lock: + if key in _registered: + continue + manager._hooks.setdefault(spec.event, []).append(_make_callback(spec)) + _registered.add(key) + registered.append(spec) + logger.info( + "shell hook registered: %s -> %s (matcher=%s, timeout=%ds)", + spec.event, spec.command, spec.matcher, spec.timeout, + ) + + return registered + + +def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]: + """Return the parsed ``ShellHookSpec`` entries from config without + registering anything. Used by ``hermes hooks list`` and ``doctor``.""" + if not isinstance(cfg, dict): + return [] + return _parse_hooks_block(cfg.get("hooks")) + + +def reset_for_tests() -> None: + """Clear the idempotence set. Test-only helper.""" + with _registered_lock: + _registered.clear() + + +# --------------------------------------------------------------------------- +# Config parsing +# --------------------------------------------------------------------------- + +def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]: + """Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``. + + Malformed entries warn-and-skip — we never raise from config parsing + because a broken hook must not crash the agent. + """ + from hermes_cli.plugins import VALID_HOOKS + + if not isinstance(hooks_cfg, dict): + return [] + + specs: List[ShellHookSpec] = [] + + for event_name, entries in hooks_cfg.items(): + if event_name not in VALID_HOOKS: + suggestion = difflib.get_close_matches( + str(event_name), VALID_HOOKS, n=1, cutoff=0.6, + ) + if suggestion: + logger.warning( + "unknown hook event %r in hooks: config — did you mean %r?", + event_name, suggestion[0], + ) + else: + logger.warning( + "unknown hook event %r in hooks: config (valid: %s)", + event_name, ", ".join(sorted(VALID_HOOKS)), + ) + continue + + if entries is None: + continue + + if not isinstance(entries, list): + logger.warning( + "hooks.%s must be a list of hook definitions; got %s", + event_name, type(entries).__name__, + ) + continue + + for i, raw in enumerate(entries): + spec = _parse_single_entry(event_name, i, raw) + if spec is not None: + specs.append(spec) + + return specs + + +def _parse_single_entry( + event: str, index: int, raw: Any, +) -> Optional[ShellHookSpec]: + if not isinstance(raw, dict): + logger.warning( + "hooks.%s[%d] must be a mapping with a 'command' key; got %s", + event, index, type(raw).__name__, + ) + return None + + command = raw.get("command") + if not isinstance(command, str) or not command.strip(): + logger.warning( + "hooks.%s[%d] is missing a non-empty 'command' field", + event, index, + ) + return None + + matcher = raw.get("matcher") + if matcher is not None and not isinstance(matcher, str): + logger.warning( + "hooks.%s[%d].matcher must be a string regex; ignoring", + event, index, + ) + matcher = None + + if matcher is not None and event not in ("pre_tool_call", "post_tool_call"): + logger.warning( + "hooks.%s[%d].matcher=%r will be ignored at runtime — the " + "matcher field is only honored for pre_tool_call / " + "post_tool_call. 
The hook will fire on every %s event.", + event, index, matcher, event, + ) + matcher = None + + timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS) + try: + timeout = int(timeout_raw) + except (TypeError, ValueError): + logger.warning( + "hooks.%s[%d].timeout must be an int (got %r); using default %ds", + event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS, + ) + timeout = DEFAULT_TIMEOUT_SECONDS + + if timeout < 1: + logger.warning( + "hooks.%s[%d].timeout must be >=1; using default %ds", + event, index, DEFAULT_TIMEOUT_SECONDS, + ) + timeout = DEFAULT_TIMEOUT_SECONDS + + if timeout > MAX_TIMEOUT_SECONDS: + logger.warning( + "hooks.%s[%d].timeout=%ds exceeds max %ds; clamping", + event, index, timeout, MAX_TIMEOUT_SECONDS, + ) + timeout = MAX_TIMEOUT_SECONDS + + return ShellHookSpec( + event=event, + command=command.strip(), + matcher=matcher, + timeout=timeout, + ) + + +# --------------------------------------------------------------------------- +# Subprocess callback +# --------------------------------------------------------------------------- + +_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"} + + +def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]: + """Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin. + + Returns a diagnostic dict with the same keys for every outcome + (``returncode``, ``stdout``, ``stderr``, ``timed_out``, + ``elapsed_seconds``, ``error``). This is the single place the + subprocess is actually invoked — both the live callback path + (:func:`_make_callback`) and the CLI test helper (:func:`run_once`) + go through it. + """ + result: Dict[str, Any] = { + "returncode": None, + "stdout": "", + "stderr": "", + "timed_out": False, + "elapsed_seconds": 0.0, + "error": None, + } + try: + argv = shlex.split(os.path.expanduser(spec.command)) + except ValueError as exc: + result["error"] = f"command {spec.command!r} cannot be parsed: {exc}" + return result + if not argv: + result["error"] = "empty command" + return result + + t0 = time.monotonic() + try: + proc = subprocess.run( + argv, + input=stdin_json, + capture_output=True, + timeout=spec.timeout, + text=True, + shell=False, + ) + except subprocess.TimeoutExpired: + result["timed_out"] = True + result["elapsed_seconds"] = round(time.monotonic() - t0, 3) + return result + except FileNotFoundError: + result["error"] = "command not found" + return result + except PermissionError: + result["error"] = "command not executable" + return result + except Exception as exc: # pragma: no cover — defensive + result["error"] = str(exc) + return result + + result["returncode"] = proc.returncode + result["stdout"] = proc.stdout or "" + result["stderr"] = proc.stderr or "" + result["elapsed_seconds"] = round(time.monotonic() - t0, 3) + return result + + +def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]: + """Build the closure that ``invoke_hook()`` will call per firing.""" + + def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]: + # Matcher gate — only meaningful for tool-scoped events. 
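+        # Illustrative matcher semantics (per matches_tool above): matcher
+        # "bash|terminal" fullmatches tool_name "bash" but not "bash_extra";
+        # a missing matcher fires for every tool.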
+ if spec.event in ("pre_tool_call", "post_tool_call"): + if not spec.matches_tool(kwargs.get("tool_name")): + return None + + r = _spawn(spec, _serialize_payload(spec.event, kwargs)) + + if r["error"]: + logger.warning( + "shell hook failed (event=%s command=%s): %s", + spec.event, spec.command, r["error"], + ) + return None + if r["timed_out"]: + logger.warning( + "shell hook timed out after %.2fs (event=%s command=%s)", + r["elapsed_seconds"], spec.event, spec.command, + ) + return None + + stderr = r["stderr"].strip() + if stderr: + logger.debug( + "shell hook stderr (event=%s command=%s): %s", + spec.event, spec.command, stderr[:400], + ) + # Non-zero exits: log but still parse stdout so scripts that + # signal failure via exit code can also return a block directive. + if r["returncode"] != 0: + logger.warning( + "shell hook exited %d (event=%s command=%s); stderr=%s", + r["returncode"], spec.event, spec.command, stderr[:400], + ) + return _parse_response(spec.event, r["stdout"]) + + _callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]" + _callback.__qualname__ = _callback.__name__ + return _callback + + +def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str: + """Render the stdin JSON payload. Unserialisable values are + stringified via ``default=str`` rather than dropped.""" + extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS} + try: + cwd = str(Path.cwd()) + except OSError: + cwd = "" + payload = { + "hook_event_name": event, + "tool_name": kwargs.get("tool_name"), + "tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None, + "session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "", + "cwd": cwd, + "extra": extras, + } + return json.dumps(payload, ensure_ascii=False, default=str) + + +def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]: + """Translate stdout JSON into a Hermes wire-shape dict. + + For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block", + "reason": "..."}`` payload is translated into the canonical Hermes + ``{"action": "block", "message": "..."}`` shape expected by + :func:`hermes_cli.plugins.get_pre_tool_call_block_message`. This is + the single most important correctness invariant in this module — + skipping the translation silently breaks every ``pre_tool_call`` + block directive. + + For ``pre_llm_call``, ``{"context": "..."}`` is passed through + unchanged to match the existing plugin-hook contract. + + Anything else returns ``None``. 
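+
+    Illustrative translation for ``pre_tool_call`` (the hook script and its
+    reason text are hypothetical; the shapes are the ones handled below):
+
+        hook stdout:         {"decision": "block", "reason": "rm -rf denied"}
+        returned to Hermes:  {"action": "block", "message": "rm -rf denied"}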
+ """ + stdout = (stdout or "").strip() + if not stdout: + return None + + try: + data = json.loads(stdout) + except json.JSONDecodeError: + logger.warning( + "shell hook stdout was not valid JSON (event=%s): %s", + event, stdout[:200], + ) + return None + + if not isinstance(data, dict): + return None + + if event == "pre_tool_call": + if data.get("action") == "block": + message = data.get("message") or data.get("reason") or "" + if isinstance(message, str) and message: + return {"action": "block", "message": message} + if data.get("decision") == "block": + message = data.get("reason") or data.get("message") or "" + if isinstance(message, str) and message: + return {"action": "block", "message": message} + return None + + context = data.get("context") + if isinstance(context, str) and context.strip(): + return {"context": context} + + return None + + +# --------------------------------------------------------------------------- +# Allowlist / consent +# --------------------------------------------------------------------------- + +def allowlist_path() -> Path: + """Path to the per-user shell-hook allowlist file.""" + return get_hermes_home() / ALLOWLIST_FILENAME + + +def load_allowlist() -> Dict[str, Any]: + """Return the parsed allowlist, or an empty skeleton if absent.""" + try: + raw = json.loads(allowlist_path().read_text()) + except (FileNotFoundError, json.JSONDecodeError, OSError): + return {"approvals": []} + if not isinstance(raw, dict): + return {"approvals": []} + approvals = raw.get("approvals") + if not isinstance(approvals, list): + raw["approvals"] = [] + return raw + + +def save_allowlist(data: Dict[str, Any]) -> None: + """Atomically persist the allowlist via per-process ``mkstemp`` + + ``os.replace``. Cross-process read-modify-write races are handled + by :func:`_locked_update_approvals` (``fcntl.flock``). On OSError + the failure is logged; the in-process hook still registers but + the approval won't survive across runs.""" + p = allowlist_path() + try: + p.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_path = tempfile.mkstemp( + prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent), + ) + try: + with os.fdopen(fd, "w") as fh: + fh.write(json.dumps(data, indent=2, sort_keys=True)) + os.replace(tmp_path, p) + except Exception: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + except OSError as exc: + logger.warning( + "Failed to persist shell hook allowlist to %s: %s. " + "The approval is in-memory for this run, but the next " + "startup will re-prompt (or skip registration on non-TTY " + "runs without --accept-hooks / HERMES_ACCEPT_HOOKS).", + p, exc, + ) + + +def _is_allowlisted(event: str, command: str) -> bool: + data = load_allowlist() + return any( + isinstance(e, dict) + and e.get("event") == event + and e.get("command") == command + for e in data.get("approvals", []) + ) + + +@contextmanager +def _locked_update_approvals() -> Iterator[Dict[str, Any]]: + """Serialise read-modify-write on the allowlist across processes. + + Holds an exclusive ``flock`` on a sibling lock file for the duration + of the update so concurrent ``_record_approval``/``revoke`` callers + cannot clobber each other's changes (the race Codex reproduced with + 20–50 simultaneous writers). Falls back to an in-process lock on + platforms without ``fcntl``. 
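+
+    Usage (mirrors ``_record_approval`` / ``revoke`` below):
+
+        with _locked_update_approvals() as data:
+            data["approvals"].append(entry)  # persisted on exit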
+ """ + p = allowlist_path() + p.parent.mkdir(parents=True, exist_ok=True) + lock_path = p.with_suffix(p.suffix + ".lock") + + if fcntl is None: # pragma: no cover — non-POSIX fallback + with _allowlist_write_lock: + data = load_allowlist() + yield data + save_allowlist(data) + return + + with open(lock_path, "a+") as lock_fh: + fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX) + try: + data = load_allowlist() + yield data + save_allowlist(data) + finally: + fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN) + + +def _prompt_and_record( + event: str, command: str, *, accept_hooks: bool, +) -> bool: + """Decide whether to approve an unseen ``(event, command)`` pair. + Returns ``True`` iff the approval was granted and recorded. + """ + if accept_hooks: + _record_approval(event, command) + logger.info( + "shell hook auto-approved via --accept-hooks / env / config: " + "%s -> %s", event, command, + ) + return True + + if not sys.stdin.isatty(): + return False + + print( + f"\n⚠ Hermes is about to register a shell hook that will run a\n" + f" command on your behalf.\n\n" + f" Event: {event}\n" + f" Command: {command}\n\n" + f" Commands run with your full user credentials. Only approve\n" + f" commands you trust." + ) + try: + answer = input("Allow this hook to run? [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + print() # keep the terminal tidy after ^C + return False + + if answer in ("y", "yes"): + _record_approval(event, command) + return True + + return False + + +def _record_approval(event: str, command: str) -> None: + entry = { + "event": event, + "command": command, + "approved_at": _utc_now_iso(), + "script_mtime_at_approval": script_mtime_iso(command), + } + with _locked_update_approvals() as data: + data["approvals"] = [ + e for e in data.get("approvals", []) + if not ( + isinstance(e, dict) + and e.get("event") == event + and e.get("command") == command + ) + ] + [entry] + + +def _utc_now_iso() -> str: + return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z") + + +def revoke(command: str) -> int: + """Remove every allowlist entry matching ``command``. + + Returns the number of entries removed. Does not unregister any + callbacks that are already live on the plugin manager in the current + process — restart the CLI / gateway to drop them. + """ + with _locked_update_approvals() as data: + before = len(data.get("approvals", [])) + data["approvals"] = [ + e for e in data.get("approvals", []) + if not (isinstance(e, dict) and e.get("command") == command) + ] + after = len(data["approvals"]) + return before - after + + +_SCRIPT_EXTENSIONS: Tuple[str, ...] = ( + ".sh", ".bash", ".zsh", ".fish", + ".py", ".pyw", + ".rb", ".pl", ".lua", + ".js", ".mjs", ".cjs", ".ts", +) + + +def _command_script_path(command: str) -> str: + """Return the script path from ``command`` for doctor / drift checks. + + Prefers a token ending in a known script extension, then a token + containing ``/`` or leading ``~``, then the first token. Handles + ``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the + common bare-path form. 
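+
+    Illustrative resolutions (following the preference order above):
+
+        "python3 /path/hook.py"      -> "/path/hook.py"  (extension token)
+        "/usr/bin/env bash hook.sh"  -> "hook.sh"        (extension token)
+        "mycmd --flag"               -> "mycmd"          (first token)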
+ """ + try: + parts = shlex.split(command) + except ValueError: + return command + if not parts: + return command + for part in parts: + if part.lower().endswith(_SCRIPT_EXTENSIONS): + return part + for part in parts: + if "/" in part or part.startswith("~"): + return part + return parts[0] + + +# --------------------------------------------------------------------------- +# Helpers for accept-hooks resolution +# --------------------------------------------------------------------------- + +def _resolve_effective_accept( + cfg: Dict[str, Any], accept_hooks_arg: bool, +) -> bool: + """Combine all three opt-in channels into a single boolean. + + Precedence (any truthy source flips us on): + 1. ``--accept-hooks`` flag (CLI) / explicit argument + 2. ``HERMES_ACCEPT_HOOKS`` env var + 3. ``hooks_auto_accept: true`` in ``cli-config.yaml`` + """ + if accept_hooks_arg: + return True + env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower() + if env in ("1", "true", "yes", "on"): + return True + cfg_val = cfg.get("hooks_auto_accept", False) + return bool(cfg_val) + + +# --------------------------------------------------------------------------- +# Introspection (used by `hermes hooks` CLI) +# --------------------------------------------------------------------------- + +def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]: + """Return the allowlist record for this pair, if any.""" + for e in load_allowlist().get("approvals", []): + if ( + isinstance(e, dict) + and e.get("event") == event + and e.get("command") == command + ): + return e + return None + + +def script_mtime_iso(command: str) -> Optional[str]: + """ISO-8601 mtime of the resolved script path, or ``None`` if the + script is missing.""" + path = _command_script_path(command) + if not path: + return None + try: + expanded = os.path.expanduser(path) + return datetime.fromtimestamp( + os.path.getmtime(expanded), tz=timezone.utc, + ).isoformat().replace("+00:00", "Z") + except OSError: + return None + + +def script_is_executable(command: str) -> bool: + """Return ``True`` iff ``command`` is runnable as configured. + + For a bare invocation (``/path/hook.sh``) the script itself must be + executable. For interpreter-prefixed commands (``python3 + /path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has + to be readable — the interpreter doesn't care about the ``X_OK`` + bit. Mirrors what ``_spawn`` would actually do at runtime.""" + path = _command_script_path(command) + if not path: + return False + expanded = os.path.expanduser(path) + if not os.path.isfile(expanded): + return False + try: + argv = shlex.split(command) + except ValueError: + return False + is_bare_invocation = bool(argv) and argv[0] == path + required = os.X_OK if is_bare_invocation else os.R_OK + return os.access(expanded, required) + + +def run_once( + spec: ShellHookSpec, kwargs: Dict[str, Any], +) -> Dict[str, Any]: + """Fire a single shell-hook invocation with a synthetic payload. + Used by ``hermes hooks test`` and ``hermes hooks doctor``. + + ``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook` + would pass at runtime. It is routed through :func:`_serialize_payload` + so the synthetic stdin exactly matches what a real hook firing would + produce — otherwise scripts tested via ``hermes hooks test`` could + diverge silently from production behaviour. 
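+
+    Illustrative call (the kwargs keys mirror ``_serialize_payload``):
+
+        run_once(spec, {"tool_name": "bash", "args": {"cmd": "ls"}})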
+ + Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field + holding the canonical Hermes-wire-shape response.""" + stdin_json = _serialize_payload(spec.event, kwargs) + result = _spawn(spec, stdin_json) + result["parsed"] = _parse_response(spec.event, result["stdout"]) + return result diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 280105daca..a4345ca8c4 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like import json import logging import re +import subprocess from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional @@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+") _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]") _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}") +# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md. +# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are +# left as-is so the user can debug them. +_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}") + +# Matches inline shell snippets like: !`date +%Y-%m-%d` +# Non-greedy, single-line only — no newlines inside the backticks. +_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`") + +# Cap inline-shell output so a runaway command can't blow out the context. +_INLINE_SHELL_MAX_OUTPUT = 4000 + + +def _load_skills_config() -> dict: + """Load the ``skills`` section of config.yaml (best-effort).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + skills_cfg = cfg.get("skills") + if isinstance(skills_cfg, dict): + return skills_cfg + except Exception: + logger.debug("Could not read skills config", exc_info=True) + return {} + + +def _substitute_template_vars( + content: str, + skill_dir: Path | None, + session_id: str | None, +) -> str: + """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content. + + Only substitutes tokens for which a concrete value is available — + unresolved tokens are left in place so the author can spot them. + """ + if not content: + return content + + skill_dir_str = str(skill_dir) if skill_dir else None + + def _replace(match: re.Match) -> str: + token = match.group(1) + if token == "HERMES_SKILL_DIR" and skill_dir_str: + return skill_dir_str + if token == "HERMES_SESSION_ID" and session_id: + return str(session_id) + return match.group(0) + + return _SKILL_TEMPLATE_RE.sub(_replace, content) + + +def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str: + """Execute a single inline-shell snippet and return its stdout (trimmed). + + Failures return a short ``[inline-shell error: ...]`` marker instead of + raising, so one bad snippet can't wreck the whole skill message. 
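+
+    Illustrative outcomes (commands are author-supplied examples):
+
+        _run_inline_shell("date +%Y-%m-%d", skill_dir, 10)  -> e.g. "2025-06-01"
+        _run_inline_shell("sleep 99", skill_dir, 1)
+            -> "[inline-shell timeout after 1s: sleep 99]"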
+ """ + try: + completed = subprocess.run( + ["bash", "-c", command], + cwd=str(cwd) if cwd else None, + capture_output=True, + text=True, + timeout=max(1, int(timeout)), + check=False, + ) + except subprocess.TimeoutExpired: + return f"[inline-shell timeout after {timeout}s: {command}]" + except FileNotFoundError: + return f"[inline-shell error: bash not found]" + except Exception as exc: + return f"[inline-shell error: {exc}]" + + output = (completed.stdout or "").rstrip("\n") + if not output and completed.stderr: + output = completed.stderr.rstrip("\n") + if len(output) > _INLINE_SHELL_MAX_OUTPUT: + output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]" + return output + + +def _expand_inline_shell( + content: str, + skill_dir: Path | None, + timeout: int, +) -> str: + """Replace every !`cmd` snippet in ``content`` with its stdout. + + Runs each snippet with the skill directory as CWD so relative paths in + the snippet work the way the author expects. + """ + if "!`" not in content: + return content + + def _replace(match: re.Match) -> str: + cmd = match.group(1).strip() + if not cmd: + return "" + return _run_inline_shell(cmd, skill_dir, timeout) + + return _INLINE_SHELL_RE.sub(_replace, content) + def build_plan_path( user_instruction: str = "", @@ -133,14 +238,36 @@ def _build_skill_message( activation_note: str, user_instruction: str = "", runtime_note: str = "", + session_id: str | None = None, ) -> str: """Format a loaded skill into a user/system message payload.""" from tools.skills_tool import SKILLS_DIR content = str(loaded_skill.get("content") or "") + # ── Template substitution and inline-shell expansion ── + # Done before anything else so downstream blocks (setup notes, + # supporting-file hints) see the expanded content. + skills_cfg = _load_skills_config() + if skills_cfg.get("template_vars", True): + content = _substitute_template_vars(content, skill_dir, session_id) + if skills_cfg.get("inline_shell", False): + timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10) + content = _expand_inline_shell(content, skill_dir, timeout) + parts = [activation_note, "", content.strip()] + # ── Inject the absolute skill directory so the agent can reference + # bundled scripts without an extra skill_view() round-trip. ── + if skill_dir: + parts.append("") + parts.append(f"[Skill directory: {skill_dir}]") + parts.append( + "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, " + "`templates/config.yaml`) against that directory, then run them " + "with the terminal tool using the absolute path." + ) + # ── Inject resolved skill config values ── _inject_skill_config(loaded_skill, parts) @@ -188,11 +315,13 @@ def _build_skill_message( # Skill is from an external dir — use the skill name instead skill_view_target = skill_dir.name parts.append("") - parts.append("[This skill has supporting files you can load with the skill_view tool:]") + parts.append("[This skill has supporting files:]") for sf in supporting: - parts.append(f"- {sf}") + parts.append(f"- {sf} -> {skill_dir / sf}") parts.append( - f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="")' + f'\nLoad any of these with skill_view(name="{skill_view_target}", ' + f'file_path=""), or run scripts directly by absolute path ' + f"(e.g. `node {skill_dir}/scripts/foo.js`)." 
) if user_instruction: @@ -332,6 +461,7 @@ def build_skill_invocation_message( activation_note, user_instruction=user_instruction, runtime_note=runtime_note, + session_id=task_id, ) @@ -370,6 +500,7 @@ def build_preloaded_skills_prompt( loaded_skill, skill_dir, activation_note, + session_id=task_id, ) ) loaded_names.append(skill_name) diff --git a/agent/smart_model_routing.py b/agent/smart_model_routing.py deleted file mode 100644 index 6d482be270..0000000000 --- a/agent/smart_model_routing.py +++ /dev/null @@ -1,195 +0,0 @@ -"""Helpers for optional cheap-vs-strong model routing.""" - -from __future__ import annotations - -import os -import re -from typing import Any, Dict, Optional - -from utils import is_truthy_value - -_COMPLEX_KEYWORDS = { - "debug", - "debugging", - "implement", - "implementation", - "refactor", - "patch", - "traceback", - "stacktrace", - "exception", - "error", - "analyze", - "analysis", - "investigate", - "architecture", - "design", - "compare", - "benchmark", - "optimize", - "optimise", - "review", - "terminal", - "shell", - "tool", - "tools", - "pytest", - "test", - "tests", - "plan", - "planning", - "delegate", - "subagent", - "cron", - "docker", - "kubernetes", -} - -_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE) - - -def _coerce_bool(value: Any, default: bool = False) -> bool: - return is_truthy_value(value, default=default) - - -def _coerce_int(value: Any, default: int) -> int: - try: - return int(value) - except (TypeError, ValueError): - return default - - -def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: - """Return the configured cheap-model route when a message looks simple. - - Conservative by design: if the message has signs of code/tool/debugging/ - long-form work, keep the primary model. - """ - cfg = routing_config or {} - if not _coerce_bool(cfg.get("enabled"), False): - return None - - cheap_model = cfg.get("cheap_model") or {} - if not isinstance(cheap_model, dict): - return None - provider = str(cheap_model.get("provider") or "").strip().lower() - model = str(cheap_model.get("model") or "").strip() - if not provider or not model: - return None - - text = (user_message or "").strip() - if not text: - return None - - max_chars = _coerce_int(cfg.get("max_simple_chars"), 160) - max_words = _coerce_int(cfg.get("max_simple_words"), 28) - - if len(text) > max_chars: - return None - if len(text.split()) > max_words: - return None - if text.count("\n") > 1: - return None - if "```" in text or "`" in text: - return None - if _URL_RE.search(text): - return None - - lowered = text.lower() - words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()} - if words & _COMPLEX_KEYWORDS: - return None - - route = dict(cheap_model) - route["provider"] = provider - route["model"] = model - route["routing_reason"] = "simple_turn" - return route - - -def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]: - """Resolve the effective model/runtime for one turn. - - Returns a dict with model/runtime/signature/label fields. 
- """ - route = choose_cheap_model_route(user_message, routing_config) - if not route: - return { - "model": primary.get("model"), - "runtime": { - "api_key": primary.get("api_key"), - "base_url": primary.get("base_url"), - "provider": primary.get("provider"), - "api_mode": primary.get("api_mode"), - "command": primary.get("command"), - "args": list(primary.get("args") or []), - "credential_pool": primary.get("credential_pool"), - }, - "label": None, - "signature": ( - primary.get("model"), - primary.get("provider"), - primary.get("base_url"), - primary.get("api_mode"), - primary.get("command"), - tuple(primary.get("args") or ()), - ), - } - - from hermes_cli.runtime_provider import resolve_runtime_provider - - explicit_api_key = None - api_key_env = str(route.get("api_key_env") or "").strip() - if api_key_env: - explicit_api_key = os.getenv(api_key_env) or None - - try: - runtime = resolve_runtime_provider( - requested=route.get("provider"), - explicit_api_key=explicit_api_key, - explicit_base_url=route.get("base_url"), - ) - except Exception: - return { - "model": primary.get("model"), - "runtime": { - "api_key": primary.get("api_key"), - "base_url": primary.get("base_url"), - "provider": primary.get("provider"), - "api_mode": primary.get("api_mode"), - "command": primary.get("command"), - "args": list(primary.get("args") or []), - "credential_pool": primary.get("credential_pool"), - }, - "label": None, - "signature": ( - primary.get("model"), - primary.get("provider"), - primary.get("base_url"), - primary.get("api_mode"), - primary.get("command"), - tuple(primary.get("args") or ()), - ), - } - - return { - "model": route.get("model"), - "runtime": { - "api_key": runtime.get("api_key"), - "base_url": runtime.get("base_url"), - "provider": runtime.get("provider"), - "api_mode": runtime.get("api_mode"), - "command": runtime.get("command"), - "args": list(runtime.get("args") or []), - "credential_pool": runtime.get("credential_pool"), - }, - "label": f"smart route → {route.get('model')} ({runtime.get('provider')})", - "signature": ( - route.get("model"), - runtime.get("provider"), - runtime.get("base_url"), - runtime.get("api_mode"), - runtime.get("command"), - tuple(runtime.get("args") or ()), - ), - } diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py new file mode 100644 index 0000000000..5752113325 --- /dev/null +++ b/agent/transports/__init__.py @@ -0,0 +1,51 @@ +"""Transport layer types and registry for provider response normalization. + +Usage: + from agent.transports import get_transport + transport = get_transport("anthropic_messages") + result = transport.normalize_response(raw_response) +""" + +from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401 + +_REGISTRY: dict = {} + + +def register_transport(api_mode: str, transport_cls: type) -> None: + """Register a transport class for an api_mode string.""" + _REGISTRY[api_mode] = transport_cls + + +def get_transport(api_mode: str): + """Get a transport instance for the given api_mode. + + Returns None if no transport is registered for this api_mode. + This allows gradual migration — call sites can check for None + and fall back to the legacy code path. 
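+
+    Illustrative fallback pattern at a call site:
+
+        transport = get_transport(api_mode)
+        if transport is None:
+            ...  # legacy normalization path
+        else:
+            normalized = transport.normalize_response(raw_response)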
+ """ + if not _REGISTRY: + _discover_transports() + cls = _REGISTRY.get(api_mode) + if cls is None: + return None + return cls() + + +def _discover_transports() -> None: + """Import all transport modules to trigger auto-registration.""" + try: + import agent.transports.anthropic # noqa: F401 + except ImportError: + pass + try: + import agent.transports.codex # noqa: F401 + except ImportError: + pass + try: + import agent.transports.chat_completions # noqa: F401 + except ImportError: + pass + try: + import agent.transports.bedrock # noqa: F401 + except ImportError: + pass diff --git a/agent/transports/anthropic.py b/agent/transports/anthropic.py new file mode 100644 index 0000000000..7ffa71a6f9 --- /dev/null +++ b/agent/transports/anthropic.py @@ -0,0 +1,129 @@ +"""Anthropic Messages API transport. + +Delegates to the existing adapter functions in agent/anthropic_adapter.py. +This transport owns format conversion and normalization — NOT client lifecycle. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse + + +class AnthropicTransport(ProviderTransport): + """Transport for api_mode='anthropic_messages'. + + Wraps the existing functions in anthropic_adapter.py behind the + ProviderTransport ABC. Each method delegates — no logic is duplicated. + """ + + @property + def api_mode(self) -> str: + return "anthropic_messages" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Anthropic (system, messages) tuple. + + kwargs: + base_url: Optional[str] — affects thinking signature handling. + """ + from agent.anthropic_adapter import convert_messages_to_anthropic + + base_url = kwargs.get("base_url") + return convert_messages_to_anthropic(messages, base_url=base_url) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Anthropic input_schema format.""" + from agent.anthropic_adapter import convert_tools_to_anthropic + + return convert_tools_to_anthropic(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Anthropic messages.create() kwargs. + + Calls convert_messages and convert_tools internally. + + params (all optional): + max_tokens: int + reasoning_config: dict | None + tool_choice: str | None + is_oauth: bool + preserve_dots: bool + context_length: int | None + base_url: str | None + fast_mode: bool + """ + from agent.anthropic_adapter import build_anthropic_kwargs + + return build_anthropic_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 16384), + reasoning_config=params.get("reasoning_config"), + tool_choice=params.get("tool_choice"), + is_oauth=params.get("is_oauth", False), + preserve_dots=params.get("preserve_dots", False), + context_length=params.get("context_length"), + base_url=params.get("base_url"), + fast_mode=params.get("fast_mode", False), + ) + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Anthropic response to NormalizedResponse. + + kwargs: + strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names. 
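+
+        Illustrative call (``resp`` is a raw Anthropic Messages response):
+
+            AnthropicTransport().normalize_response(resp, strip_tool_prefix=True)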
+ """ + from agent.anthropic_adapter import normalize_anthropic_response_v2 + + strip_tool_prefix = kwargs.get("strip_tool_prefix", False) + return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix) + + def validate_response(self, response: Any) -> bool: + """Check Anthropic response structure is valid.""" + if response is None: + return False + content_blocks = getattr(response, "content", None) + if not isinstance(content_blocks, list): + return False + if not content_blocks: + return False + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Extract Anthropic cache_read and cache_creation token counts.""" + usage = getattr(response, "usage", None) + if usage is None: + return None + cached = getattr(usage, "cache_read_input_tokens", 0) or 0 + written = getattr(usage, "cache_creation_input_tokens", 0) or 0 + if cached or written: + return {"cached_tokens": cached, "creation_tokens": written} + return None + + # Promote the adapter's canonical mapping to module level so it's shared + _STOP_REASON_MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "refusal": "content_filter", + "model_context_window_exceeded": "length", + } + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Anthropic stop_reason to OpenAI finish_reason.""" + return self._STOP_REASON_MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("anthropic_messages", AnthropicTransport) diff --git a/agent/transports/base.py b/agent/transports/base.py new file mode 100644 index 0000000000..b516967b6a --- /dev/null +++ b/agent/transports/base.py @@ -0,0 +1,89 @@ +"""Abstract base for provider transports. + +A transport owns the data path for one api_mode: + convert_messages → convert_tools → build_kwargs → normalize_response + +It does NOT own: client construction, streaming, credential refresh, +prompt caching, interrupt handling, or retry logic. Those stay on AIAgent. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + +from agent.transports.types import NormalizedResponse + + +class ProviderTransport(ABC): + """Base class for provider-specific format conversion and normalization.""" + + @property + @abstractmethod + def api_mode(self) -> str: + """The api_mode string this transport handles (e.g. 'anthropic_messages').""" + ... + + @abstractmethod + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI-format messages to provider-native format. + + Returns provider-specific structure (e.g. (system, messages) for Anthropic, + or the messages list unchanged for chat_completions). + """ + ... + + @abstractmethod + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI-format tool definitions to provider-native format. + + Returns provider-specific tool list (e.g. Anthropic input_schema format). + """ + ... + + @abstractmethod + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build the complete API call kwargs dict. + + This is the primary entry point — it typically calls convert_messages() + and convert_tools() internally, then adds model-specific config. + + Returns a dict ready to be passed to the provider's SDK client. + """ + ... 
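+
+    # Typical call order for a concrete transport (a sketch — client
+    # construction and the actual API call stay on AIAgent, per the module
+    # docstring):
+    #   api_kwargs = transport.build_kwargs(model, messages, tools, **params)
+    #   raw = client.create(**api_kwargs)
+    #   normalized = transport.normalize_response(raw)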
+ + @abstractmethod + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize a raw provider response to the shared NormalizedResponse type. + + This is the only method that returns a transport-layer type. + """ + ... + + def validate_response(self, response: Any) -> bool: + """Optional: check if the raw response is structurally valid. + + Returns True if valid, False if the response should be treated as invalid. + Default implementation always returns True. + """ + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Optional: extract provider-specific cache hit/creation stats. + + Returns dict with 'cached_tokens' and 'creation_tokens', or None. + Default returns None. + """ + return None + + def map_finish_reason(self, raw_reason: str) -> str: + """Optional: map provider-specific stop reason to OpenAI equivalent. + + Default returns the raw reason unchanged. Override for providers + with different stop reason vocabularies. + """ + return raw_reason diff --git a/agent/transports/bedrock.py b/agent/transports/bedrock.py new file mode 100644 index 0000000000..af549e7eae --- /dev/null +++ b/agent/transports/bedrock.py @@ -0,0 +1,154 @@ +"""AWS Bedrock Converse API transport. + +Delegates to the existing adapter functions in agent/bedrock_adapter.py. +Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport +owns format conversion and normalization, while client construction and +boto3 calls stay on AIAgent. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class BedrockTransport(ProviderTransport): + """Transport for api_mode='bedrock_converse'.""" + + @property + def api_mode(self) -> str: + return "bedrock_converse" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Bedrock Converse format.""" + from agent.bedrock_adapter import convert_messages_to_converse + return convert_messages_to_converse(messages) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Bedrock Converse toolConfig.""" + from agent.bedrock_adapter import convert_tools_to_converse + return convert_tools_to_converse(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Bedrock converse() kwargs. + + Calls convert_messages and convert_tools internally. + + params: + max_tokens: int — output token limit (default 4096) + temperature: float | None + guardrail_config: dict | None — Bedrock guardrails + region: str — AWS region (default 'us-east-1') + """ + from agent.bedrock_adapter import build_converse_kwargs + + region = params.get("region", "us-east-1") + guardrail = params.get("guardrail_config") + + kwargs = build_converse_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 4096), + temperature=params.get("temperature"), + guardrail_config=guardrail, + ) + # Sentinel keys for dispatch — agent pops these before the boto3 call + kwargs["__bedrock_converse__"] = True + kwargs["__bedrock_region__"] = region + return kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Bedrock response to NormalizedResponse. + + Handles two shapes: + 1. 
Raw boto3 dict (from direct converse() calls) + 2. Already-normalized SimpleNamespace with .choices (from dispatch site) + """ + from agent.bedrock_adapter import normalize_converse_response + + # Normalize to OpenAI-compatible SimpleNamespace + if hasattr(response, "choices") and response.choices: + # Already normalized at dispatch site + ns = response + else: + # Raw boto3 dict + ns = normalize_converse_response(response) + + choice = ns.choices[0] + msg = choice.message + finish_reason = choice.finish_reason or "stop" + + tool_calls = None + if msg.tool_calls: + tool_calls = [ + ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + ) + for tc in msg.tool_calls + ] + + usage = None + if hasattr(ns, "usage") and ns.usage: + u = ns.usage + usage = Usage( + prompt_tokens=getattr(u, "prompt_tokens", 0) or 0, + completion_tokens=getattr(u, "completion_tokens", 0) or 0, + total_tokens=getattr(u, "total_tokens", 0) or 0, + ) + + reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None) + + return NormalizedResponse( + content=msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=reasoning, + usage=usage, + ) + + def validate_response(self, response: Any) -> bool: + """Check Bedrock response structure. + + After normalize_converse_response, the response has OpenAI-compatible + .choices — same check as chat_completions. + """ + if response is None: + return False + # Raw Bedrock dict response — check for 'output' key + if isinstance(response, dict): + return "output" in response + # Already-normalized SimpleNamespace + if hasattr(response, "choices"): + return bool(response.choices) + return False + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Bedrock stop reason to OpenAI finish_reason. + + The adapter already does this mapping inside normalize_converse_response, + so this is only used for direct access to raw responses. + """ + _MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "guardrail_intervened": "content_filter", + "content_filtered": "content_filter", + } + return _MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("bedrock_converse", BedrockTransport) diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py new file mode 100644 index 0000000000..900f59dcf4 --- /dev/null +++ b/agent/transports/chat_completions.py @@ -0,0 +1,387 @@ +"""OpenAI Chat Completions transport. + +Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible +providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.). + +Messages and tools are already in OpenAI format — convert_messages and +convert_tools are near-identity. The complexity lives in build_kwargs +which has provider-specific conditionals for max_tokens defaults, +reasoning configuration, temperature handling, and extra_body assembly. +""" + +import copy +from typing import Any, Dict, List, Optional + +from agent.prompt_builder import DEVELOPER_ROLE_MODELS +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class ChatCompletionsTransport(ProviderTransport): + """Transport for api_mode='chat_completions'. + + The default path for OpenAI-compatible providers. 
+ """ + + @property + def api_mode(self) -> str: + return "chat_completions" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]: + """Messages are already in OpenAI format — sanitize Codex leaks only. + + Strips Codex Responses API fields (``codex_reasoning_items`` on the + message, ``call_id``/``response_item_id`` on tool_calls) that strict + chat-completions providers reject with 400/422. + """ + needs_sanitize = False + for msg in messages: + if not isinstance(msg, dict): + continue + if "codex_reasoning_items" in msg: + needs_sanitize = True + break + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc): + needs_sanitize = True + break + if needs_sanitize: + break + + if not needs_sanitize: + return messages + + sanitized = copy.deepcopy(messages) + for msg in sanitized: + if not isinstance(msg, dict): + continue + msg.pop("codex_reasoning_items", None) + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict): + tc.pop("call_id", None) + tc.pop("response_item_id", None) + return sanitized + + def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Tools are already in OpenAI format — identity.""" + return tools + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build chat.completions.create() kwargs. + + This is the most complex transport method — it handles ~16 providers + via params rather than subclasses. + + params: + timeout: float — API call timeout + max_tokens: int | None — user-configured max tokens + ephemeral_max_output_tokens: int | None — one-shot override (error recovery) + max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N} + reasoning_config: dict | None + request_overrides: dict | None + session_id: str | None + qwen_session_metadata: dict | None — {sessionId, promptId} precomputed + model_lower: str — lowercase model name for pattern matching + # Provider detection flags (all optional, default False) + is_openrouter: bool + is_nous: bool + is_qwen_portal: bool + is_github_models: bool + is_nvidia_nim: bool + is_kimi: bool + is_custom_provider: bool + ollama_num_ctx: int | None + # Provider routing + provider_preferences: dict | None + # Qwen-specific + qwen_prepare_fn: callable | None — runs AFTER codex sanitization + qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists + # Temperature + fixed_temperature: Any — from _fixed_temperature_for_model() + omit_temperature: bool + # Reasoning + supports_reasoning: bool + github_reasoning_extra: dict | None + # Claude on OpenRouter/Nous max output + anthropic_max_output: int | None + # Extra + extra_body_additions: dict | None — pre-built extra_body entries + """ + # Codex sanitization: drop reasoning_items / call_id / response_item_id + sanitized = self.convert_messages(messages) + + # Qwen portal prep AFTER codex sanitization. If sanitize already + # deepcopied, reuse that copy via the in-place variant to avoid a + # second deepcopy. 
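+        # (sanitized is messages     -> let qwen_prepare_fn make its own copy;
+        #  sanitized is a fresh copy -> qwen_prepare_inplace_fn mutates it.)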
+ is_qwen = params.get("is_qwen_portal", False) + if is_qwen: + qwen_prep = params.get("qwen_prepare_fn") + qwen_prep_inplace = params.get("qwen_prepare_inplace_fn") + if sanitized is messages: + if qwen_prep is not None: + sanitized = qwen_prep(sanitized) + else: + # Already deepcopied — transform in place + if qwen_prep_inplace is not None: + qwen_prep_inplace(sanitized) + elif qwen_prep is not None: + sanitized = qwen_prep(sanitized) + + # Developer role swap for GPT-5/Codex models + model_lower = params.get("model_lower", (model or "").lower()) + if ( + sanitized + and isinstance(sanitized[0], dict) + and sanitized[0].get("role") == "system" + and any(p in model_lower for p in DEVELOPER_ROLE_MODELS) + ): + sanitized = list(sanitized) + sanitized[0] = {**sanitized[0], "role": "developer"} + + api_kwargs: Dict[str, Any] = { + "model": model, + "messages": sanitized, + } + + timeout = params.get("timeout") + if timeout is not None: + api_kwargs["timeout"] = timeout + + # Temperature + fixed_temp = params.get("fixed_temperature") + omit_temp = params.get("omit_temperature", False) + if omit_temp: + api_kwargs.pop("temperature", None) + elif fixed_temp is not None: + api_kwargs["temperature"] = fixed_temp + + # Qwen metadata (caller precomputes {sessionId, promptId}) + qwen_meta = params.get("qwen_session_metadata") + if qwen_meta and is_qwen: + api_kwargs["metadata"] = qwen_meta + + # Tools + if tools: + api_kwargs["tools"] = tools + + # max_tokens resolution — priority: ephemeral > user > provider default + max_tokens_fn = params.get("max_tokens_param_fn") + ephemeral = params.get("ephemeral_max_output_tokens") + max_tokens = params.get("max_tokens") + anthropic_max_out = params.get("anthropic_max_output") + is_nvidia_nim = params.get("is_nvidia_nim", False) + is_kimi = params.get("is_kimi", False) + reasoning_config = params.get("reasoning_config") + + if ephemeral is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(ephemeral)) + elif max_tokens is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(max_tokens)) + elif is_nvidia_nim and max_tokens_fn: + api_kwargs.update(max_tokens_fn(16384)) + elif is_qwen and max_tokens_fn: + api_kwargs.update(max_tokens_fn(65536)) + elif is_kimi and max_tokens_fn: + # Kimi/Moonshot: 32000 matches Kimi CLI's default + api_kwargs.update(max_tokens_fn(32000)) + elif anthropic_max_out is not None: + api_kwargs["max_tokens"] = anthropic_max_out + + # Kimi: top-level reasoning_effort (unless thinking disabled) + if is_kimi: + _kimi_thinking_off = bool( + reasoning_config + and isinstance(reasoning_config, dict) + and reasoning_config.get("enabled") is False + ) + if not _kimi_thinking_off: + _kimi_effort = "medium" + if reasoning_config and isinstance(reasoning_config, dict): + _e = (reasoning_config.get("effort") or "").strip().lower() + if _e in ("low", "medium", "high"): + _kimi_effort = _e + api_kwargs["reasoning_effort"] = _kimi_effort + + # extra_body assembly + extra_body: Dict[str, Any] = {} + + is_openrouter = params.get("is_openrouter", False) + is_nous = params.get("is_nous", False) + is_github_models = params.get("is_github_models", False) + + provider_prefs = params.get("provider_preferences") + if provider_prefs and is_openrouter: + extra_body["provider"] = provider_prefs + + # Kimi extra_body.thinking + if is_kimi: + _kimi_thinking_enabled = True + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + _kimi_thinking_enabled = False + extra_body["thinking"] = { + 
"type": "enabled" if _kimi_thinking_enabled else "disabled", + } + + # Reasoning + if params.get("supports_reasoning", False): + if is_github_models: + gh_reasoning = params.get("github_reasoning_extra") + if gh_reasoning is not None: + extra_body["reasoning"] = gh_reasoning + else: + if reasoning_config is not None: + rc = dict(reasoning_config) + if is_nous and rc.get("enabled") is False: + pass # omit for Nous when disabled + else: + extra_body["reasoning"] = rc + else: + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} + + if is_nous: + extra_body["tags"] = ["product=hermes-agent"] + + # Ollama num_ctx + ollama_ctx = params.get("ollama_num_ctx") + if ollama_ctx: + options = extra_body.get("options", {}) + options["num_ctx"] = ollama_ctx + extra_body["options"] = options + + # Ollama/custom think=false + if params.get("is_custom_provider", False): + if reasoning_config and isinstance(reasoning_config, dict): + _effort = (reasoning_config.get("effort") or "").strip().lower() + _enabled = reasoning_config.get("enabled", True) + if _effort == "none" or _enabled is False: + extra_body["think"] = False + + if is_qwen: + extra_body["vl_high_resolution_images"] = True + + # Merge any pre-built extra_body additions + additions = params.get("extra_body_additions") + if additions: + extra_body.update(additions) + + if extra_body: + api_kwargs["extra_body"] = extra_body + + # Request overrides last (service_tier etc.) + overrides = params.get("request_overrides") + if overrides: + api_kwargs.update(overrides) + + return api_kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize OpenAI ChatCompletion to NormalizedResponse. + + For chat_completions, this is near-identity — the response is already + in OpenAI format. extra_content on tool_calls (Gemini thought_signature) + is preserved via ToolCall.provider_data. reasoning_details (OpenRouter + unified format) and reasoning_content (DeepSeek/Moonshot) are also + preserved for downstream replay. + """ + choice = response.choices[0] + msg = choice.message + finish_reason = choice.finish_reason or "stop" + + tool_calls = None + if msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + # Preserve provider-specific extras on the tool call. + # Gemini 3 thinking models attach extra_content with + # thought_signature — without replay on the next turn the API + # rejects the request with 400. + tc_provider_data: Dict[str, Any] = {} + extra = getattr(tc, "extra_content", None) + if extra is None and hasattr(tc, "model_extra"): + extra = (tc.model_extra or {}).get("extra_content") + if extra is not None: + if hasattr(extra, "model_dump"): + try: + extra = extra.model_dump() + except Exception: + pass + tc_provider_data["extra_content"] = extra + tool_calls.append(ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + provider_data=tc_provider_data or None, + )) + + usage = None + if hasattr(response, "usage") and response.usage: + u = response.usage + usage = Usage( + prompt_tokens=getattr(u, "prompt_tokens", 0) or 0, + completion_tokens=getattr(u, "completion_tokens", 0) or 0, + total_tokens=getattr(u, "total_tokens", 0) or 0, + ) + + # Preserve reasoning fields separately. DeepSeek/Moonshot use + # ``reasoning_content``; others use ``reasoning``. Downstream code + # (_extract_reasoning, thinking-prefill retry) reads both distinctly, + # so keep them apart in provider_data rather than merging. 
+ reasoning = getattr(msg, "reasoning", None) + reasoning_content = getattr(msg, "reasoning_content", None) + + provider_data: Dict[str, Any] = {} + if reasoning_content: + provider_data["reasoning_content"] = reasoning_content + rd = getattr(msg, "reasoning_details", None) + if rd: + provider_data["reasoning_details"] = rd + + return NormalizedResponse( + content=msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=reasoning, + usage=usage, + provider_data=provider_data or None, + ) + + def validate_response(self, response: Any) -> bool: + """Check that response has valid choices.""" + if response is None: + return False + if not hasattr(response, "choices") or response.choices is None: + return False + if not response.choices: + return False + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details.""" + usage = getattr(response, "usage", None) + if usage is None: + return None + details = getattr(usage, "prompt_tokens_details", None) + if details is None: + return None + cached = getattr(details, "cached_tokens", 0) or 0 + written = getattr(details, "cache_write_tokens", 0) or 0 + if cached or written: + return {"cached_tokens": cached, "creation_tokens": written} + return None + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("chat_completions", ChatCompletionsTransport) diff --git a/agent/transports/codex.py b/agent/transports/codex.py new file mode 100644 index 0000000000..ec48352193 --- /dev/null +++ b/agent/transports/codex.py @@ -0,0 +1,217 @@ +"""OpenAI Responses API (Codex) transport. + +Delegates to the existing adapter functions in agent/codex_responses_adapter.py. +This transport owns format conversion and normalization — NOT client lifecycle, +streaming, or the _run_codex_stream() call path. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class ResponsesApiTransport(ProviderTransport): + """Transport for api_mode='codex_responses'. + + Wraps the functions extracted into codex_responses_adapter.py (PR 1). + """ + + @property + def api_mode(self) -> str: + return "codex_responses" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI chat messages to Responses API input items.""" + from agent.codex_responses_adapter import _chat_messages_to_responses_input + return _chat_messages_to_responses_input(messages) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Responses API function definitions.""" + from agent.codex_responses_adapter import _responses_tools + return _responses_tools(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Responses API kwargs. + + Calls convert_messages and convert_tools internally. 
+ + params: + instructions: str — system prompt (extracted from messages[0] if not given) + reasoning_config: dict | None — {effort, enabled} + session_id: str | None — used for prompt_cache_key + xAI conv header + max_tokens: int | None — max_output_tokens + request_overrides: dict | None — extra kwargs merged in + provider: str | None — provider name for backend-specific logic + base_url: str | None — endpoint URL + base_url_hostname: str | None — hostname for backend detection + is_github_responses: bool — Copilot/GitHub models backend + is_codex_backend: bool — chatgpt.com/backend-api/codex + is_xai_responses: bool — xAI/Grok backend + github_reasoning_extra: dict | None — Copilot reasoning params + """ + from agent.codex_responses_adapter import ( + _chat_messages_to_responses_input, + _responses_tools, + ) + + from run_agent import DEFAULT_AGENT_IDENTITY + + instructions = params.get("instructions", "") + payload_messages = messages + if not instructions: + if messages and messages[0].get("role") == "system": + instructions = str(messages[0].get("content") or "").strip() + payload_messages = messages[1:] + if not instructions: + instructions = DEFAULT_AGENT_IDENTITY + + is_github_responses = params.get("is_github_responses", False) + is_codex_backend = params.get("is_codex_backend", False) + is_xai_responses = params.get("is_xai_responses", False) + + # Resolve reasoning effort + reasoning_effort = "medium" + reasoning_enabled = True + reasoning_config = params.get("reasoning_config") + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + reasoning_enabled = False + elif reasoning_config.get("effort"): + reasoning_effort = reasoning_config["effort"] + + _effort_clamp = {"minimal": "low"} + reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) + + kwargs = { + "model": model, + "instructions": instructions, + "input": _chat_messages_to_responses_input(payload_messages), + "tools": _responses_tools(tools), + "tool_choice": "auto", + "parallel_tool_calls": True, + "store": False, + } + + session_id = params.get("session_id") + if not is_github_responses and session_id: + kwargs["prompt_cache_key"] = session_id + + if reasoning_enabled and is_xai_responses: + kwargs["include"] = ["reasoning.encrypted_content"] + elif reasoning_enabled: + if is_github_responses: + github_reasoning = params.get("github_reasoning_extra") + if github_reasoning is not None: + kwargs["reasoning"] = github_reasoning + else: + kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} + kwargs["include"] = ["reasoning.encrypted_content"] + elif not is_github_responses and not is_xai_responses: + kwargs["include"] = [] + + request_overrides = params.get("request_overrides") + if request_overrides: + kwargs.update(request_overrides) + + max_tokens = params.get("max_tokens") + if max_tokens is not None and not is_codex_backend: + kwargs["max_output_tokens"] = max_tokens + + if is_xai_responses and session_id: + kwargs["extra_headers"] = {"x-grok-conv-id": session_id} + + return kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Codex Responses API response to NormalizedResponse.""" + from agent.codex_responses_adapter import ( + _normalize_codex_response, + _extract_responses_message_text, + _extract_responses_reasoning_text, + ) + + # _normalize_codex_response returns (SimpleNamespace, finish_reason_str) + msg, finish_reason = _normalize_codex_response(response) + + tool_calls = 
None + if msg and msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + provider_data = {} + if hasattr(tc, "call_id") and tc.call_id: + provider_data["call_id"] = tc.call_id + if hasattr(tc, "response_item_id") and tc.response_item_id: + provider_data["response_item_id"] = tc.response_item_id + tool_calls.append(ToolCall( + id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None), + name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""), + arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"), + provider_data=provider_data or None, + )) + + # Extract reasoning items for provider_data + provider_data = {} + if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items: + provider_data["codex_reasoning_items"] = msg.codex_reasoning_items + if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details: + provider_data["reasoning_details"] = msg.reasoning_details + + return NormalizedResponse( + content=msg.content if msg else None, + tool_calls=tool_calls, + finish_reason=finish_reason or "stop", + reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None, + usage=None, # Codex usage is extracted separately in normalize_usage() + provider_data=provider_data or None, + ) + + def validate_response(self, response: Any) -> bool: + """Check Codex Responses API response has valid output structure. + + Returns True only if response.output is a non-empty list. + Does NOT check output_text fallback — the caller handles that + with diagnostic logging for stream backfill recovery. + """ + if response is None: + return False + output = getattr(response, "output", None) + if not isinstance(output, list) or not output: + return False + return True + + def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict: + """Validate and sanitize Codex API kwargs before the call. + + Normalizes input items, strips unsupported fields, validates structure. + """ + from agent.codex_responses_adapter import _preflight_codex_api_kwargs + return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream) + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Codex response.status to OpenAI finish_reason. + + Codex uses response.status ('completed', 'incomplete') + + response.incomplete_details.reason for granular mapping. + This method handles the simple status string; the caller + should check incomplete_details separately for 'max_output_tokens'. + """ + _MAP = { + "completed": "stop", + "incomplete": "length", + "failed": "stop", + "cancelled": "stop", + } + return _MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("codex_responses", ResponsesApiTransport) diff --git a/agent/transports/types.py b/agent/transports/types.py new file mode 100644 index 0000000000..2b048fcaa4 --- /dev/null +++ b/agent/transports/types.py @@ -0,0 +1,100 @@ +"""Shared types for normalized provider responses. + +These dataclasses define the canonical shape that all provider adapters +normalize responses to. The shared surface is intentionally minimal — +only fields that every downstream consumer reads are top-level. +Protocol-specific state goes in ``provider_data`` dicts (response-level +and per-tool-call) so that protocol-aware code paths can access it +without polluting the shared type. 
+""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + + +@dataclass +class ToolCall: + """A normalized tool call from any provider. + + ``id`` is the protocol's canonical identifier — what gets used in + ``tool_call_id`` / ``tool_use_id`` when constructing tool result + messages. May be ``None`` when the provider omits it; the agent + fills it via ``_deterministic_call_id()`` before storing in history. + + ``provider_data`` carries per-tool-call protocol metadata that only + protocol-aware code reads: + + * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}`` + * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}`` + * Others: ``None`` + """ + + id: Optional[str] + name: str + arguments: str # JSON string + provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + + +@dataclass +class Usage: + """Token usage from an API response.""" + + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + cached_tokens: int = 0 + + +@dataclass +class NormalizedResponse: + """Normalized API response from any provider. + + Shared fields are truly cross-provider — every caller can rely on + them without branching on api_mode. Protocol-specific state goes in + ``provider_data`` so that only protocol-aware code paths read it. + + Response-level ``provider_data`` examples: + + * Anthropic: ``{"reasoning_details": [...]}`` + * Codex: ``{"codex_reasoning_items": [...]}`` + * Others: ``None`` + """ + + content: Optional[str] + tool_calls: Optional[List[ToolCall]] + finish_reason: str # "stop", "tool_calls", "length", "content_filter" + reasoning: Optional[str] = None + usage: Optional[Usage] = None + provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + + +# --------------------------------------------------------------------------- +# Factory helpers +# --------------------------------------------------------------------------- + +def build_tool_call( + id: Optional[str], + name: str, + arguments: Any, + **provider_fields: Any, +) -> ToolCall: + """Build a ``ToolCall``, auto-serialising *arguments* if it's a dict. + + Any extra keyword arguments are collected into ``provider_data``. + """ + args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments) + pd = dict(provider_fields) if provider_fields else None + return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd) + + +def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str: + """Translate a provider-specific stop reason to the normalised set. + + Falls back to ``"stop"`` for unknown or ``None`` reasons. 
+
+    """
+    if reason is None:
+        return "stop"
+    return mapping.get(reason, "stop")
diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py
index 29c75b172a..3554c5b991 100644
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -6,6 +6,7 @@ from decimal import Decimal
 from typing import Any, Dict, Literal, Optional
 
 from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
+from utils import base_url_host_matches
 
 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
 
@@ -393,7 +394,7 @@ def resolve_billing_route(
     if provider_name == "openai-codex":
         return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
 
-    if provider_name == "openrouter" or "openrouter.ai" in base:
+    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
         return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
 
     if provider_name == "anthropic":
         return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
diff --git a/batch_runner.py b/batch_runner.py
index 1a65f473ff..7413ad59f4 100644
--- a/batch_runner.py
+++ b/batch_runner.py
@@ -444,6 +444,7 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
             if not reasoning.get("has_any_reasoning", True):
                 print(f"   🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
                 discarded_no_reasoning += 1
+                completed_in_batch.append(prompt_index)
                 continue
 
             # Get and normalize tool stats for consistent schema across all entries
@@ -1189,12 +1190,12 @@ def main(
     """
     # Handle list distributions
     if list_distributions:
-        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
-        
+        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
+
         print("📊 Available Toolset Distributions")
         print("=" * 70)
-        
-        all_dists = get_all_dists()
+
+        all_dists = get_all_dists()
         for dist_name in sorted(all_dists.keys()):
             print_distribution_info(dist_name)
 
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 20b54b7887..e8e3d30af6 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -63,7 +63,38 @@ model:
   # Leave unset to use the model's native output ceiling (recommended).
   # Set only if you want to deliberately limit individual response length.
   #
-  # max_tokens: 8192
+# max_tokens: 8192
+
+# Named provider overrides (optional)
+# Use this for per-provider request timeouts, non-stream stale timeouts,
+# and per-model exceptions.
+# Applies to the primary turn client on every api_mode (OpenAI-wire, native
+# Anthropic, and Anthropic-compatible providers), the fallback chain, and
+# client rebuilds during credential rotation. For OpenAI-wire chat
+# completions (streaming and non-streaming) the configured value is also
+# used as the per-request ``timeout=`` kwarg so it wins over the legacy
+# HERMES_API_TIMEOUT env var (which still applies when no config is set).
+# ``stale_timeout_seconds`` controls the non-streaming stale-call detector and
+# wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these
+# unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
+# HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s).
+#
+# Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock
+# SDK paths) — those use boto3 with its own timeout configuration.
+# +# providers: +# ollama-local: +# request_timeout_seconds: 300 # Longer timeout for local cold-starts +# stale_timeout_seconds: 900 # Explicitly re-enable stale detection on local endpoints +# anthropic: +# request_timeout_seconds: 30 # Fast-fail cloud requests +# models: +# claude-opus-4.6: +# timeout_seconds: 600 # Longer timeout for extended-thinking Opus calls +# openai-codex: +# models: +# gpt-5.4: +# stale_timeout_seconds: 1800 # Longer non-stream stale timeout for slow large-context turns # ============================================================================= # OpenRouter Provider Routing (only applies when using OpenRouter) @@ -91,20 +122,6 @@ model: # # Data policy: "allow" (default) or "deny" to exclude providers that may store data # # data_collection: "deny" -# ============================================================================= -# Smart Model Routing (optional) -# ============================================================================= -# Use a cheaper model for short/simple turns while keeping your main model for -# more complex requests. Disabled by default. -# -# smart_model_routing: -# enabled: true -# max_simple_chars: 160 -# max_simple_words: 28 -# cheap_model: -# provider: openrouter -# model: google/gemini-2.5-flash - # ============================================================================= # Git Worktree Isolation # ============================================================================= @@ -357,6 +374,18 @@ compression: # web_extract: # provider: "auto" # model: "" +# +# # Session search — summarizes matching past sessions +# session_search: +# provider: "auto" +# model: "" +# timeout: 30 +# max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s +# extra_body: {} # Provider-specific OpenAI-compatible request fields +# # Example for providers that support request-body +# # reasoning controls: +# # extra_body: +# # enable_thinking: false # ============================================================================= # Persistent Memory @@ -741,10 +770,12 @@ code_execution: # Subagent Delegation # ============================================================================= # The delegate_task tool spawns child agents with isolated context. -# Supports single tasks and batch mode (up to 3 parallel). +# Supports single tasks and batch mode (default 3 parallel, configurable). delegation: max_iterations: 50 # Max tool-calling turns per child (default: 50) - default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents + # max_concurrent_children: 3 # Max parallel child agents (default: 3) + # max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers. + # orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true). # model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent) # provider: "openrouter" # Override provider for subagents (empty = inherit parent) # # Resolves full credentials (base_url, api_key) automatically. @@ -888,3 +919,39 @@ display: # # Names and usernames are NOT affected (user-chosen, publicly visible). # # Routing/delivery still uses the original values internally. # redact_pii: false + +# ============================================================================= +# Shell-script hooks +# ============================================================================= +# Register shell scripts as plugin-hook callbacks. 
Each entry is executed as +# a subprocess (shell=False, shlex.split) with a JSON payload on stdin. On +# stdout the script may return JSON that either blocks the tool call or +# injects context into the next LLM call. +# +# Valid events (mirror hermes_cli.plugins.VALID_HOOKS): +# pre_tool_call, post_tool_call, pre_llm_call, post_llm_call, +# pre_api_request, post_api_request, on_session_start, on_session_end, +# on_session_finalize, on_session_reset, subagent_stop +# +# First-use consent: each (event, command) pair prompts once on a TTY, then +# is persisted to ~/.hermes/shell-hooks-allowlist.json. Non-interactive +# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the +# hooks_auto_accept key below. +# +# See website/docs/user-guide/features/hooks.md for the full JSON wire +# protocol and worked examples. +# +# hooks: +# pre_tool_call: +# - matcher: "terminal" +# command: "~/.hermes/agent-hooks/block-rm-rf.sh" +# timeout: 10 +# post_tool_call: +# - matcher: "write_file|patch" +# command: "~/.hermes/agent-hooks/auto-format.sh" +# pre_llm_call: +# - command: "~/.hermes/agent-hooks/inject-cwd-context.sh" +# subagent_stop: +# - command: "~/.hermes/agent-hooks/log-orchestration.sh" +# +# hooks_auto_accept: false diff --git a/cli.py b/cli.py index c9ce95e9f2..9d87ff3562 100644 --- a/cli.py +++ b/cli.py @@ -19,12 +19,14 @@ import shutil import sys import json import re +import concurrent.futures import base64 import atexit import tempfile import time import uuid import textwrap +from urllib.parse import unquote, urlparse from contextlib import contextmanager from pathlib import Path from datetime import datetime @@ -65,6 +67,7 @@ from agent.usage_pricing import ( format_duration_compact, format_token_count_compact, ) +from agent.account_usage import fetch_account_usage, render_account_usage_lines from hermes_cli.banner import _format_context_length, format_banner_version_label _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏") @@ -74,6 +77,7 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧ # User-managed env files should override stale shell exports on restart. 
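# An illustrative sketch of the override-on-load rule described above: a
# value parsed from the user's env file wins over whatever the shell still
# has exported. `load_env_file` is a hypothetical stand-in for
# hermes_cli.env_loader.load_hermes_dotenv, whose real parsing rules may
# differ.
def load_env_file(path):
    import os
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        # Unconditional assignment: the file beats a stale shell export.
        os.environ[key.strip()] = value.strip()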
from hermes_constants import get_hermes_home, display_hermes_home from hermes_cli.env_loader import load_hermes_dotenv +from utils import base_url_host_matches _hermes_home = get_hermes_home() _project_env = Path(__file__).parent / '.env' @@ -310,12 +314,6 @@ def load_cli_config() -> Dict[str, Any]: "enabled": True, # Auto-compress when approaching context limit "threshold": 0.50, # Compress at 50% of model's context limit }, - "smart_model_routing": { - "enabled": False, - "max_simple_chars": 160, - "max_simple_words": 28, - "cheap_model": {}, - }, "agent": { "max_turns": 90, # Default max tool-calling iterations (shared with subagents) "verbose": False, @@ -373,7 +371,6 @@ def load_cli_config() -> Dict[str, Any]: }, "delegation": { "max_iterations": 45, # Max tool-calling turns per child agent - "default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents "model": "", # Subagent model override (empty = inherit parent model) "provider": "", # Subagent provider override (empty = inherit parent provider) "base_url": "", # Direct OpenAI-compatible endpoint for subagents @@ -534,7 +531,6 @@ def load_cli_config() -> Dict[str, Any]: if _file_has_terminal_config or env_var not in os.environ: val = terminal_config[config_key] if isinstance(val, list): - import json os.environ[env_var] = json.dumps(val) else: os.environ[env_var] = str(val) @@ -918,6 +914,32 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None: print(f"\033[32m✓ Worktree cleaned up: {wt_path}\033[0m") +def _run_state_db_auto_maintenance(session_db) -> None: + """Call ``SessionDB.maybe_auto_prune_and_vacuum`` using current config. + + Reads the ``sessions:`` section from config.yaml via + :func:`hermes_cli.config.load_config` (the authoritative loader that + deep-merges DEFAULT_CONFIG, so unmigrated configs still get default + values). Honours ``auto_prune`` / ``retention_days`` / + ``vacuum_after_prune`` / ``min_interval_hours``, and delegates to the + DB. Never raises — maintenance must never block interactive startup. + """ + if session_db is None: + return + try: + from hermes_cli.config import load_config as _load_full_config + cfg = (_load_full_config().get("sessions") or {}) + if not cfg.get("auto_prune", False): + return + session_db.maybe_auto_prune_and_vacuum( + retention_days=int(cfg.get("retention_days", 90)), + min_interval_hours=int(cfg.get("min_interval_hours", 24)), + vacuum=bool(cfg.get("vacuum_after_prune", True)), + ) + except Exception as exc: + logger.debug("state.db auto-maintenance skipped: %s", exc) + + def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None: """Remove stale worktrees and orphaned branches on startup. @@ -1147,6 +1169,41 @@ def _rich_text_from_ansi(text: str) -> _RichText: return _RichText.from_ansi(text or "") +def _strip_markdown_syntax(text: str) -> str: + """Best-effort markdown marker removal for plain-text display.""" + plain = _rich_text_from_ansi(text or "").plain + plain = re.sub(r"^\s{0,3}(?:[-*_]\s*){3,}$", "", plain, flags=re.MULTILINE) + plain = re.sub(r"^\s{0,3}#{1,6}\s+", "", plain, flags=re.MULTILINE) + # Preserve blockquotes, lists, and checkboxes because they carry structure. + plain = re.sub(r"(```+|~~~+)", "", plain) + plain = re.sub(r"`([^`]*)`", r"\1", plain) + plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain) + plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain) + plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain) + plain = re.sub(r"(? 
Path | None: if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")): token = token[1:-1].strip() + token = token.replace('\\ ', ' ') if not token: return None - expanded = os.path.expandvars(os.path.expanduser(token)) + expanded = token + if token.startswith("file://"): + try: + parsed = urlparse(token) + if parsed.scheme == "file": + expanded = unquote(parsed.path or "") + if parsed.netloc and os.name == "nt": + expanded = f"//{parsed.netloc}{expanded}" + except Exception: + expanded = token + expanded = os.path.expandvars(os.path.expanduser(expanded)) if os.name != "nt": normalized = expanded.replace("\\", "/") if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha(): @@ -1330,6 +1398,7 @@ def _detect_file_drop(user_input: str) -> "dict | None": or stripped.startswith("~") or stripped.startswith("./") or stripped.startswith("../") + or stripped.startswith("file://") or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha()) or stripped.startswith('"/') or stripped.startswith('"~') @@ -1340,8 +1409,25 @@ def _detect_file_drop(user_input: str) -> "dict | None": if not starts_like_path: return None + direct_path = _resolve_attachment_path(stripped) + if direct_path is not None: + return { + "path": direct_path, + "is_image": direct_path.suffix.lower() in _IMAGE_EXTENSIONS, + "remainder": "", + } + first_token, remainder = _split_path_input(stripped) drop_path = _resolve_attachment_path(first_token) + if drop_path is None and " " in stripped and stripped[0] not in {"'", '"'}: + space_positions = [idx for idx, ch in enumerate(stripped) if ch == " "] + for pos in reversed(space_positions): + candidate = stripped[:pos].rstrip() + resolved = _resolve_attachment_path(candidate) + if resolved is not None: + drop_path = resolved + remainder = stripped[pos + 1 :].strip() + break if drop_path is None: return None @@ -1724,10 +1810,30 @@ class HermesCLI: # streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml) self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False) + self.final_response_markdown = str( + CLI_CONFIG["display"].get("final_response_markdown", "strip") + ).strip().lower() or "strip" + if self.final_response_markdown not in {"render", "strip", "raw"}: + self.final_response_markdown = "strip" # Inline diff previews for write actions (display.inline_diffs in config.yaml) self._inline_diffs_enabled = CLI_CONFIG["display"].get("inline_diffs", True) + # Submitted multiline user-message preview (display.user_message_preview in config.yaml) + _ump = CLI_CONFIG["display"].get("user_message_preview", {}) + if not isinstance(_ump, dict): + _ump = {} + try: + _ump_first_lines = int(_ump.get("first_lines", 2)) + except (TypeError, ValueError): + _ump_first_lines = 2 + try: + _ump_last_lines = int(_ump.get("last_lines", 2)) + except (TypeError, ValueError): + _ump_last_lines = 2 + self.user_message_preview_first_lines = max(1, _ump_first_lines) + self.user_message_preview_last_lines = max(0, _ump_last_lines) + # Streaming display state self._stream_buf = "" # Partial line buffer for line-buffered rendering self._stream_started = False # True once first delta arrives @@ -1785,7 +1891,7 @@ class HermesCLI: # Match key to resolved base_url: OpenRouter URL → prefer OPENROUTER_API_KEY, # custom endpoint → prefer OPENAI_API_KEY (issue #560). # Note: _ensure_runtime_credentials() re-resolves this before first use. 
- if self.base_url and "openrouter.ai" in self.base_url: + if self.base_url and base_url_host_matches(self.base_url, "openrouter.ai"): self.api_key = api_key or os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") else: self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") @@ -1810,7 +1916,7 @@ class HermesCLI: mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys()) invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names] if invalid: - self.console.print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]") + self._console_print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]") # Filesystem checkpoints: CLI flag > config cp_cfg = CLI_CONFIG.get("checkpoints", {}) @@ -1857,8 +1963,9 @@ class HermesCLI: fb = [fb] if fb.get("provider") and fb.get("model") else [] self._fallback_model = fb - # Optional cheap-vs-strong routing for simple turns - self._smart_model_routing = CLI_CONFIG.get("smart_model_routing", {}) or {} + # Signature of the currently-initialised agent's runtime. Used to + # rebuild the agent when provider / model / base_url changes across + # turns (e.g. after /model or credential rotation). self._active_agent_route_signature = None # Agent will be initialized on first use @@ -1869,6 +1976,10 @@ class HermesCLI: self.conversation_history: List[Dict[str, Any]] = [] self.session_start = datetime.now() self._resumed = False + # Per-prompt elapsed timer — started at the beginning of each chat turn, + # frozen when the agent thread completes, displayed in the status bar. + self._prompt_start_time: Optional[float] = None # time.time() when turn started + self._prompt_duration: float = 0.0 # frozen duration of last completed turn # Initialize SQLite session store early so /title works before first message self._session_db = None try: @@ -1876,7 +1987,13 @@ class HermesCLI: self._session_db = SessionDB() except Exception as e: logger.warning("Failed to initialize SessionDB — session will NOT be indexed for search: %s", e) - + + # Opportunistic state.db maintenance — runs at most once per + # min_interval_hours, tracked via state_meta in state.db itself so + # it's shared across all Hermes processes for this HERMES_HOME. + # Never blocks startup on failure. + _run_state_db_auto_maintenance(self._session_db) + # Deferred title: stored in memory until the session is created in the DB self._pending_title: Optional[str] = None @@ -1945,8 +2062,7 @@ class HermesCLI: def _invalidate(self, min_interval: float = 0.25) -> None: """Throttled UI repaint — prevents terminal blinking on slow/SSH connections.""" - import time as _time - now = _time.monotonic() + now = time.monotonic() if hasattr(self, "_app") and self._app and (now - self._last_invalidate) >= min_interval: self._last_invalidate = now self._app.invalidate() @@ -1967,6 +2083,44 @@ class HermesCLI: filled = round((safe_percent / 100) * width) return f"[{('█' * filled) + ('░' * max(0, width - filled))}]" + @staticmethod + def _format_prompt_elapsed(prompt_start_time: Optional[float], prompt_duration: float, live: bool = False) -> str: + """Format per-prompt elapsed time for the status bar. + + Always returns a string — shows 0s on fresh start before first turn. + Keeps seconds visible at all scales so it increments smoothly: + 59s → 1m → 1m 1s → ... → 1m 59s → 2m → 2m 1s → ... + 59m 59s → 1h → 1h 0m 1s → ... + 23h 59m 59s → 1d → 1d 0h 1m → ... + + Emoji prefix: ⏱ when turn is live, ⏲ when frozen or fresh start. 
+ Uses width-1 (no variation selector) glyphs so the status bar stays + aligned in monospace terminals. + """ + if prompt_start_time is None and prompt_duration == 0.0: + return "⏲ 0s" + elapsed = time.time() - prompt_start_time if prompt_start_time is not None else prompt_duration + elapsed = max(0.0, elapsed) + + days = int(elapsed // 86400) + remaining = elapsed % 86400 + hours = int(remaining // 3600) + remaining = remaining % 3600 + minutes = int(remaining // 60) + seconds = int(remaining % 60) + + if days > 0: + time_str = f"{days}d {hours}h {minutes}m" + elif hours > 0: + time_str = f"{hours}h {minutes}m {seconds}s" if seconds else f"{hours}h {minutes}m" + elif minutes > 0: + time_str = f"{minutes}m {seconds}s" if seconds else f"{minutes}m" + else: + time_str = f"{int(elapsed)}s" + + emoji = "⏱" if live else "⏲" + return f"{emoji} {time_str}" + def _get_status_bar_snapshot(self) -> Dict[str, Any]: # Prefer the agent's model name — it updates on fallback. # self.model reflects the originally configured model and never @@ -1985,6 +2139,11 @@ class HermesCLI: "model_name": model_name, "model_short": model_short, "duration": format_duration_compact(elapsed_seconds), + "prompt_elapsed": self._format_prompt_elapsed( + getattr(self, "_prompt_start_time", None), + getattr(self, "_prompt_duration", 0.0), + live=getattr(self, "_prompt_start_time", None) is not None, + ), "context_tokens": 0, "context_length": None, "context_percent": None, @@ -2121,8 +2280,7 @@ class HermesCLI: return "" t0 = getattr(self, "_tool_start_time", 0) or 0 if t0 > 0: - import time as _time - elapsed = _time.monotonic() - t0 + elapsed = time.monotonic() - t0 if elapsed >= 60: _m, _s = int(elapsed // 60), int(elapsed % 60) elapsed_str = f"{_m}m {_s}s" @@ -2176,6 +2334,9 @@ class HermesCLI: parts = [f"⚕ {snapshot['model_short']}", context_label, percent_label] parts.append(duration_label) + prompt_elapsed = snapshot.get("prompt_elapsed") + if prompt_elapsed: + parts.append(prompt_elapsed) return self._trim_status_bar_text(" │ ".join(parts), width) except Exception: return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}" @@ -2234,8 +2395,13 @@ class HermesCLI: (bar_style, percent_label), ("class:status-bar-dim", " │ "), ("class:status-bar-dim", duration_label), - ("class:status-bar", " "), ] + # Position 7: per-prompt elapsed timer (live or frozen) + prompt_elapsed = snapshot.get("prompt_elapsed") + if prompt_elapsed: + frags.append(("class:status-bar-dim", " │ ")) + frags.append(("class:status-bar-dim", prompt_elapsed)) + frags.append(("class:status-bar", " ")) total_width = sum(self._status_bar_display_width(text) for _, text in frags) if total_width > width: @@ -2261,7 +2427,7 @@ class HermesCLI: normalized_model = normalize_model_for_provider(current_model, resolved_provider) if normalized_model and normalized_model != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Normalized model '{current_model}' to '{normalized_model}' for {resolved_provider}.[/]" ) self.model = normalized_model @@ -2277,7 +2443,7 @@ class HermesCLI: canonical = normalize_copilot_model_id(current_model, api_key=self.api_key) if canonical and canonical != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Normalized Copilot model '{current_model}' to '{canonical}'.[/]" ) self.model = canonical @@ -2299,7 +2465,7 @@ class HermesCLI: canonical = normalize_opencode_model_id(resolved_provider, current_model) if canonical and 
canonical != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]" ) self.model = canonical @@ -2321,7 +2487,7 @@ class HermesCLI: if "/" in current_model: slug = current_model.split("/", 1)[1] if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; " f"using '{slug}' for OpenAI Codex.[/]" ) @@ -2369,9 +2535,6 @@ class HermesCLI: def _emit_reasoning_preview(self, reasoning_text: str) -> None: """Render a buffered reasoning preview as a single [thinking] block.""" - import re - import textwrap - preview_text = reasoning_text.strip() if not preview_text: return @@ -2454,6 +2617,59 @@ class HermesCLI: if flush_text: self._emit_reasoning_preview(flush_text) + def _format_submitted_user_message_preview(self, user_input: str) -> str: + """Format the submitted user-message scrollback preview.""" + lines = user_input.split("\n") + if len(lines) <= 1: + return f"[bold {_accent_hex()}]●[/] [bold]{_escape(user_input)}[/]" + + first_lines = int(getattr(self, "user_message_preview_first_lines", 2)) + last_lines = int(getattr(self, "user_message_preview_last_lines", 2)) + first_lines = max(1, first_lines) + last_lines = max(0, last_lines) + head = lines[:first_lines] + remaining_after_head = max(0, len(lines) - len(head)) + tail_count = min(last_lines, remaining_after_head) + tail = lines[-tail_count:] if tail_count else [] + + hidden_middle_count = len(lines) - len(head) - len(tail) + if hidden_middle_count < 0: + hidden_middle_count = 0 + tail = [] + + preview_lines = [ + f"[bold {_accent_hex()}]●[/] [bold]{_escape(head[0])}[/]" + ] + preview_lines.extend(f"[bold]{_escape(line)}[/]" for line in head[1:]) + + if hidden_middle_count > 0: + noun = "line" if hidden_middle_count == 1 else "lines" + preview_lines.append(f"[dim]... (+{hidden_middle_count} more {noun})[/]") + + preview_lines.extend(f"[bold]{_escape(line)}[/]" for line in tail) + return "\n".join(preview_lines) + + def _expand_paste_references(self, text: str | None) -> str: + """Expand [Pasted text #N -> file] placeholders into file contents.""" + if not isinstance(text, str) or "[Pasted text #" not in text: + return text or "" + paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') + + def _expand_ref(match): + path = Path(match.group(1)) + return path.read_text(encoding="utf-8") if path.exists() else match.group(0) + + return paste_ref_re.sub(_expand_ref, text) + + def _print_user_message_preview(self, user_input: str) -> None: + """Render a user message using the normal chat scrollback style.""" + ChatConsole().print(f"[{_accent_hex()}]{'─' * 40}[/]") + text = str(user_input or "") + if "\n" in text: + ChatConsole().print(self._format_submitted_user_message_preview(text)) + else: + ChatConsole().print(f"[bold {_accent_hex()}]●[/] [bold]{_escape(text)}[/]") + def _stream_reasoning_delta(self, text: str) -> None: """Stream reasoning/thinking tokens into a dim box above the response. 
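The multiline submit preview added above reduces to a small head/tail windowing rule: always keep at least the first line, never let head and tail overlap, and summarize the hidden middle. A minimal standalone sketch of the same logic with the Rich markup omitted — `preview_window` is a hypothetical extraction, not a function in this patch:

def preview_window(lines, first=2, last=2):
    # Mirror _format_submitted_user_message_preview: the head takes priority,
    # and the tail only gets whatever lines remain after the head.
    first, last = max(1, first), max(0, last)
    head = lines[:first]
    tail_count = min(last, max(0, len(lines) - len(head)))
    tail = lines[len(lines) - tail_count:] if tail_count else []
    hidden = len(lines) - len(head) - len(tail)
    marker = [f"... (+{hidden} more {'line' if hidden == 1 else 'lines'})"] if hidden else []
    return head + marker + tail

# preview_window([f"l{i}" for i in range(7)])
# -> ['l0', 'l1', '... (+3 more lines)', 'l5', 'l6']

Because the head is clamped to at least one line, the opening of the message always survives truncation, which is what makes the scrollback preview scannable.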
@@ -2697,6 +2913,8 @@ class HermesCLI: _tc = getattr(self, "_stream_text_ansi", "") while "\n" in self._stream_buf: line, self._stream_buf = self._stream_buf.split("\n", 1) + if self.final_response_markdown == "strip": + line = _strip_markdown_syntax(line) _cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}") def _flush_stream(self) -> None: @@ -2714,7 +2932,8 @@ class HermesCLI: if self._stream_buf: _tc = getattr(self, "_stream_text_ansi", "") - _cprint(f"{_STREAM_PAD}{_tc}{self._stream_buf}{_RST}" if _tc else f"{_STREAM_PAD}{self._stream_buf}") + line = _strip_markdown_syntax(self._stream_buf) if self.final_response_markdown == "strip" else self._stream_buf + _cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}") self._stream_buf = "" # Close the response box @@ -2757,9 +2976,7 @@ class HermesCLI: def _command_spinner_frame(self) -> str: """Return the current spinner frame for slow slash commands.""" - import time as _time - - frame_idx = int(_time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES) + frame_idx = int(time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES) return _COMMAND_SPINNER_FRAMES[frame_idx] @contextmanager @@ -2776,6 +2993,39 @@ class HermesCLI: self._command_status = "" self._invalidate(min_interval=0.0) + def _open_external_editor(self, buffer=None) -> bool: + """Open the active input buffer in an external editor.""" + app = getattr(self, "_app", None) + if not app: + _cprint(f"{_DIM}External editor is only available inside the interactive CLI.{_RST}") + return False + if self._command_running: + _cprint(f"{_DIM}Wait for the current command to finish before opening the editor.{_RST}") + return False + if self._sudo_state or self._secret_state or self._approval_state or self._clarify_state: + _cprint(f"{_DIM}Finish the active prompt before opening the editor.{_RST}") + return False + target_buffer = buffer or getattr(app, "current_buffer", None) + if target_buffer is None: + _cprint(f"{_DIM}No active input buffer is available for the external editor.{_RST}") + return False + try: + existing_text = getattr(target_buffer, "text", "") + expanded_text = self._expand_paste_references(existing_text) + if expanded_text != existing_text and hasattr(target_buffer, "text"): + self._skip_paste_collapse = True + target_buffer.text = expanded_text + if hasattr(target_buffer, "cursor_position"): + target_buffer.cursor_position = len(expanded_text) + # Set skip flag (again) so the text-change event fired when the + # editor closes does not re-collapse the returned content. + self._skip_paste_collapse = True + target_buffer.open_in_editor(validate_and_handle=False) + return True + except Exception as exc: + _cprint(f"{_DIM}Failed to open external editor: {exc}{_RST}") + return False + def _ensure_runtime_credentials(self) -> bool: """ Ensure runtime credentials are resolved before agent use. @@ -2883,24 +3133,36 @@ class HermesCLI: return True def _resolve_turn_agent_config(self, user_message: str) -> dict: - """Resolve model/runtime overrides for a single user turn.""" - from agent.smart_model_routing import resolve_turn_route + """Build the effective model/runtime config for a single user turn. + + Always uses the session's primary model/provider. If the user has + toggled `/fast` on and the current model supports Priority + Processing / Anthropic fast mode, attach `request_overrides` so the + API call is marked accordingly. 
+ """ from hermes_cli.models import resolve_fast_mode_overrides - route = resolve_turn_route( - user_message, - self._smart_model_routing, - { - "model": self.model, - "api_key": self.api_key, - "base_url": self.base_url, - "provider": self.provider, - "api_mode": self.api_mode, - "command": self.acp_command, - "args": list(self.acp_args or []), - "credential_pool": getattr(self, "_credential_pool", None), - }, - ) + runtime = { + "api_key": self.api_key, + "base_url": self.base_url, + "provider": self.provider, + "api_mode": self.api_mode, + "command": self.acp_command, + "args": list(self.acp_args or []), + "credential_pool": getattr(self, "_credential_pool", None), + } + route = { + "model": self.model, + "runtime": runtime, + "signature": ( + self.model, + runtime["provider"], + runtime["base_url"], + runtime["api_mode"], + runtime["command"], + tuple(runtime["args"]), + ), + } service_tier = getattr(self, "service_tier", None) if not service_tier: @@ -2908,13 +3170,13 @@ class HermesCLI: return route try: - overrides = resolve_fast_mode_overrides(route.get("model")) + overrides = resolve_fast_mode_overrides(route["model"]) except Exception: overrides = None route["request_overrides"] = overrides return route - def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool: + def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, request_overrides: dict | None = None) -> bool: """ Initialize the agent on first use. When resuming a session, restores conversation history from SQLite. @@ -3070,7 +3332,7 @@ class HermesCLI: use_compact = self.compact or term_width < 80 if use_compact: - self.console.print(_build_compact_banner()) + self._console_print(_build_compact_banner()) self._show_status() else: # Get tools for display @@ -3095,25 +3357,25 @@ class HermesCLI: # Warn about very low context lengths (common with local servers) if ctx_len and ctx_len <= 8192: - self.console.print() - self.console.print( + self._console_print() + self._console_print( f"[yellow]⚠️ Context length is only {ctx_len:,} tokens — " f"this is likely too low for agent use with tools.[/]" ) - self.console.print( + self._console_print( "[dim] Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]" ) base_url = getattr(self, "base_url", "") or "" if "11434" in base_url or "ollama" in base_url.lower(): - self.console.print( + self._console_print( "[dim] Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]" ) elif "1234" in base_url: - self.console.print( + self._console_print( "[dim] LM Studio fix: Set context length in model settings → reload model[/]" ) else: - self.console.print( + self._console_print( "[dim] Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]" ) @@ -3122,20 +3384,20 @@ class HermesCLI: model_name = getattr(self, "model", "") or "" if is_nous_hermes_non_agentic(model_name): - self.console.print() - self.console.print( + self._console_print() + self._console_print( "[bold yellow]⚠ Nous Research Hermes 3 & 4 models are NOT agentic and are not " "designed for use with Hermes Agent.[/]" ) - self.console.print( + self._console_print( "[dim] They lack tool-calling capabilities required for agent workflows. 
" "Consider using an agentic model (Claude, GPT, Gemini, DeepSeek, etc.).[/]" ) - self.console.print( + self._console_print( "[dim] Switch with: /model sonnet or /model gpt5[/]" ) - self.console.print() + self._console_print() def _preload_resumed_session(self) -> bool: """Load a resumed session's history from the DB early (before first chat). @@ -3153,10 +3415,10 @@ class HermesCLI: session_meta = self._session_db.get_session(self.session_id) if not session_meta: - self.console.print( + self._console_print( f"[bold red]Session not found: {self.session_id}[/]" ) - self.console.print( + self._console_print( "[dim]Use a session ID from a previous CLI run " "(hermes sessions list).[/]" ) @@ -3171,7 +3433,7 @@ class HermesCLI: if session_meta.get("title"): title_part = f' "{session_meta["title"]}"' accent_color = _accent_hex() - self.console.print( + self._console_print( f"[{accent_color}]↻ Resumed session [bold]{self.session_id}[/bold]" f"{title_part} " f"({msg_count} user message{'s' if msg_count != 1 else ''}, " @@ -3179,7 +3441,7 @@ class HermesCLI: ) else: accent_color = _accent_hex() - self.console.print( + self._console_print( f"[{accent_color}]Session {self.session_id} found but has no " f"messages. Starting fresh.[/]" ) @@ -3354,7 +3616,7 @@ class HermesCLI: padding=(0, 1), style=_history_text_c, ) - self.console.print(panel) + self._console_print(panel) def _try_attach_clipboard_image(self) -> bool: """Check clipboard for an image and attach it if found. @@ -3725,7 +3987,6 @@ class HermesCLI: image later with ``vision_analyze`` if needed. """ import asyncio as _asyncio - import json as _json from tools.vision_tools import vision_analyze_tool analysis_prompt = ( @@ -3745,7 +4006,7 @@ class HermesCLI: result_json = _asyncio.run( vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt) ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "") enriched_parts.append( @@ -3790,14 +4051,14 @@ class HermesCLI: api_key_missing = [u for u in unavailable if u["missing_vars"]] if api_key_missing: - self.console.print() - self.console.print("[yellow]⚠️ Some tools disabled (missing API keys):[/]") + self._console_print() + self._console_print("[yellow]⚠️ Some tools disabled (missing API keys):[/]") for item in api_key_missing: tools_str = ", ".join(item["tools"][:2]) # Show first 2 tools if len(item["tools"]) > 2: tools_str += f", +{len(item['tools'])-2} more" - self.console.print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]") - self.console.print("[dim] Run 'hermes setup' to configure[/]") + self._console_print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]") + self._console_print("[dim] Run 'hermes setup' to configure[/]") except Exception: pass # Don't crash on import errors @@ -3835,7 +4096,7 @@ class HermesCLI: if self._provider_source: provider_info += f" [dim {separator_color}]·[/] [dim]auth: {self._provider_source}[/]" - self.console.print( + self._console_print( f" {api_indicator} [{accent_color}]{model_short}[/] " f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]" f"{toolsets_info}{provider_info}" @@ -3892,7 +4153,7 @@ class HermesCLI: f"Tokens: {total_tokens:,}", f"Agent Running: {'Yes' if is_running else 'No'}", ]) - self.console.print("\n".join(lines), highlight=False, markup=False) + self._console_print("\n".join(lines), highlight=False, markup=False) def _fast_command_available(self) -> bool: try: 
@@ -3941,6 +4202,7 @@ class HermesCLI: _cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}") _cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}") + _cprint(f" {_DIM}Draft editor: Ctrl+G{_RST}") if _is_termux_environment(): _cprint(f" {_DIM}Attach image: /image {_termux_example_image_path()} or start your prompt with a local image path{_RST}\n") else: @@ -3999,8 +4261,37 @@ class HermesCLI: """ import shlex from argparse import Namespace + from contextlib import redirect_stdout + from io import StringIO from hermes_cli.tools_config import tools_disable_enable_command + def _run_capture(ns: Namespace) -> None: + """Run tools_disable_enable_command, routing its ANSI-colored + print() output through _cprint when inside the interactive TUI + so escapes aren't mangled by patch_stdout's StdoutProxy into + garbled '?[32m...?[0m' text. + + Outside the TUI (standalone mode, tests), call straight through + so real stdout / pytest capture works as expected. + """ + # Standalone/tests, run as usual + if getattr(self, "_app", None) is None: + tools_disable_enable_command(ns) + return + + # Buffer reports isatty()=True so color() in hermes_cli/colors.py + # still emits ANSI escapes. StringIO.isatty() is False, which + # would otherwise strip all colors before we re-render them. + class _TTYBuf(StringIO): + def isatty(self) -> bool: + return True + + buf = _TTYBuf() + with redirect_stdout(buf): + tools_disable_enable_command(ns) + for line in buf.getvalue().splitlines(): + _cprint(line) + try: parts = shlex.split(cmd) except ValueError: @@ -4012,8 +4303,7 @@ class HermesCLI: return if subcommand == "list": - tools_disable_enable_command( - Namespace(tools_action="list", platform="cli")) + _run_capture(Namespace(tools_action="list", platform="cli")) return names = parts[2:] @@ -4030,8 +4320,7 @@ class HermesCLI: label = ", ".join(names) _cprint(f"{_ACCENT}{verb} {label}...{_RST}") - tools_disable_enable_command( - Namespace(tools_action=subcommand, names=names, platform="cli")) + _run_capture(Namespace(tools_action=subcommand, names=names, platform="cli")) # Reset session so the new tool config is picked up from a clean state from hermes_cli.tools_config import _get_platform_tools @@ -4758,7 +5047,7 @@ class HermesCLI: pass cache_enabled = ( - ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower()) + (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower()) or result.api_mode == "anthropic_messages" ) if cache_enabled: @@ -4986,7 +5275,7 @@ class HermesCLI: # Cache notice cache_enabled = ( - ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower()) + (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower()) or result.api_mode == "anthropic_messages" ) if cache_enabled: @@ -5017,6 +5306,30 @@ class HermesCLI: except Exception: return False + def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool: + """Return True when /steer should be dispatched immediately while the agent is running. + + /steer MUST bypass the normal _pending_input → process_loop path when + the agent is active, because process_loop is blocked inside + self.chat() for the duration of the run. 
By the time the queued + command is pulled from _pending_input, _agent_running has already + flipped back to False, and process_command() takes the idle + fallback — delivering the steer as a next-turn message instead of + injecting it mid-run. Dispatching inline on the UI thread calls + agent.steer() directly, which is thread-safe (uses _pending_steer_lock). + """ + if not text or has_images or not _looks_like_slash_command(text): + return False + if not getattr(self, "_agent_running", False): + return False + try: + from hermes_cli.commands import resolve_command + base = text.split(None, 1)[0].lower().lstrip('/') + cmd = resolve_command(base) + return bool(cmd and cmd.name == "steer") + except Exception: + return False + def _show_model_and_providers(self): """Show current model + provider and list all authenticated providers. @@ -5090,8 +5403,15 @@ class HermesCLI: print(" To change model or provider, use: hermes model") + def _output_console(self): + """Use prompt_toolkit-safe Rich rendering once the TUI is live.""" + if getattr(self, "_app", None): + return ChatConsole() + return self.console - + def _console_print(self, *args, **kwargs): + """Print through the active command-safe console.""" + self._output_console().print(*args, **kwargs) @staticmethod def _resolve_personality_prompt(value) -> str: @@ -5111,14 +5431,14 @@ class HermesCLI: from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials from agent.google_code_assist import retrieve_user_quota, CodeAssistError except ImportError as exc: - self.console.print(f" [red]Gemini modules unavailable: {exc}[/]") + self._console_print(f" [red]Gemini modules unavailable: {exc}[/]") return try: access_token = get_valid_access_token() except GoogleOAuthError as exc: - self.console.print(f" [yellow]{exc}[/]") - self.console.print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.") + self._console_print(f" [yellow]{exc}[/]") + self._console_print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.") return creds = load_credentials() @@ -5127,18 +5447,18 @@ class HermesCLI: try: buckets = retrieve_user_quota(access_token, project_id=project_id) except CodeAssistError as exc: - self.console.print(f" [red]Quota lookup failed:[/] {exc}") + self._console_print(f" [red]Quota lookup failed:[/] {exc}") return if not buckets: - self.console.print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]") + self._console_print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]") return # Sort for stable display, group by model buckets.sort(key=lambda b: (b.model_id, b.token_type)) - self.console.print() - self.console.print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})") - self.console.print() + self._console_print() + self._console_print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})") + self._console_print() for b in buckets: pct = max(0.0, min(1.0, b.remaining_fraction)) width = 20 @@ -5148,8 +5468,8 @@ class HermesCLI: header = b.model_id if b.token_type: header += f" [{b.token_type}]" - self.console.print(f" {header:40s} {bar} {pct_str}") - self.console.print() + self._console_print(f" {header:40s} {bar} {pct_str}") + self._console_print() def _handle_personality_command(self, cmd: str): """Handle the /personality command to set predefined personalities.""" @@ -5280,7 +5600,7 @@ class HermesCLI: print(" /cron list") print(' /cron add "every 2h" "Check 
server status" [--skill blogwatcher]') print(' /cron edit --schedule "every 4h" --prompt "New task"') - print(" /cron edit --skill blogwatcher --skill find-nearby") + print(" /cron edit --skill blogwatcher --skill maps") print(" /cron edit --remove-skill blogwatcher") print(" /cron edit --clear-skills") print(" /cron pause ") @@ -5597,7 +5917,7 @@ class HermesCLI: _tip_color = get_active_skin().get_color("banner_dim", "#B8860B") except Exception: _tip_color = "#B8860B" - self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") + self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") except Exception: pass elif canonical == "history": @@ -5691,7 +6011,7 @@ class HermesCLI: elif canonical == "statusbar": self._status_bar_visible = not self._status_bar_visible state = "visible" if self._status_bar_visible else "hidden" - self.console.print(f" Status bar {state}") + self._console_print(f" Status bar {state}") elif canonical == "verbose": self._toggle_verbose() elif canonical == "yolo": @@ -5814,15 +6134,15 @@ class HermesCLI: ) output = result.stdout.strip() or result.stderr.strip() if output: - self.console.print(_rich_text_from_ansi(output)) + self._console_print(_rich_text_from_ansi(output)) else: - self.console.print("[dim]Command returned no output[/]") + self._console_print("[dim]Command returned no output[/]") except subprocess.TimeoutExpired: - self.console.print("[bold red]Quick command timed out (30s)[/]") + self._console_print("[bold red]Quick command timed out (30s)[/]") except Exception as e: - self.console.print(f"[bold red]Quick command error: {e}[/]") + self._console_print(f"[bold red]Quick command error: {e}[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") elif qcmd.get("type") == "alias": target = qcmd.get("target", "").strip() if target: @@ -5831,9 +6151,9 @@ class HermesCLI: aliased_command = f"{target} {user_args}".strip() return self.process_command(aliased_command) else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") # Check for plugin-registered slash commands elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names(): from hermes_cli.plugins import get_plugin_command_handler @@ -6012,8 +6332,7 @@ class HermesCLI: # with the output (fixes #2718). 
if self._app: self._app.invalidate() - import time as _tmod - _tmod.sleep(0.05) # brief pause for refresh + time.sleep(0.05) # brief pause for refresh print() ChatConsole().print(f"[{_accent_hex()}]{'─' * 40}[/]") _cprint(f" ✅ Background task #{task_num} complete") @@ -6033,7 +6352,7 @@ class HermesCLI: _chat_console = ChatConsole() _chat_console.print(Panel( - _rich_text_from_ansi(response), + _render_final_assistant_content(response, mode=self.final_response_markdown), title=f"[{_resp_color} bold]{label} (background #{task_num})[/]", title_align="left", border_style=_resp_color, @@ -6053,8 +6372,7 @@ class HermesCLI: # Same TUI refresh pattern as success path (#2718) if self._app: self._app.invalidate() - import time as _tmod - _tmod.sleep(0.05) + time.sleep(0.05) print() _cprint(f" ❌ Background task #{task_num} failed: {e}") finally: @@ -6158,7 +6476,7 @@ class HermesCLI: _resp_color = "#4F6D4A" ChatConsole().print(Panel( - _rich_text_from_ansi(response), + _render_final_assistant_content(response, mode=self.final_response_markdown), title=f"[{_resp_color} bold]⚕ /btw[/]", title_align="left", border_style=_resp_color, @@ -6274,7 +6592,6 @@ class HermesCLI: _launched = self._try_launch_chrome_debug(_port, _plat.system()) if _launched: # Wait for the port to come up - import time as _time for _wait in range(10): try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -6284,7 +6601,7 @@ class HermesCLI: _already_open = True break except (OSError, socket.timeout): - _time.sleep(0.5) + time.sleep(0.5) if _already_open: print(f" ✓ Chrome launched and listening on port {_port}") else: @@ -6650,6 +6967,18 @@ class HermesCLI: focus_topic=focus_topic or None, ) self.conversation_history = compressed + # _compress_context ends the old session and creates a new child + # session on the agent (run_agent.py::_compress_context). Sync the + # CLI's session_id so /status, /resume, exit summary, and title + # generation all point at the live continuation session, not the + # ended parent. Without this, subsequent end_session() calls target + # the already-closed parent and the child is orphaned. + if ( + getattr(self.agent, "session_id", None) + and self.agent.session_id != self.session_id + ): + self.session_id = self.agent.session_id + self._pending_title = None new_tokens = estimate_messages_tokens_rough(self.conversation_history) summary = summarize_manual_compression( original_history, @@ -6752,6 +7081,27 @@ class HermesCLI: if cost_result.status == "unknown": print(f" Note: Pricing unknown for {agent.model}") + # Account limits -- fetched off-thread with a hard timeout so slow + # provider APIs don't hang the prompt. 
+    provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
+    base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
+    api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
+    account_snapshot = None
+    if provider:
+        # Plain executor rather than a `with` block: __exit__ would call
+        # shutdown(wait=True) and block on the very fetch the timeout is
+        # meant to abandon.
+        _pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        try:
+            account_snapshot = _pool.submit(
+                fetch_account_usage, provider,
+                base_url=base_url, api_key=api_key,
+            ).result(timeout=10.0)
+        except Exception:  # includes concurrent.futures.TimeoutError
+            account_snapshot = None
+        finally:
+            _pool.shutdown(wait=False, cancel_futures=True)
+    account_lines = [f"  {line}" for line in render_account_usage_lines(account_snapshot)]
+    if account_lines:
+        print()
+        for line in account_lines:
+            print(line)
+
     if self.verbose:
         logging.getLogger().setLevel(logging.DEBUG)
         for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
@@ -6802,7 +7152,6 @@
         known state. When a change is detected, triggers _reload_mcp()
         and informs the user so they know the tool list has been refreshed.
         """
-        import time
         import yaml as _yaml
 
         CONFIG_WATCH_INTERVAL = 5.0  # seconds between config.yaml stat() calls
@@ -6894,7 +7243,6 @@
 
             # Refresh the agent's tool list so the model can call new tools
             if self.agent is not None:
-                from model_tools import get_tool_definitions
 
                 self.agent.tools = get_tool_definitions(
                     enabled_toolsets=self.agent.enabled_toolsets if hasattr(self.agent, "enabled_toolsets") else None,
@@ -6977,7 +7325,6 @@
         full history of tool calls (not just the current one in the spinner).
         """
         if event_type == "tool.completed":
-            import time as _time
             self._tool_start_time = 0.0
             # Print stacked scrollback line for "all" / "new" modes
             if function_name and self.tool_progress_mode in ("all", "new"):
@@ -7006,7 +7353,6 @@
         if event_type != "tool.started":
             return
         if function_name and not function_name.startswith("_"):
-            import time as _time
             from agent.display import get_tool_emoji
             emoji = get_tool_emoji(function_name)
             label = preview or function_name
...
             if _pl > 0 and len(label) > _pl:
                 label = label[:_pl - 3] + "..."
self._spinner_text = f"{emoji} {label}" - self._tool_start_time = _time.monotonic() + self._tool_start_time = time.monotonic() # Store args for stacked scrollback line on completion self._pending_tool_info.setdefault(function_name, []).append( function_args if function_args is not None else {} @@ -7132,11 +7478,12 @@ class HermesCLI: self._voice_stop_and_transcribe() # Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=880, count=1) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=880, count=1) + except Exception: + pass try: self._voice_recorder.start(on_silence_stop=_on_silence) @@ -7184,11 +7531,12 @@ class HermesCLI: wav_path = self._voice_recorder.stop() # Audio cue: double beep after stream stopped (no CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=660, count=2) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=660, count=2) + except Exception: + pass if wav_path is None: _cprint(f"{_DIM}No speech detected.{_RST}") @@ -7271,7 +7619,6 @@ class HermesCLI: try: from tools.tts_tool import text_to_speech_tool from tools.voice_mode import play_audio_file - import re # Strip markdown and non-speech content for cleaner TTS tts_text = text[:4000] if len(text) > 4000 else text @@ -7339,6 +7686,17 @@ class HermesCLI: _cprint(f"Unknown voice subcommand: {subcommand}") _cprint("Usage: /voice [on|off|tts|status]") + def _voice_beeps_enabled(self) -> bool: + """Return whether CLI voice mode should play record start/stop beeps.""" + try: + from hermes_cli.config import load_config + voice_cfg = load_config().get("voice", {}) + if isinstance(voice_cfg, dict): + return bool(voice_cfg.get("beep_enabled", True)) + except Exception: + pass + return True + def _enable_voice_mode(self): """Enable voice mode after checking requirements.""" if self._voice_mode: @@ -7648,7 +8006,9 @@ class HermesCLI: return selected = state.get("selected", 0) - choices = state.get("choices") or [] + choices = state.get("choices") + if not isinstance(choices, list): + choices = [] if not (0 <= selected < len(choices)): return @@ -7740,8 +8100,18 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] for i, choice in enumerate(choices): label = choice_labels.get(choice, choice) - prefix = '❯ ' if i == selected else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' # No number for items beyond 10th + if i == selected: + prefix = f'❯ {num_prefix}. ' + else: + prefix = f' {num_prefix}. ' + for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Budget vertical space so HSplit never clips the command or choices. 
@@ -7904,7 +8274,6 @@ class HermesCLI: if not self._init_agent( model_override=turn_route["model"], runtime_override=turn_route["runtime"], - route_label=turn_route["label"], request_overrides=turn_route.get("request_overrides"), ): return None @@ -8033,6 +8402,17 @@ class HermesCLI: def run_agent(): nonlocal result + # Set callbacks inside the agent thread so thread-local storage + # in terminal_tool is populated for this thread. The main thread + # registration (run() line ~9046) is invisible here because + # _callback_tls is threading.local(). Matches the pattern used + # by acp_adapter/server.py for ACP sessions. + set_sudo_password_callback(self._sudo_password_callback) + set_approval_callback(self._approval_callback) + try: + set_secret_capture_callback(self._secret_capture_callback) + except Exception: + pass agent_message = _voice_prefix + message if _voice_prefix else message # Prepend pending model switch note so the model knows about the switch _msn = getattr(self, '_pending_model_switch_note', None) @@ -8058,10 +8438,23 @@ class HermesCLI: "failed": True, "error": _summary, } + finally: + # Clear thread-local callbacks so a reused thread doesn't + # hold stale references to a disposed CLI instance. + try: + set_sudo_password_callback(None) + set_approval_callback(None) + set_secret_capture_callback(None) + except Exception: + pass # Start agent in background thread (daemon so it cannot keep the # process alive when the user closes the terminal tab — SIGHUP # exits the main thread and daemon threads are reaped automatically). + # Start per-prompt elapsed timer — frozen after the agent thread + # finishes; reset on the next turn. + self._prompt_start_time = time.time() + self._prompt_duration = 0.0 agent_thread = threading.Thread(target=run_agent, daemon=True) agent_thread.start() @@ -8091,8 +8484,7 @@ class HermesCLI: try: _dbg = _hermes_home / "interrupt_debug.log" with open(_dbg, "a") as _f: - import time as _t - _f.write(f"{_t.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, " + _f.write(f"{time.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, " f"children={len(self.agent._active_children)}, " f"parent._interrupt={self.agent._interrupt_requested}\n") for _ci, _ch in enumerate(self.agent._active_children): @@ -8139,6 +8531,12 @@ class HermesCLI: # but guard against edge cases. agent_thread.join(timeout=30) + # Freeze per-prompt elapsed timer once the agent thread has + # exited (or been abandoned as a daemon after interrupt). + if self._prompt_start_time is not None: + self._prompt_duration = max(0.0, time.time() - self._prompt_start_time) + self._prompt_start_time = None + # Proactively clean up async clients whose event loop is dead. # The agent thread may have created AsyncOpenAI clients bound # to a per-thread event loop; if that loop is now closed, those @@ -8162,13 +8560,26 @@ class HermesCLI: # buffer so tool/status lines render ABOVE our response box. # The flush pushes data into the renderer queue; the short # sleep lets the renderer actually paint it before we draw. - import time as _time sys.stdout.flush() - _time.sleep(0.15) + time.sleep(0.15) # Update history with full conversation self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history + # If auto-compression fired mid-turn, the agent created a new + # continuation session and mutated self.agent.session_id. 
Sync + # the CLI's session_id so /status, /resume, title generation, + # and the exit summary all target the live child session rather + # than the ended parent. Mirrors the gateway's post-run sync + # (gateway/run.py around line 9983). + if ( + self.agent + and getattr(self.agent, "session_id", None) + and self.agent.session_id != self.session_id + ): + self.session_id = self.agent.session_id + self._pending_title = None + # Get the final response response = result.get("final_response", "") if result else "" @@ -8258,7 +8669,7 @@ class HermesCLI: else: _chat_console = ChatConsole() _chat_console.print(Panel( - _rich_text_from_ansi(response), + _render_final_assistant_content(response, mode=self.final_response_markdown), title=f"[{_resp_color} bold]{label}[/]", title_align="left", border_style=_resp_color, @@ -8603,7 +9014,7 @@ class HermesCLI: except Exception: _welcome_text = "Welcome to Hermes Agent! Type your message or /help for commands." _welcome_color = "#FFF8DC" - self.console.print(f"[{_welcome_color}]{_welcome_text}[/]") + self._console_print(f"[{_welcome_color}]{_welcome_text}[/]") # Show a random tip to help users discover features try: from hermes_cli.tips import get_random_tip @@ -8612,16 +9023,16 @@ class HermesCLI: _tip_color = _welcome_skin.get_color("banner_dim", "#B8860B") except Exception: _tip_color = "#B8860B" - self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") + self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") except Exception: pass # Tips are non-critical — never break startup if self.preloaded_skills and not self._startup_skills_line_shown: skills_label = ", ".join(self.preloaded_skills) - self.console.print( + self._console_print( f"[bold {_accent_hex()}]Activated skills:[/] {skills_label}" ) self._startup_skills_line_shown = True - self.console.print() + self._console_print() # State for async operation self._agent_running = False @@ -8787,6 +9198,17 @@ class HermesCLI: event.app.current_buffer.reset(append_to_history=True) return + # Handle /steer while the agent is running immediately on the + # UI thread. Queuing through _pending_input would deadlock the + # steer until after the agent loop finishes (process_loop is + # blocked inside self.chat()), which turns /steer into a + # post-run next-turn message — defeating mid-run injection. + # agent.steer() is thread-safe (holds _pending_steer_lock). + if self._should_handle_steer_command_inline(text, has_images=has_images): + self.process_command(text) + event.app.current_buffer.reset(append_to_history=True) + return + # Snapshot and clear attached images images = list(self._attached_images) self._attached_images.clear() @@ -8805,8 +9227,7 @@ class HermesCLI: try: _dbg = _hermes_home / "interrupt_debug.log" with open(_dbg, "a") as _f: - import time as _t - _f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " + _f.write(f"{time.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " f"agent_running={self._agent_running}\n") except Exception: pass @@ -8824,6 +9245,16 @@ class HermesCLI: """Ctrl+Enter (c-j) inserts a newline. 
Most terminals send c-j for Ctrl+Enter."""
        event.current_buffer.insert_text('\n')
 
+    @kb.add(
+        'c-g',
+        filter=Condition(
+            lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
+        ),
+    )
+    def handle_open_in_editor(event):
+        """Ctrl+G opens the current draft in an external editor."""
+        cli_ref._open_external_editor(event.current_buffer)
+
     @kb.add('tab', eager=True)
     def handle_tab(event):
         """Tab: accept completion, auto-suggestion, or start completions.
@@ -8875,6 +9306,29 @@ class HermesCLI:
                 self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1)
                 event.app.invalidate()
 
+        # Number keys for quick clarify selection (1-9, 0 for 10th item)
+        def _make_clarify_number_handler(idx):
+            def handler(event):
+                if self._clarify_state and not self._clarify_freetext:
+                    choices = self._clarify_state.get("choices") or []
+                    # Map index to choice (treating "Other" as the last option)
+                    if idx < len(choices):
+                        # Select a numbered choice
+                        self._clarify_state["response_queue"].put(choices[idx])
+                        self._clarify_state = None
+                        self._clarify_freetext = False
+                        event.app.invalidate()
+                    elif idx == len(choices):
+                        # Select "Other" option
+                        self._clarify_freetext = True
+                        event.app.invalidate()
+            return handler
+
+        for _num in range(10):
+            # 1-9 select items 0-8, 0 selects item 9 (10th item)
+            _idx = 9 if _num == 0 else _num - 1
+            kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx))
+
         # --- Dangerous command approval: arrow-key navigation ---
 
         @kb.add('up', filter=Condition(lambda: bool(self._approval_state)))
@@ -8916,6 +9370,20 @@ class HermesCLI:
                 event.app.current_buffer.reset()
                 event.app.invalidate()
 
+        # Number keys for quick approval selection (1-9, 0 for 10th item)
+        def _make_approval_number_handler(idx):
+            def handler(event):
+                if self._approval_state and idx < len(self._approval_state["choices"]):
+                    self._approval_state["selected"] = idx
+                    self._handle_approval_selection()
+                    event.app.invalidate()
+            return handler
+
+        for _num in range(10):
+            # 1-9 select items 0-8, 0 selects item 9 (10th item)
+            _idx = 9 if _num == 0 else _num - 1
+            kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx))
+
         # --- History navigation: up/down browse history in normal input mode ---
         # The TextArea is multiline, so by default up/down only move the cursor.
         # Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@@ -8944,8 +9412,7 @@ class HermesCLI:
         2. Interrupt the running agent (first press)
         3. Force exit (second press within 2s, or when idle)
         """
-        import time as _time
-        now = _time.time()
+        now = time.time()
 
        # Cancel active voice recording.
# Run cancel() in a background thread to prevent blocking the @@ -9053,12 +9520,11 @@ class HermesCLI: @kb.add('c-z') def handle_ctrl_z(event): """Handle Ctrl+Z - suspend process to background (Unix only).""" - import sys if sys.platform == 'win32': _cprint(f"\n{_DIM}Suspend (Ctrl+Z) is not supported on Windows.{_RST}") event.app.invalidate() return - import os, signal as _sig + import signal as _sig from prompt_toolkit.application import run_in_terminal from hermes_cli.skin_engine import get_active_skin agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent") @@ -9275,6 +9741,7 @@ class HermesCLI: _prev_text_len = [0] _prev_newline_count = [0] _paste_just_collapsed = [False] + self._skip_paste_collapse = False def _on_text_changed(buf): """Detect large pastes and collapse them to a file reference. @@ -9294,8 +9761,9 @@ class HermesCLI: text = buf.text chars_added = len(text) - _prev_text_len[0] _prev_text_len[0] = len(text) - if _paste_just_collapsed[0]: + if _paste_just_collapsed[0] or self._skip_paste_collapse: _paste_just_collapsed[0] = False + self._skip_paste_collapse = False _prev_newline_count[0] = text.count('\n') return line_count = text.count('\n') @@ -9304,12 +9772,10 @@ class HermesCLI: is_paste = chars_added > 1 or newlines_added >= 4 if line_count >= 5 and is_paste and not text.startswith('/'): _paste_counter[0] += 1 - # Save to temp file paste_dir = _hermes_home / "pastes" paste_dir.mkdir(parents=True, exist_ok=True) paste_file = paste_dir / f"paste_{_paste_counter[0]}_{datetime.now().strftime('%H%M%S')}.txt" paste_file.write_text(text, encoding="utf-8") - # Replace buffer with compact reference _paste_just_collapsed[0] = True buf.text = f"[Pasted text #{_paste_counter[0]}: {line_count + 1} lines \u2192 {paste_file}]" buf.cursor_position = len(buf.text) @@ -9372,31 +9838,29 @@ class HermesCLI: # extra instructions (sudo countdown, approval navigation, clarify). # The agent-running interrupt hint is now an inline placeholder above. 
def get_hint_text(): - import time as _time - if cli_ref._sudo_state: - remaining = max(0, int(cli_ref._sudo_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._sudo_deadline - time.monotonic())) return [ ('class:hint', ' password hidden · Enter to skip'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._secret_state: - remaining = max(0, int(cli_ref._secret_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._secret_deadline - time.monotonic())) return [ ('class:hint', ' secret hidden · Enter to skip'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._approval_state: - remaining = max(0, int(cli_ref._approval_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._approval_deadline - time.monotonic())) return [ ('class:hint', ' ↑/↓ to select, Enter to confirm'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._clarify_state: - remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._clarify_deadline - time.monotonic())) countdown = f' ({remaining}s)' if cli_ref._clarify_deadline else '' if cli_ref._clarify_freetext: return [ @@ -9488,14 +9952,32 @@ class HermesCLI: selected = state.get("selected", 0) preview_lines = _wrap_panel_text(question, 60) for i, choice in enumerate(choices): - prefix = "❯ " if i == selected and not cli_ref._clarify_freetext else " " - preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f"❯ {num_prefix}. " + else: + prefix = f" {num_prefix}. " + preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # "Other" option in preview + other_num = len(choices) + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' other_label = ( - "❯ Other (type below)" if cli_ref._clarify_freetext - else "❯ Other (type your answer)" if selected == len(choices) - else " Other (type your answer)" + f"❯ {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext + else f"❯ {other_num_prefix}. Other (type your answer)" if selected == len(choices) + else f" {other_num_prefix}. Other (type your answer)" ) - preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) + preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) box_width = _panel_box_width("Hermes needs your input", preview_lines) inner_text_width = max(8, box_width - 2) @@ -9503,18 +9985,35 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] if choices: for i, choice in enumerate(choices): - prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f'❯ {num_prefix}. ' + else: + prefix = f' {num_prefix}. 
' + for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Trailing Other row(s) other_idx = len(choices) - if selected == other_idx and not cli_ref._clarify_freetext: - other_label_mand = '❯ Other (type your answer)' - elif cli_ref._clarify_freetext: - other_label_mand = '❯ Other (type below)' + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' else: - other_label_mand = ' Other (type your answer)' - other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: + other_label_mand = f'❯ {other_num_prefix}. Other (type your answer)' + elif cli_ref._clarify_freetext: + other_label_mand = f'❯ {other_num_prefix}. Other (type below)' + else: + other_label_mand = f' {other_num_prefix}. Other (type your answer)' + other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") elif cli_ref._clarify_freetext: # Freetext-only mode: the guidance line takes the place of choices. other_wrapped = _wrap_panel_text( @@ -9579,6 +10078,15 @@ class HermesCLI: # "Other" option (trailing row(s), only shown when choices exist) other_idx = len(choices) + # Calculate number prefix for "Other" option + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: other_style = 'class:clarify-selected' elif cli_ref._clarify_freetext: @@ -9686,7 +10194,8 @@ class HermesCLI: if stage == "provider": title = "⚙ Model Picker — Select Provider" choices = [] - for p in state.get("providers") or []: + _providers = state.get("providers") + for p in _providers if isinstance(_providers, list) else []: count = p.get("total_models", len(p.get("models", []))) label = f"{p['name']} ({count} model{'s' if count != 1 else ''})" if p.get("is_current"): @@ -9943,22 +10452,20 @@ class HermesCLI: app._on_resize = _resize_clear_ghosts def spinner_loop(): - import time as _time - last_idle_refresh = 0.0 while not self._should_exit: if not self._app: - _time.sleep(0.1) + time.sleep(0.1) continue if self._command_running: self._invalidate(min_interval=0.1) - _time.sleep(0.1) + time.sleep(0.1) else: - now = _time.monotonic() + now = time.monotonic() if now - last_idle_refresh >= 1.0: last_idle_refresh = now self._invalidate(min_interval=1.0) - _time.sleep(0.2) + time.sleep(0.2) spinner_thread = threading.Thread(target=spinner_loop, daemon=True) spinner_thread.start() @@ -10027,49 +10534,12 @@ class HermesCLI: continue # Expand paste references back to full content - import re as _re - _paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') + _paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') paste_refs = list(_paste_ref_re.finditer(user_input)) if isinstance(user_input, str) else [] if paste_refs: - def _expand_ref(m): - p = Path(m.group(1)) - return p.read_text(encoding="utf-8") if p.exists() else m.group(0) - expanded = _paste_ref_re.sub(_expand_ref, user_input) - total_lines = expanded.count('\n') + 1 - n_pastes = len(paste_refs) - _user_bar = f"[{_accent_hex()}]{'─' * 40}[/]" - print() - ChatConsole().print(_user_bar) - # Show any surrounding user text alongside the paste summary - split_parts = _paste_ref_re.split(user_input) - 
visible_user_text = " ".join( - split_parts[i].strip() for i in range(0, len(split_parts), 2) if split_parts[i].strip() - ) - if visible_user_text: - ChatConsole().print( - f"[bold {_accent_hex()}]\u25cf[/] [bold]{_escape(visible_user_text)}[/] " - f"[dim]({n_pastes} pasted block{'s' if n_pastes > 1 else ''}, {total_lines} lines total)[/]" - ) - else: - ChatConsole().print( - f"[bold {_accent_hex()}]\u25cf[/] [bold]{_escape(f'[Pasted text: {total_lines} lines]')}[/]" - ) - user_input = expanded - else: - _user_bar = f"[{_accent_hex()}]{'─' * 40}[/]" - if '\n' in user_input: - first_line = user_input.split('\n')[0] - line_count = user_input.count('\n') + 1 - print() - ChatConsole().print(_user_bar) - ChatConsole().print( - f"[bold {_accent_hex()}]●[/] [bold]{_escape(first_line)}[/] " - f"[dim](+{line_count - 1} lines)[/]" - ) - else: - print() - ChatConsole().print(_user_bar) - ChatConsole().print(f"[bold {_accent_hex()}]●[/] [bold]{_escape(user_input)}[/]") + user_input = self._expand_paste_references(user_input) + print() + self._print_user_message_preview(user_input) # Show image attachment count if submit_images: @@ -10156,13 +10626,12 @@ class HermesCLI: try: if getattr(self, "agent", None) and getattr(self, "_agent_running", False): self.agent.interrupt(f"received signal {signum}") - import time as _t try: _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) except (TypeError, ValueError): _grace = 1.5 if _grace > 0: - _t.sleep(_grace) + time.sleep(_grace) except Exception: pass # never block signal handling raise KeyboardInterrupt() @@ -10195,8 +10664,7 @@ class HermesCLI: # uv-managed Python, fd 0 can be invalid or unregisterable with the # asyncio selector, causing "KeyError: '0 is not registered'" (#6393). try: - import os as _os - _os.fstat(0) + os.fstat(0) except OSError: print( "Error: stdin (fd 0) is not available.\n" @@ -10489,13 +10957,12 @@ def main( _agent = getattr(cli, "agent", None) if _agent is not None: _agent.interrupt(f"received signal {signum}") - import time as _t try: _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) except (TypeError, ValueError): _grace = 1.5 if _grace > 0: - _t.sleep(_grace) + time.sleep(_grace) except Exception: pass # never block signal handling raise KeyboardInterrupt() @@ -10528,7 +10995,6 @@ def main( if cli._init_agent( model_override=turn_route["model"], runtime_override=turn_route["runtime"], - route_label=turn_route["label"], request_overrides=turn_route.get("request_overrides"), ): cli.agent.quiet_mode = True @@ -10542,6 +11008,15 @@ def main( user_message=effective_query, conversation_history=cli.conversation_history, ) + # Sync session_id if mid-run compression created a + # continuation session. The exit line below reports + # session_id to stderr for automation wrappers; without + # this sync it would point at the ended parent. + if ( + getattr(cli.agent, "session_id", None) + and cli.agent.session_id != cli.session_id + ): + cli.session_id = cli.agent.session_id response = result.get("final_response", "") if isinstance(result, dict) else str(result) if response: print(response) diff --git a/cron/jobs.py b/cron/jobs.py index 06d782888f..8fb3f868a9 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -9,6 +9,7 @@ import copy import json import logging import tempfile +import threading import os import re import uuid @@ -34,6 +35,11 @@ except ImportError: HERMES_DIR = get_hermes_home().resolve() CRON_DIR = HERMES_DIR / "cron" JOBS_FILE = CRON_DIR / "jobs.json" + +# In-process lock protecting load_jobs→modify→save_jobs cycles. 
+# Required when tick() runs jobs in parallel threads — without this, +# concurrent mark_job_run / advance_next_run calls can clobber each other. +_jobs_file_lock = threading.Lock() OUTPUT_DIR = CRON_DIR / "output" ONESHOT_GRACE_SECONDS = 120 @@ -594,43 +600,44 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None, ``delivery_error`` is tracked separately from the agent error — a job can succeed (agent produced output) but fail delivery (platform down). """ - jobs = load_jobs() - for i, job in enumerate(jobs): - if job["id"] == job_id: - now = _hermes_now().isoformat() - job["last_run_at"] = now - job["last_status"] = "ok" if success else "error" - job["last_error"] = error if not success else None - # Track delivery failures separately — cleared on successful delivery - job["last_delivery_error"] = delivery_error - - # Increment completed count - if job.get("repeat"): - job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1 + with _jobs_file_lock: + jobs = load_jobs() + for i, job in enumerate(jobs): + if job["id"] == job_id: + now = _hermes_now().isoformat() + job["last_run_at"] = now + job["last_status"] = "ok" if success else "error" + job["last_error"] = error if not success else None + # Track delivery failures separately — cleared on successful delivery + job["last_delivery_error"] = delivery_error - # Check if we've hit the repeat limit - times = job["repeat"].get("times") - completed = job["repeat"]["completed"] - if times is not None and times > 0 and completed >= times: - # Remove the job (limit reached) - jobs.pop(i) - save_jobs(jobs) - return - - # Compute next run - job["next_run_at"] = compute_next_run(job["schedule"], now) + # Increment completed count + if job.get("repeat"): + job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1 + + # Check if we've hit the repeat limit + times = job["repeat"].get("times") + completed = job["repeat"]["completed"] + if times is not None and times > 0 and completed >= times: + # Remove the job (limit reached) + jobs.pop(i) + save_jobs(jobs) + return + + # Compute next run + job["next_run_at"] = compute_next_run(job["schedule"], now) - # If no next run (one-shot completed), disable - if job["next_run_at"] is None: - job["enabled"] = False - job["state"] = "completed" - elif job.get("state") != "paused": - job["state"] = "scheduled" + # If no next run (one-shot completed), disable + if job["next_run_at"] is None: + job["enabled"] = False + job["state"] = "completed" + elif job.get("state") != "paused": + job["state"] = "scheduled" - save_jobs(jobs) - return + save_jobs(jobs) + return - logger.warning("mark_job_run: job_id %s not found, skipping save", job_id) + logger.warning("mark_job_run: job_id %s not found, skipping save", job_id) def advance_next_run(job_id: str) -> bool: @@ -645,20 +652,21 @@ def advance_next_run(job_id: str) -> bool: Returns True if next_run_at was advanced, False otherwise. 
""" - jobs = load_jobs() - for job in jobs: - if job["id"] == job_id: - kind = job.get("schedule", {}).get("kind") - if kind not in ("cron", "interval"): + with _jobs_file_lock: + jobs = load_jobs() + for job in jobs: + if job["id"] == job_id: + kind = job.get("schedule", {}).get("kind") + if kind not in ("cron", "interval"): + return False + now = _hermes_now().isoformat() + new_next = compute_next_run(job["schedule"], now) + if new_next and new_next != job.get("next_run_at"): + job["next_run_at"] = new_next + save_jobs(jobs) + return True return False - now = _hermes_now().isoformat() - new_next = compute_next_run(job["schedule"], now) - if new_next and new_next != job.get("next_run_at"): - job["next_run_at"] = new_next - save_jobs(jobs) - return True - return False - return False + return False def get_due_jobs() -> List[Dict[str, Any]]: diff --git a/cron/scheduler.py b/cron/scheduler.py index 8938063c7f..61d5537d90 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -252,7 +252,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata) future = asyncio.run_coroutine_threadsafe(coro, loop) - result = future.result(timeout=30) + try: + result = future.result(timeout=30) + except TimeoutError: + future.cancel() + raise if result and not getattr(result, "success", True): logger.warning( "Job '%s': media send failed for %s: %s", @@ -382,7 +386,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata), loop, ) - send_result = future.result(timeout=60) + try: + send_result = future.result(timeout=60) + except TimeoutError: + future.cancel() + raise if send_result and not getattr(send_result, "success", True): err = getattr(send_result, "error", "unknown") logger.warning( @@ -422,7 +430,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option # prevent "coroutine was never awaited" RuntimeWarning, then retry in a # fresh thread that has no running loop. coro.close() - import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)) result = future.result(timeout=30) @@ -564,15 +571,53 @@ def _run_job_script(script_path: str) -> tuple[bool, str]: return False, f"Script execution failed: {exc}" -def _build_job_prompt(job: dict) -> str: - """Build the effective prompt for a cron job, optionally loading one or more skills first.""" +def _parse_wake_gate(script_output: str) -> bool: + """Parse the last non-empty stdout line of a cron job's pre-check script + as a wake gate. + + The convention (ported from nanoclaw #1232): if the last stdout line is + JSON like ``{"wakeAgent": false}``, the agent is skipped entirely — no + LLM run, no delivery. Any other output (non-JSON, missing flag, gate + absent, or ``wakeAgent: true``) means wake the agent normally. + + Returns True if the agent should wake, False to skip. 
+ """ + if not script_output: + return True + stripped_lines = [line for line in script_output.splitlines() if line.strip()] + if not stripped_lines: + return True + last_line = stripped_lines[-1].strip() + try: + gate = json.loads(last_line) + except (json.JSONDecodeError, ValueError): + return True + if not isinstance(gate, dict): + return True + return gate.get("wakeAgent", True) is not False + + +def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: + """Build the effective prompt for a cron job, optionally loading one or more skills first. + + Args: + job: The cron job dict. + prerun_script: Optional ``(success, stdout)`` from a script that has + already been executed by the caller (e.g. for a wake-gate check). + When provided, the script is not re-executed and the cached + result is used for prompt injection. When omitted, the script + (if any) runs inline as before. + """ prompt = job.get("prompt", "") skills = job.get("skills") # Run data-collection script if configured, inject output as context. script_path = job.get("script") if script_path: - success, script_output = _run_job_script(script_path) + if prerun_script is not None: + success, script_output = prerun_script + else: + success, script_output = _run_job_script(script_path) if success: if script_output: prompt = ( @@ -674,7 +719,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: job_id = job["id"] job_name = job["name"] - prompt = _build_job_prompt(job) + + # Wake-gate: if this job has a pre-check script, run it BEFORE building + # the prompt so a ``{"wakeAgent": false}`` response can short-circuit + # the whole agent run. We pass the result into _build_job_prompt so + # the script is only executed once. + prerun_script = None + script_path = job.get("script") + if script_path: + prerun_script = _run_job_script(script_path) + _ran_ok, _script_output = prerun_script + if _ran_ok and not _parse_wake_gate(_script_output): + logger.info( + "Job '%s' (ID: %s): wakeAgent=false, skipping agent run", + job_name, job_id, + ) + silent_doc = ( + f"# Cron Job: {job_name}\n\n" + f"**Job ID:** {job_id}\n" + f"**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}\n\n" + "Script gate returned `wakeAgent=false` — agent skipped.\n" + ) + return True, silent_doc, SILENT_MARKER, None + + prompt = _build_job_prompt(job, prerun_script=prerun_script) origin = _resolve_origin(job) _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}" @@ -686,14 +754,17 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: # scheduler process — every job this process runs is a cron job. os.environ["HERMES_CRON_SESSION"] = "1" + # Use ContextVars for per-job session/delivery state so parallel jobs + # don't clobber each other's targets (os.environ is process-global). + from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP + + _ctx_tokens = set_session_vars( + platform=origin["platform"] if origin else "", + chat_id=str(origin["chat_id"]) if origin else "", + chat_name=origin.get("chat_name", "") if origin else "", + ) + try: - # Inject origin context so the agent's send_message tool knows the chat. - # Must be INSIDE the try block so the finally cleanup always runs. 
- if origin: - os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"] - os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"]) - if origin.get("chat_name"): - os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"] # Re-read .env and config.yaml fresh every run so provider/key # changes take effect without a gateway restart. from dotenv import load_dotenv @@ -704,10 +775,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: delivery_target = _resolve_delivery_target(job) if delivery_target: - os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"] - os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"]) + _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"]) + _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"])) if delivery_target.get("thread_id") is not None: - os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"]) + _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"])) model = job.get("model") or os.getenv("HERMES_MODEL") or "" @@ -746,14 +817,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: prefill_messages = None prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "") if prefill_file: - import json as _json pfpath = Path(prefill_file).expanduser() if not pfpath.is_absolute(): pfpath = _hermes_home / pfpath if pfpath.exists(): try: with open(pfpath, "r", encoding="utf-8") as _pf: - prefill_messages = _json.load(_pf) + prefill_messages = json.load(_pf) if not isinstance(prefill_messages, list): prefill_messages = None except Exception as e: @@ -765,7 +835,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: # Provider routing pr = _cfg.get("provider_routing", {}) - smart_routing = _cfg.get("smart_model_routing", {}) or {} from hermes_cli.runtime_provider import ( resolve_runtime_provider, @@ -782,24 +851,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: message = format_runtime_provider_error(exc) raise RuntimeError(message) from exc - from agent.smart_model_routing import resolve_turn_route - turn_route = resolve_turn_route( - prompt, - smart_routing, - { - "model": model, - "api_key": runtime.get("api_key"), - "base_url": runtime.get("base_url"), - "provider": runtime.get("provider"), - "api_mode": runtime.get("api_mode"), - "command": runtime.get("command"), - "args": list(runtime.get("args") or []), - }, - ) - fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None credential_pool = None - runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower() + runtime_provider = str(runtime.get("provider") or "").strip().lower() if runtime_provider: try: from agent.credential_pool import load_pool @@ -816,13 +870,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e) agent = AIAgent( - model=turn_route["model"], - api_key=turn_route["runtime"].get("api_key"), - base_url=turn_route["runtime"].get("base_url"), - provider=turn_route["runtime"].get("provider"), - api_mode=turn_route["runtime"].get("api_mode"), - acp_command=turn_route["runtime"].get("command"), - acp_args=turn_route["runtime"].get("args"), + model=model, + api_key=runtime.get("api_key"), + base_url=runtime.get("base_url"), + provider=runtime.get("provider"), + 
api_mode=runtime.get("api_mode"), + acp_command=runtime.get("command"), + acp_args=runtime.get("args"), max_iterations=max_iterations, reasoning_config=reasoning_config, prefill_messages=prefill_messages, @@ -967,16 +1021,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: return False, output, "", error_msg finally: - # Clean up injected env vars so they don't leak to other jobs - for key in ( - "HERMES_SESSION_PLATFORM", - "HERMES_SESSION_CHAT_ID", - "HERMES_SESSION_CHAT_NAME", - "HERMES_CRON_AUTO_DELIVER_PLATFORM", - "HERMES_CRON_AUTO_DELIVER_CHAT_ID", - "HERMES_CRON_AUTO_DELIVER_THREAD_ID", - ): - os.environ.pop(key, None) + # Clean up ContextVar session/delivery state for this job. + clear_session_vars(_ctx_tokens) if _session_db: try: _session_db.end_session(_cron_session_id, "cron_complete") @@ -1029,15 +1075,41 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: if verbose: logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs)) - executed = 0 + # Advance next_run_at for all recurring jobs FIRST, under the file lock, + # before any execution begins. This preserves at-most-once semantics. for job in due_jobs: - try: - # For recurring jobs (cron/interval), advance next_run_at to the - # next future occurrence BEFORE execution. This way, if the - # process crashes mid-run, the job won't re-fire on restart. - # One-shot jobs are left alone so they can retry on restart. - advance_next_run(job["id"]) + advance_next_run(job["id"]) + # Resolve max parallel workers: env var > config.yaml > unbounded. + # Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour. + _max_workers: Optional[int] = None + try: + _env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip() + if _env_par: + _max_workers = int(_env_par) or None + except (ValueError, TypeError): + logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded") + if _max_workers is None: + try: + _ucfg = load_config() or {} + _cfg_par = ( + _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {} + ).get("max_parallel_jobs") + if _cfg_par is not None: + _max_workers = int(_cfg_par) or None + except Exception: + pass + + if verbose: + logger.info( + "Running %d job(s) in parallel (max_workers=%s)", + len(due_jobs), + _max_workers if _max_workers else "unbounded", + ) + + def _process_job(job: dict) -> bool: + """Run one due job end-to-end: execute, save, deliver, mark.""" + try: success, output, final_response, error = run_job(job) output_file = save_job_output(job["id"], output) @@ -1069,13 +1141,23 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)" mark_job_run(job["id"], success, error, delivery_error=delivery_error) - executed += 1 + return True except Exception as e: logger.error("Error processing job %s: %s", job['id'], e) mark_job_run(job["id"], False, str(e)) + return False - return executed + # Run all due jobs concurrently, each in its own ContextVar copy + # so session/delivery state stays isolated per-thread. 
+ with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool: + _futures = [] + for job in due_jobs: + _ctx = contextvars.copy_context() + _futures.append(_tick_pool.submit(_ctx.run, _process_job, job)) + _results = [f.result() for f in _futures] + + return sum(_results) finally: if fcntl: fcntl.flock(lock_fd, fcntl.LOCK_UN) diff --git a/docs/acp-setup.md b/docs/acp-setup.md deleted file mode 100644 index 8da4e2a215..0000000000 --- a/docs/acp-setup.md +++ /dev/null @@ -1,228 +0,0 @@ -# Hermes Agent — ACP (Agent Client Protocol) Setup Guide - -Hermes Agent supports the **Agent Client Protocol (ACP)**, allowing it to run as -a coding agent inside your editor. ACP lets your IDE send tasks to Hermes, and -Hermes responds with file edits, terminal commands, and explanations — all shown -natively in the editor UI. - ---- - -## Prerequisites - -- Hermes Agent installed and configured (`hermes setup` completed) -- An API key / provider set up in `~/.hermes/.env` or via `hermes login` -- Python 3.11+ - -Install the ACP extra: - -```bash -pip install -e ".[acp]" -``` - ---- - -## VS Code Setup - -### 1. Install the ACP Client extension - -Open VS Code and install **ACP Client** from the marketplace: - -- Press `Ctrl+Shift+X` (or `Cmd+Shift+X` on macOS) -- Search for **"ACP Client"** -- Click **Install** - -Or install from the command line: - -```bash -code --install-extension anysphere.acp-client -``` - -### 2. Configure settings.json - -Open your VS Code settings (`Ctrl+,` → click the `{}` icon for JSON) and add: - -```json -{ - "acpClient.agents": [ - { - "name": "hermes-agent", - "registryDir": "/path/to/hermes-agent/acp_registry" - } - ] -} -``` - -Replace `/path/to/hermes-agent` with the actual path to your Hermes Agent -installation (e.g. `~/.hermes/hermes-agent`). - -Alternatively, if `hermes` is on your PATH, the ACP Client can discover it -automatically via the registry directory. - -### 3. Restart VS Code - -After configuring, restart VS Code. You should see **Hermes Agent** appear in -the ACP agent picker in the chat/agent panel. - ---- - -## Zed Setup - -Zed has built-in ACP support. - -### 1. Configure Zed settings - -Open Zed settings (`Cmd+,` on macOS or `Ctrl+,` on Linux) and add to your -`settings.json`: - -```json -{ - "agent_servers": { - "hermes-agent": { - "type": "custom", - "command": "hermes", - "args": ["acp"], - }, - }, -} -``` - -### 2. Restart Zed - -Hermes Agent will appear in the agent panel. Select it and start a conversation. - ---- - -## JetBrains Setup (IntelliJ, PyCharm, WebStorm, etc.) - -### 1. Install the ACP plugin - -- Open **Settings** → **Plugins** → **Marketplace** -- Search for **"ACP"** or **"Agent Client Protocol"** -- Install and restart the IDE - -### 2. Configure the agent - -- Open **Settings** → **Tools** → **ACP Agents** -- Click **+** to add a new agent -- Set the registry directory to your `acp_registry/` folder: - `/path/to/hermes-agent/acp_registry` -- Click **OK** - -### 3. Use the agent - -Open the ACP panel (usually in the right sidebar) and select **Hermes Agent**. - ---- - -## What You Will See - -Once connected, your editor provides a native interface to Hermes Agent: - -### Chat Panel -A conversational interface where you can describe tasks, ask questions, and -give instructions. Hermes responds with explanations and actions. - -### File Diffs -When Hermes edits files, you see standard diffs in the editor. 
You can: -- **Accept** individual changes -- **Reject** changes you don't want -- **Review** the full diff before applying - -### Terminal Commands -When Hermes needs to run shell commands (builds, tests, installs), the editor -shows them in an integrated terminal. Depending on your settings: -- Commands may run automatically -- Or you may be prompted to **approve** each command - -### Approval Flow -For potentially destructive operations, the editor will prompt you for -approval before Hermes proceeds. This includes: -- File deletions -- Shell commands -- Git operations - ---- - -## Configuration - -Hermes Agent under ACP uses the **same configuration** as the CLI: - -- **API keys / providers**: `~/.hermes/.env` -- **Agent config**: `~/.hermes/config.yaml` -- **Skills**: `~/.hermes/skills/` -- **Sessions**: `~/.hermes/state.db` - -You can run `hermes setup` to configure providers, or edit `~/.hermes/.env` -directly. - -### Changing the model - -Edit `~/.hermes/config.yaml`: - -```yaml -model: openrouter/nous/hermes-3-llama-3.1-70b -``` - -Or set the `HERMES_MODEL` environment variable. - -### Toolsets - -ACP sessions use the curated `hermes-acp` toolset by default. It is designed for editor workflows and intentionally excludes things like messaging delivery, cronjob management, and audio-first UX features. - ---- - -## Troubleshooting - -### Agent doesn't appear in the editor - -1. **Check the registry path** — make sure the `acp_registry/` directory path - in your editor settings is correct and contains `agent.json`. -2. **Check `hermes` is on PATH** — run `which hermes` in a terminal. If not - found, you may need to activate your virtualenv or add it to PATH. -3. **Restart the editor** after changing settings. - -### Agent starts but errors immediately - -1. Run `hermes doctor` to check your configuration. -2. Check that you have a valid API key: `hermes status` -3. Try running `hermes acp` directly in a terminal to see error output. - -### "Module not found" errors - -Make sure you installed the ACP extra: - -```bash -pip install -e ".[acp]" -``` - -### Slow responses - -- ACP streams responses, so you should see incremental output. If the agent - appears stuck, check your network connection and API provider status. -- Some providers have rate limits. Try switching to a different model/provider. - -### Permission denied for terminal commands - -If the editor blocks terminal commands, check your ACP Client extension -settings for auto-approval or manual-approval preferences. - -### Logs - -Hermes logs are written to stderr when running in ACP mode. Check: -- VS Code: **Output** panel → select **ACP Client** or **Hermes Agent** -- Zed: **View** → **Toggle Terminal** and check the process output -- JetBrains: **Event Log** or the ACP tool window - -You can also enable verbose logging: - -```bash -HERMES_LOG_LEVEL=DEBUG hermes acp -``` - ---- - -## Further Reading - -- [ACP Specification](https://github.com/anysphere/acp) -- [Hermes Agent Documentation](https://github.com/NousResearch/hermes-agent) -- Run `hermes --help` for all CLI options diff --git a/docs/honcho-integration-spec.html b/docs/honcho-integration-spec.html deleted file mode 100644 index 455fb84f23..0000000000 --- a/docs/honcho-integration-spec.html +++ /dev/null @@ -1,698 +0,0 @@ - - - - - -honcho-integration-spec - - - - - - - -
-# honcho-integration-spec
-
-Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations.
-
-hermes-agent / openclaw-honcho · Python + TypeScript · 2026-03-09
-
-## Overview
-
-Two independent Honcho integrations have been built for two different agent runtimes: Hermes Agent (Python, baked into the runner) and openclaw-honcho (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, session.context(), peer.chat() — but they made different tradeoffs at every layer.
-
-This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language.
-
-> **Scope:** Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive.
-
-## Architecture comparison
-
-### Hermes: baked-in runner
-
-Honcho is initialised directly inside AIAgent.__init__. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into _cached_system_prompt) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider.
-
-```mermaid
-%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%%
-flowchart TD
-    U["user message"] --> P["_honcho_prefetch() (reads cache — no HTTP)"]
-    P --> SP["_build_system_prompt() (first turn only, cached)"]
-    SP --> LLM["LLM call"]
-    LLM --> R["response"]
-    R --> FP["_honcho_fire_prefetch() (daemon threads, turn end)"]
-    FP --> C1["prefetch_context() thread"]
-    FP --> C2["prefetch_dialectic() thread"]
-    C1 --> CACHE["_context_cache / _dialectic_cache"]
-    C2 --> CACHE
-
-    style U fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style P fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
-    style SP fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
-    style LLM fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style R fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style FP fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
-    style C1 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
-    style C2 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
-    style CACHE fill:#11151c,stroke:#484f58,color:#6e7681
-```
-
-### openclaw-honcho: hook-based plugin
-
-The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside before_prompt_build on every turn. Message capture happens in agent_end. The multi-agent hierarchy is tracked via subagent_spawned. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin.
-
-```mermaid
-%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%%
-flowchart TD
-    U2["user message"] --> BPB["before_prompt_build (BLOCKING HTTP — every turn)"]
-    BPB --> CTX["session.context()"]
-    CTX --> SP2["system prompt assembled"]
-    SP2 --> LLM2["LLM call"]
-    LLM2 --> R2["response"]
-    R2 --> AE["agent_end hook"]
-    AE --> SAVE["session.addMessages() / session.setMetadata()"]
-
-    style U2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style BPB fill:#3a1515,stroke:#f47067,color:#c9d1d9
-    style CTX fill:#3a1515,stroke:#f47067,color:#c9d1d9
-    style SP2 fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
-    style LLM2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style R2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style AE fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style SAVE fill:#11151c,stroke:#484f58,color:#6e7681
-```
-
-## Diff table
-
-| Dimension | Hermes Agent | openclaw-honcho |
-| --- | --- | --- |
-| Context injection timing | Once per session (cached). Zero HTTP on response path after turn 1. | Every turn, blocking. Fresh context per turn but adds latency. |
-| Prefetch strategy | Daemon threads fire at turn end; consumed next turn from cache. | None. Blocking call at prompt-build time. |
-| Dialectic (peer.chat) | Prefetched async; result injected into system prompt next turn. | On-demand via honcho_recall / honcho_analyze tools. |
-| Reasoning level | Dynamic: scales with message length. Floor = config default. Cap = "high". | Fixed per tool: recall=minimal, analyze=medium. |
-| Memory mode | user_memory_mode / agent_memory_mode: hybrid / honcho / local. | None. Always writes to Honcho. |
-| Write frequency | async (background queue), turn, session, N turns. | After every agent_end (no control). |
-| AI peer identity | observe_me=True, seed_ai_identity(), get_ai_representation(), SOUL.md → AI peer. | Agent files uploaded to agent peer at setup. No ongoing self-observation seeding. |
-| Context scope | User peer + AI peer representation, both injected. | User peer (owner) representation + conversation summary. peerPerspective on context call. |
-| Session naming | per-directory / global / manual map / title-based. | Derived from platform session key. |
-| Multi-agent | Single-agent only. | Parent observer hierarchy via subagent_spawned. |
-| Tool surface | Single query_user_context tool (on-demand dialectic). | 6 tools: session, profile, search, context (fast) + recall, analyze (LLM). |
-| Platform metadata | Not stripped. | Explicitly stripped before Honcho storage. |
-| Message dedup | None (sends on every save cycle). | lastSavedIndex in session metadata prevents re-sending. |
-| CLI surface in prompt | Management commands injected into system prompt. Agent knows its own CLI. | Not injected. |
-| AI peer name in identity | Replaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured. | Not implemented. |
-| QMD / local file search | Not implemented. | Passthrough tools when QMD backend configured. |
-| Workspace metadata | Not implemented. | agentPeerMap in workspace metadata tracks agent→peer ID. |
-
-## Hermes patterns to port
-
-Six patterns from Hermes are worth adopting in any Honcho integration. They are described below as integration-agnostic interfaces — the implementation will differ per runtime, but the contract is the same.
-
-**Patterns Hermes contributes**
-
-- Async prefetch (zero-latency)
-- Dynamic reasoning level
-- Per-peer memory modes
-- AI peer identity formation
-- Session naming strategies
-- CLI surface injection
-
-**Patterns openclaw contributes back**
-
-- lastSavedIndex dedup
-- Platform metadata stripping
-- Multi-agent observer hierarchy
-- peerPerspective on context()
-- Tiered tool surface (fast/LLM)
-- Workspace agentPeerMap
- - -
-

Spec: async prefetch

- -

Problem

-

Calling session.context() and peer.chat() synchronously before each LLM call adds 200–800ms of Honcho round-trip latency to every turn. Users experience this as the agent "thinking slowly."

- -

Pattern

-

Fire both calls as non-blocking background work at the end of each turn. Store results in a per-session cache keyed by session ID. At the start of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path.

- -

Interface contract

-
// TypeScript (openclaw / nanobot plugin shape)
-
-interface AsyncPrefetch {
-  // Fire context + dialectic fetches at turn end. Non-blocking.
-  firePrefetch(sessionId: string, userMessage: string): void;
-
-  // Pop cached results at turn start. Returns empty if cache is cold.
-  popContextResult(sessionId: string): ContextResult | null;
-  popDialecticResult(sessionId: string): string | null;
-}
-
-type ContextResult = {
-  representation: string;
-  card: string[];
-  aiRepresentation?: string;  // AI peer context if enabled
-  summary?: string;            // conversation summary if fetched
-};
- -

Implementation notes

-
    -
  • Python: threading.Thread(daemon=True). Write to dict[session_id, result] — GIL makes this safe for simple writes.
  • -
  • TypeScript: Promise stored in Map<string, Promise<ContextResult>>. Await at pop time. If not resolved yet, skip (return null) — do not block.
  • -
  • The pop is destructive: clears the cache entry after reading so stale data never accumulates.
  • -
  • Prefetch should also fire on first turn (even though it won't be consumed until turn 2) — this ensures turn 2 is never cold.
  • -
- -

openclaw-honcho adoption

-

Move session.context() from before_prompt_build to a post-agent_end background task. Store result in state.contextCache. In before_prompt_build, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn.

---

## Spec: dynamic reasoning level

### Problem

Honcho's dialectic endpoint supports reasoning levels from minimal to max. A fixed level per tool wastes budget on simple queries and under-serves complex ones.

### Pattern

Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at high — never select max automatically.

### Interface contract

```typescript
// Shared helper — identical logic in any language
const LEVELS = ["minimal", "low", "medium", "high", "max"];

function dynamicReasoningLevel(
  query: string,
  configDefault: string = "low"
): string {
  const baseIdx = Math.max(0, LEVELS.indexOf(configDefault));
  const n = query.length;
  const bump = n < 120 ? 0 : n < 400 ? 1 : 2;
  return LEVELS[Math.min(baseIdx + bump, 3)]; // cap at "high" (idx 3)
}
```
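Example selections — the repeated strings are illustrative stand-ins for real messages of those lengths:

```typescript
dynamicReasoningLevel("fix this typo");             // "low"    — short query, no bump
dynamicReasoningLevel("x".repeat(200));             // "medium" — 120–400 chars, +1
dynamicReasoningLevel("x".repeat(500));             // "high"   — >400 chars, +2
dynamicReasoningLevel("x".repeat(500), "minimal");  // "medium" — recall-style floor
```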

### Config key

Add a `dialecticReasoningLevel` config field (string, default `"low"`). This sets the floor. Users can raise or lower it. The dynamic bump always applies on top.

### openclaw-honcho adoption

Apply in honcho_recall and honcho_analyze: replace the fixed reasoningLevel with the dynamic selector. honcho_recall should use floor "minimal" and honcho_analyze floor "medium" — both still bump with message length.

---

## Spec: per-peer memory modes

### Problem

Users want independent control over whether user context and agent context are written locally, to Honcho, or both. A single memoryMode shorthand is not granular enough.

### Pattern

Three modes per peer: hybrid (write both local + Honcho), honcho (Honcho only, disable local files), local (local files only, skip Honcho sync for this peer). Two orthogonal axes: user peer and agent peer.

### Config schema

```jsonc
// ~/.openclaw/openclaw.json  (or ~/.nanobot/config.json)
{
  "plugins": {
    "openclaw-honcho": {
      "config": {
        "apiKey": "...",
        "memoryMode": "hybrid",        // shorthand: both peers
        "userMemoryMode": "honcho",    // override for user peer
        "agentMemoryMode": "hybrid"    // override for agent peer
      }
    }
  }
}
```

### Resolution order

1. Per-peer field (`userMemoryMode` / `agentMemoryMode`) — wins if present.
2. Shorthand `memoryMode` — applies to both peers as default.
3. Hardcoded default: `"hybrid"`.
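A sketch of that chain, assuming a plain config object with the field names from the schema above:

```typescript
type MemoryMode = "hybrid" | "honcho" | "local";

interface MemoryConfig {
  memoryMode?: MemoryMode;
  userMemoryMode?: MemoryMode;
  agentMemoryMode?: MemoryMode;
}

function resolveMemoryMode(cfg: MemoryConfig, peer: "user" | "agent"): MemoryMode {
  const perPeer = peer === "user" ? cfg.userMemoryMode : cfg.agentMemoryMode;
  return perPeer ?? cfg.memoryMode ?? "hybrid"; // per-peer → shorthand → default
}
```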

### Effect on Honcho sync

- `userMemoryMode=local`: skip adding user peer messages to Honcho.
- `agentMemoryMode=local`: skip adding assistant peer messages to Honcho.
- Both `local`: skip `session.addMessages()` entirely.
- `userMemoryMode=honcho`: disable local USER.md writes.
- `agentMemoryMode=honcho`: disable local MEMORY.md / SOUL.md writes.
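These rules reduce to two gates per peer. A sketch using `resolveMemoryMode()` from the previous snippet, where `cfg` is the resolved plugin config:

```typescript
const syncUserToHoncho  = resolveMemoryMode(cfg, "user")  !== "local";
const syncAgentToHoncho = resolveMemoryMode(cfg, "agent") !== "local";
const skipAddMessages   = !syncUserToHoncho && !syncAgentToHoncho;

const writeLocalUserFiles  = resolveMemoryMode(cfg, "user")  !== "honcho"; // USER.md
const writeLocalAgentFiles = resolveMemoryMode(cfg, "agent") !== "honcho"; // MEMORY.md / SOUL.md
```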
---

## Spec: AI peer identity formation

### Problem

Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if observe_me=True is set for the agent peer. Without it, the agent peer accumulates nothing and Honcho's AI-side model never forms.


Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation, rather than waiting for it to emerge from scratch.

### Part A: observe_me=True for agent peer

```typescript
// TypeScript — in session.addPeers() call
await session.addPeers([
  [ownerPeer.id, { observeMe: true,  observeOthers: false }],
  [agentPeer.id, { observeMe: true,  observeOthers: true  }], // was false
]);
```

This is a one-line change but foundational. Without it, Honcho's AI peer representation stays empty regardless of what the agent says.

### Part B: seedAiIdentity()

```typescript
async function seedAiIdentity(
  agentPeer: Peer,
  content: string,
  source: string
): Promise<boolean> {
  const wrapped = [
    `<ai_identity_seed>`,
    `<source>${source}</source>`,
    ``,
    content.trim(),
    `</ai_identity_seed>`,
  ].join("\n");

  await agentPeer.addMessage("assistant", wrapped);
  return true;
}
```

### Part C: migrate agent files at setup

During openclaw honcho setup, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md, BOOTSTRAP.md) to the agent peer using seedAiIdentity() instead of session.uploadFile(). This routes the content through Honcho's observation pipeline rather than the file store.
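A sketch of that migration step — `readAgentFile()` is a hypothetical helper that returns the file's contents or `null` when it doesn't exist:

```typescript
const AGENT_SELF_FILES = ["SOUL.md", "IDENTITY.md", "AGENTS.md", "BOOTSTRAP.md"];

for (const name of AGENT_SELF_FILES) {
  const content = await readAgentFile(name);
  if (content) {
    // Routed through the observation pipeline, not the file store.
    await seedAiIdentity(agentPeer, content, name);
  }
}
```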

### Part D: AI peer name in identity

When the agent has a configured name (non-default), inject it into the agent's self-identity prefix. In OpenClaw this means adding to the injected system prompt section:

```typescript
// In context hook return value
return {
  systemPrompt: [
    agentName ? `You are ${agentName}.` : "",
    "## User Memory Context",
    ...sections,
  ].filter(Boolean).join("\n\n")
};
```

### CLI surface: honcho identity subcommand

```
openclaw honcho identity <file>    # seed from file
openclaw honcho identity --show    # show current AI peer representation
```

---

## Spec: session naming strategies

### Problem

When Honcho is used across multiple projects or directories, a single global session means every project shares the same context. Per-directory sessions provide isolation without requiring users to name sessions manually.

### Strategies

| Strategy | Session key | When to use |
|---|---|---|
| `per-directory` | basename of CWD | Default. Each project gets its own session. |
| `global` | fixed string `"global"` | Single cross-project session. |
| manual map | user-configured per path | `sessions` config map overrides directory basename. |
| title-based | sanitized session title | When agent supports named sessions; title set mid-conversation. |
### Config schema

```jsonc
{
  "sessionStrategy": "per-directory",   // "per-directory" | "global"
  "sessionPeerPrefix": false,           // prepend peer name to session key
  "sessions": {                         // manual overrides
    "/home/user/projects/foo": "foo-project"
  }
}
```

### CLI surface

```
openclaw honcho sessions              # list all mappings
openclaw honcho map <name>            # map cwd to session name
openclaw honcho map                   # no-arg = list mappings
```

Resolution order: manual map wins → session title → directory basename → platform key.
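A sketch of that resolution chain under the config schema above; `sessionTitle` and `platformKey` are hypothetical inputs supplied by the host runtime:

```typescript
function resolveSessionKey(
  cwd: string,
  cfg: { sessionStrategy?: string; sessions?: Record<string, string> },
  sessionTitle?: string,
  platformKey?: string
): string {
  const mapped = cfg.sessions?.[cwd];
  if (mapped) return mapped;                             // manual map wins
  if (sessionTitle) return sanitizeTitle(sessionTitle);  // title-based
  if (cfg.sessionStrategy === "global") return "global";
  const basename = cwd.split("/").filter(Boolean).pop(); // per-directory
  return basename ?? platformKey ?? "global";            // platform key fallback
}

function sanitizeTitle(title: string): string {
  return title.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/(^-+|-+$)/g, "");
}
```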

---

## Spec: CLI surface injection

### Problem

When a user asks "how do I change my memory settings?" or "what Honcho commands are available?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface.

### Pattern

When Honcho is active, append a compact command reference to the system prompt. The agent can cite these commands directly instead of guessing.

```typescript
// In context hook, append to systemPrompt
const honchoSection = [
  "# Honcho memory integration",
  `Active. Session: ${sessionKey}. Mode: ${mode}.`,
  "Management commands:",
  "  openclaw honcho status                     — show config + connection",
  "  openclaw honcho mode [hybrid|honcho|local] — show or set memory mode",
  "  openclaw honcho sessions                   — list session mappings",
  "  openclaw honcho map <name>                 — map directory to session",
  "  openclaw honcho identity [file] [--show]   — seed or show AI identity",
  "  openclaw honcho setup                      — full interactive wizard",
].join("\n");
```
Keep it compact. This section is injected every turn. Keep it under 300 chars of context. List commands, not explanations — the agent can explain them on request.

---

## openclaw-honcho checklist

Ordered by impact. Each item maps to a spec section above.

- [ ] **Async prefetch** — move `session.context()` out of `before_prompt_build` into a post-`agent_end` background Promise. Pop from cache at prompt build.
- [ ] **observe_me=True for agent peer** — one-line change in the `session.addPeers()` config for the agent peer.
- [ ] **Dynamic reasoning level** — add the `dynamicReasoningLevel()` helper; apply in `honcho_recall` and `honcho_analyze`. Add `dialecticReasoningLevel` to the config schema.
- [ ] **Per-peer memory modes** — add `userMemoryMode` / `agentMemoryMode` to config; gate Honcho sync and local writes accordingly.
- [ ] **seedAiIdentity()** — add helper; apply during setup migration for SOUL.md / IDENTITY.md instead of `session.uploadFile()`.
- [ ] **Session naming strategies** — add `sessionStrategy`, `sessions` map, and `sessionPeerPrefix` to config; implement the resolution function.
- [ ] **CLI surface injection** — append the command reference to the `before_prompt_build` return value when Honcho is active.
- [ ] **honcho identity subcommand** — add the `openclaw honcho identity` CLI command.
- [ ] **AI peer name injection** — if an `aiPeer` name is configured, prepend it to the injected system prompt.
- [ ] **honcho mode / honcho sessions / honcho map** — CLI parity with Hermes.

Already done in openclaw-honcho (do not re-implement): `lastSavedIndex` dedup, platform metadata stripping, multi-agent parent observer hierarchy, `peerPerspective` on `context()`, tiered tool surface (fast/LLM), workspace `agentPeerMap`, QMD passthrough, self-hosted Honcho support.

---

## nanobot-honcho checklist

nanobot-honcho is a greenfield integration. Start from openclaw-honcho's architecture (hook-based, dual peer) and apply all Hermes patterns from day one rather than retrofitting. Priority order:

### Phase 1 — core correctness

- [ ] Dual peer model (owner + agent peer), both with `observe_me=True`
- [ ] Message capture at turn end with `lastSavedIndex` dedup (see the sketch after this list)
- [ ] Platform metadata stripping before Honcho storage
- [ ] Async prefetch from day one — do not implement blocking context injection
- [ ] Legacy file migration at first activation (USER.md → owner peer, SOUL.md → `seedAiIdentity()`)
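A sketch of the `lastSavedIndex` dedup named above. The session interface mirrors only the calls this document references; `toHonchoMessage` and the message type are illustrative assumptions:

```typescript
interface HonchoSessionLike {
  // Assumed surface — matches the calls referenced in this document.
  getMetadata(): Promise<Record<string, unknown> | null>;
  setMetadata(meta: Record<string, unknown>): Promise<void>;
  addMessages(messages: unknown[]): Promise<void>;
}

type TurnMessage = { role: string; content: string };
declare function toHonchoMessage(m: TurnMessage): unknown;

async function syncTurn(session: HonchoSessionLike, transcript: TurnMessage[]) {
  const meta = (await session.getMetadata()) ?? {};
  const lastSaved =
    typeof meta.lastSavedIndex === "number" ? meta.lastSavedIndex : -1;

  const fresh = transcript.slice(lastSaved + 1);
  if (fresh.length === 0) return; // nothing new since last save — never re-send

  await session.addMessages(fresh.map(toHonchoMessage));
  await session.setMetadata({ ...meta, lastSavedIndex: transcript.length - 1 });
}
```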

### Phase 2 — configuration

- [ ] Config schema: `apiKey`, `workspaceId`, `baseUrl`, `memoryMode`, `userMemoryMode`, `agentMemoryMode`, `dialecticReasoningLevel`, `sessionStrategy`, `sessions`
- [ ] Per-peer memory mode gating
- [ ] Dynamic reasoning level
- [ ] Session naming strategies

### Phase 3 — tools and CLI

- [ ] Tool surface: `honcho_profile`, `honcho_recall`, `honcho_analyze`, `honcho_search`, `honcho_context`
- [ ] CLI: `setup`, `status`, `sessions`, `map`, `mode`, `identity`
- [ ] CLI surface injection into system prompt
- [ ] AI peer name wired into agent identity
- - - - - diff --git a/docs/honcho-integration-spec.md b/docs/honcho-integration-spec.md deleted file mode 100644 index 7731a262d9..0000000000 --- a/docs/honcho-integration-spec.md +++ /dev/null @@ -1,377 +0,0 @@ -# honcho-integration-spec - -Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations. - ---- - -## Overview - -Two independent Honcho integrations have been built for two different agent runtimes: **Hermes Agent** (Python, baked into the runner) and **openclaw-honcho** (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, `session.context()`, `peer.chat()` — but they made different tradeoffs at every layer. - -This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language. - -> **Scope** Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive. - ---- - -## Architecture comparison - -### Hermes: baked-in runner - -Honcho is initialised directly inside `AIAgent.__init__`. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into `_cached_system_prompt`) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider. - -Turn flow: - -``` -user message - → _honcho_prefetch() (reads cache — no HTTP) - → _build_system_prompt() (first turn only, cached) - → LLM call - → response - → _honcho_fire_prefetch() (daemon threads, turn end) - → prefetch_context() thread ──┐ - → prefetch_dialectic() thread ─┴→ _context_cache / _dialectic_cache -``` - -### openclaw-honcho: hook-based plugin - -The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside `before_prompt_build` on every turn. Message capture happens in `agent_end`. The multi-agent hierarchy is tracked via `subagent_spawned`. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin. - -Turn flow: - -``` -user message - → before_prompt_build (BLOCKING HTTP — every turn) - → session.context() - → system prompt assembled - → LLM call - → response - → agent_end hook - → session.addMessages() - → session.setMetadata() -``` - ---- - -## Diff table - -| Dimension | Hermes Agent | openclaw-honcho | -|---|---|---| -| **Context injection timing** | Once per session (cached). Zero HTTP on response path after turn 1. | Every turn, blocking. Fresh context per turn but adds latency. | -| **Prefetch strategy** | Daemon threads fire at turn end; consumed next turn from cache. | None. Blocking call at prompt-build time. | -| **Dialectic (peer.chat)** | Prefetched async; result injected into system prompt next turn. | On-demand via `honcho_recall` / `honcho_analyze` tools. | -| **Reasoning level** | Dynamic: scales with message length. Floor = config default. Cap = "high". | Fixed per tool: recall=minimal, analyze=medium. | -| **Memory modes** | `user_memory_mode` / `agent_memory_mode`: hybrid / honcho / local. | None. Always writes to Honcho. | -| **Write frequency** | async (background queue), turn, session, N turns. | After every agent_end (no control). 
| -| **AI peer identity** | `observe_me=True`, `seed_ai_identity()`, `get_ai_representation()`, SOUL.md → AI peer. | Agent files uploaded to agent peer at setup. No ongoing self-observation. | -| **Context scope** | User peer + AI peer representation, both injected. | User peer (owner) representation + conversation summary. `peerPerspective` on context call. | -| **Session naming** | per-directory / global / manual map / title-based. | Derived from platform session key. | -| **Multi-agent** | Single-agent only. | Parent observer hierarchy via `subagent_spawned`. | -| **Tool surface** | Single `query_user_context` tool (on-demand dialectic). | 6 tools: session, profile, search, context (fast) + recall, analyze (LLM). | -| **Platform metadata** | Not stripped. | Explicitly stripped before Honcho storage. | -| **Message dedup** | None. | `lastSavedIndex` in session metadata prevents re-sending. | -| **CLI surface in prompt** | Management commands injected into system prompt. Agent knows its own CLI. | Not injected. | -| **AI peer name in identity** | Replaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured. | Not implemented. | -| **QMD / local file search** | Not implemented. | Passthrough tools when QMD backend configured. | -| **Workspace metadata** | Not implemented. | `agentPeerMap` in workspace metadata tracks agent→peer ID. | - ---- - -## Patterns - -Six patterns from Hermes are worth adopting in any Honcho integration. Each is described as an integration-agnostic interface. - -**Hermes contributes:** -- Async prefetch (zero-latency) -- Dynamic reasoning level -- Per-peer memory modes -- AI peer identity formation -- Session naming strategies -- CLI surface injection - -**openclaw-honcho contributes back (Hermes should adopt):** -- `lastSavedIndex` dedup -- Platform metadata stripping -- Multi-agent observer hierarchy -- `peerPerspective` on `context()` -- Tiered tool surface (fast/LLM) -- Workspace `agentPeerMap` - ---- - -## Spec: async prefetch - -### Problem - -Calling `session.context()` and `peer.chat()` synchronously before each LLM call adds 200–800ms of Honcho round-trip latency to every turn. - -### Pattern - -Fire both calls as non-blocking background work at the **end** of each turn. Store results in a per-session cache keyed by session ID. At the **start** of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path. - -### Interface contract - -```typescript -interface AsyncPrefetch { - // Fire context + dialectic fetches at turn end. Non-blocking. - firePrefetch(sessionId: string, userMessage: string): void; - - // Pop cached results at turn start. Returns empty if cache is cold. - popContextResult(sessionId: string): ContextResult | null; - popDialecticResult(sessionId: string): string | null; -} - -type ContextResult = { - representation: string; - card: string[]; - aiRepresentation?: string; // AI peer context if enabled - summary?: string; // conversation summary if fetched -}; -``` - -### Implementation notes - -- **Python:** `threading.Thread(daemon=True)`. Write to `dict[session_id, result]` — GIL makes this safe for simple writes. -- **TypeScript:** `Promise` stored in `Map>`. Await at pop time. If not resolved yet, return null — do not block. -- The pop is destructive: clears the cache entry after reading so stale data never accumulates. -- Prefetch should also fire on first turn (even though it won't be consumed until turn 2). 
- -### openclaw-honcho adoption - -Move `session.context()` from `before_prompt_build` to a post-`agent_end` background task. Store result in `state.contextCache`. In `before_prompt_build`, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn. - ---- - -## Spec: dynamic reasoning level - -### Problem - -Honcho's dialectic endpoint supports reasoning levels from `minimal` to `max`. A fixed level per tool wastes budget on simple queries and under-serves complex ones. - -### Pattern - -Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at `high` — never select `max` automatically. - -### Logic - -``` -< 120 chars → default (typically "low") -120–400 chars → one level above default (cap at "high") -> 400 chars → two levels above default (cap at "high") -``` - -### Config key - -Add `dialecticReasoningLevel` (string, default `"low"`). This sets the floor. The dynamic bump always applies on top. - -### openclaw-honcho adoption - -Apply in `honcho_recall` and `honcho_analyze`: replace fixed `reasoningLevel` with the dynamic selector. `honcho_recall` uses floor `"minimal"`, `honcho_analyze` uses floor `"medium"` — both still bump with message length. - ---- - -## Spec: per-peer memory modes - -### Problem - -Users want independent control over whether user context and agent context are written locally, to Honcho, or both. - -### Modes - -| Mode | Effect | -|---|---| -| `hybrid` | Write to both local files and Honcho (default) | -| `honcho` | Honcho only — disable corresponding local file writes | -| `local` | Local files only — skip Honcho sync for this peer | - -### Config schema - -```json -{ - "memoryMode": "hybrid", - "userMemoryMode": "honcho", - "agentMemoryMode": "hybrid" -} -``` - -Resolution order: per-peer field wins → shorthand `memoryMode` → default `"hybrid"`. - -### Effect on Honcho sync - -- `userMemoryMode=local`: skip adding user peer messages to Honcho -- `agentMemoryMode=local`: skip adding assistant peer messages to Honcho -- Both local: skip `session.addMessages()` entirely -- `userMemoryMode=honcho`: disable local USER.md writes -- `agentMemoryMode=honcho`: disable local MEMORY.md / SOUL.md writes - ---- - -## Spec: AI peer identity formation - -### Problem - -Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if `observe_me=True` is set for the agent peer. Without it, the agent peer accumulates nothing. - -Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation. - -### Part A: observe_me=True for agent peer - -```typescript -await session.addPeers([ - [ownerPeer.id, { observeMe: true, observeOthers: false }], - [agentPeer.id, { observeMe: true, observeOthers: true }], // was false -]); -``` - -One-line change. Foundational. Without it, the AI peer representation stays empty regardless of what the agent says. 
- -### Part B: seedAiIdentity() - -```typescript -async function seedAiIdentity( - agentPeer: Peer, - content: string, - source: string -): Promise { - const wrapped = [ - ``, - `${source}`, - ``, - content.trim(), - ``, - ].join("\n"); - - await agentPeer.addMessage("assistant", wrapped); - return true; -} -``` - -### Part C: migrate agent files at setup - -During `honcho setup`, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md) to the agent peer via `seedAiIdentity()` instead of `session.uploadFile()`. This routes content through Honcho's observation pipeline. - -### Part D: AI peer name in identity - -When the agent has a configured name, prepend it to the injected system prompt: - -```typescript -const namePrefix = agentName ? `You are ${agentName}.\n\n` : ""; -return { systemPrompt: namePrefix + "## User Memory Context\n\n" + sections }; -``` - -### CLI surface - -``` -honcho identity # seed from file -honcho identity --show # show current AI peer representation -``` - ---- - -## Spec: session naming strategies - -### Problem - -A single global session means every project shares the same Honcho context. Per-directory sessions provide isolation without requiring users to name sessions manually. - -### Strategies - -| Strategy | Session key | When to use | -|---|---|---| -| `per-directory` | basename of CWD | Default. Each project gets its own session. | -| `global` | fixed string `"global"` | Single cross-project session. | -| manual map | user-configured per path | `sessions` config map overrides directory basename. | -| title-based | sanitized session title | When agent supports named sessions set mid-conversation. | - -### Config schema - -```json -{ - "sessionStrategy": "per-directory", - "sessionPeerPrefix": false, - "sessions": { - "/home/user/projects/foo": "foo-project" - } -} -``` - -### CLI surface - -``` -honcho sessions # list all mappings -honcho map # map cwd to session name -honcho map # no-arg = list mappings -``` - -Resolution order: manual map → session title → directory basename → platform key. - ---- - -## Spec: CLI surface injection - -### Problem - -When a user asks "how do I change my memory settings?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface. - -### Pattern - -When Honcho is active, append a compact command reference to the system prompt. Keep it under 300 chars. - -``` -# Honcho memory integration -Active. Session: {sessionKey}. Mode: {mode}. 
-Management commands: - honcho status — show config + connection - honcho mode [hybrid|honcho|local] — show or set memory mode - honcho sessions — list session mappings - honcho map — map directory to session - honcho identity [file] [--show] — seed or show AI identity - honcho setup — full interactive wizard -``` - ---- - -## openclaw-honcho checklist - -Ordered by impact: - -- [ ] **Async prefetch** — move `session.context()` out of `before_prompt_build` into post-`agent_end` background Promise -- [ ] **observe_me=True for agent peer** — one-line change in `session.addPeers()` -- [ ] **Dynamic reasoning level** — add helper; apply in `honcho_recall` and `honcho_analyze`; add `dialecticReasoningLevel` to config -- [ ] **Per-peer memory modes** — add `userMemoryMode` / `agentMemoryMode` to config; gate Honcho sync and local writes -- [ ] **seedAiIdentity()** — add helper; use during setup migration for SOUL.md / IDENTITY.md -- [ ] **Session naming strategies** — add `sessionStrategy`, `sessions` map, `sessionPeerPrefix` -- [ ] **CLI surface injection** — append command reference to `before_prompt_build` return value -- [ ] **honcho identity subcommand** — seed from file or `--show` current representation -- [ ] **AI peer name injection** — if `aiPeer` name configured, prepend to injected system prompt -- [ ] **honcho mode / sessions / map** — CLI parity with Hermes - -Already done in openclaw-honcho (do not re-implement): `lastSavedIndex` dedup, platform metadata stripping, multi-agent parent observer, `peerPerspective` on `context()`, tiered tool surface, workspace `agentPeerMap`, QMD passthrough, self-hosted Honcho. - ---- - -## nanobot-honcho checklist - -Greenfield integration. Start from openclaw-honcho's architecture and apply all Hermes patterns from day one. - -### Phase 1 — core correctness - -- [ ] Dual peer model (owner + agent peer), both with `observe_me=True` -- [ ] Message capture at turn end with `lastSavedIndex` dedup -- [ ] Platform metadata stripping before Honcho storage -- [ ] Async prefetch from day one — do not implement blocking context injection -- [ ] Legacy file migration at first activation (USER.md → owner peer, SOUL.md → `seedAiIdentity()`) - -### Phase 2 — configuration - -- [ ] Config schema: `apiKey`, `workspaceId`, `baseUrl`, `memoryMode`, `userMemoryMode`, `agentMemoryMode`, `dialecticReasoningLevel`, `sessionStrategy`, `sessions` -- [ ] Per-peer memory mode gating -- [ ] Dynamic reasoning level -- [ ] Session naming strategies - -### Phase 3 — tools and CLI - -- [ ] Tool surface: `honcho_profile`, `honcho_recall`, `honcho_analyze`, `honcho_search`, `honcho_context` -- [ ] CLI: `setup`, `status`, `sessions`, `map`, `mode`, `identity` -- [ ] CLI surface injection into system prompt -- [ ] AI peer name wired into agent identity diff --git a/docs/migration/openclaw.md b/docs/migration/openclaw.md deleted file mode 100644 index 30f2f97e4d..0000000000 --- a/docs/migration/openclaw.md +++ /dev/null @@ -1,142 +0,0 @@ -# Migrating from OpenClaw to Hermes Agent - -This guide covers how to import your OpenClaw settings, memories, skills, and API keys into Hermes Agent. - -## Three Ways to Migrate - -### 1. Automatic (during first-time setup) - -When you run `hermes setup` for the first time and Hermes detects `~/.openclaw`, it automatically offers to import your OpenClaw data before configuration begins. Just accept the prompt and everything is handled for you. - -### 2. 
CLI Command (quick, scriptable) - -```bash -hermes claw migrate # Preview then migrate (always shows preview first) -hermes claw migrate --dry-run # Preview only, no changes -hermes claw migrate --preset user-data # Migrate without API keys/secrets -hermes claw migrate --yes # Skip confirmation prompt -``` - -The migration always shows a full preview of what will be imported before making any changes. You review the preview and confirm before anything is written. - -**All options:** - -| Flag | Description | -|------|-------------| -| `--source PATH` | Path to OpenClaw directory (default: `~/.openclaw`) | -| `--dry-run` | Preview only — no files are modified | -| `--preset {user-data,full}` | Migration preset (default: `full`). `user-data` excludes secrets | -| `--overwrite` | Overwrite existing files (default: skip conflicts) | -| `--migrate-secrets` | Include allowlisted secrets (auto-enabled with `full` preset) | -| `--workspace-target PATH` | Copy workspace instructions (AGENTS.md) to this absolute path | -| `--skill-conflict {skip,overwrite,rename}` | How to handle skill name conflicts (default: `skip`) | -| `--yes`, `-y` | Skip confirmation prompts | - -### 3. Agent-Guided (interactive, with previews) - -Ask the agent to run the migration for you: - -``` -> Migrate my OpenClaw setup to Hermes -``` - -The agent will use the `openclaw-migration` skill to: -1. Run a preview first to show what would change -2. Ask about conflict resolution (SOUL.md, skills, etc.) -3. Let you choose between `user-data` and `full` presets -4. Execute the migration with your choices -5. Print a detailed summary of what was migrated - -## What Gets Migrated - -### `user-data` preset -| Item | Source | Destination | -|------|--------|-------------| -| SOUL.md | `~/.openclaw/workspace/SOUL.md` | `~/.hermes/SOUL.md` | -| Memory entries | `~/.openclaw/workspace/MEMORY.md` | `~/.hermes/memories/MEMORY.md` | -| User profile | `~/.openclaw/workspace/USER.md` | `~/.hermes/memories/USER.md` | -| Skills | `~/.openclaw/workspace/skills/` | `~/.hermes/skills/openclaw-imports/` | -| Command allowlist | `~/.openclaw/workspace/exec_approval_patterns.yaml` | Merged into `~/.hermes/config.yaml` | -| Messaging settings | `~/.openclaw/config.yaml` (TELEGRAM_ALLOWED_USERS, MESSAGING_CWD) | `~/.hermes/.env` | -| TTS assets | `~/.openclaw/workspace/tts/` | `~/.hermes/tts/` | - -Workspace files are also checked at `workspace.default/` and `workspace-main/` as fallback paths (OpenClaw renamed `workspace/` to `workspace-main/` in recent versions). - -### `full` preset (adds to `user-data`) -| Item | Source | Destination | -|------|--------|-------------| -| Telegram bot token | `openclaw.json` channels config | `~/.hermes/.env` | -| OpenRouter API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | -| OpenAI API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | -| Anthropic API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | -| ElevenLabs API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` | - -API keys are searched across four sources: inline config values, `~/.openclaw/.env`, the `openclaw.json` `"env"` sub-object, and per-agent auth profiles. - -Only allowlisted secrets are ever imported. Other credentials are skipped and reported. 
- -## OpenClaw Schema Compatibility - -The migration handles both old and current OpenClaw config layouts: - -- **Channel tokens**: Reads from flat paths (`channels.telegram.botToken`) and the newer `accounts.default` layout (`channels.telegram.accounts.default.botToken`) -- **TTS provider**: OpenClaw renamed "edge" to "microsoft" — both are recognized and mapped to Hermes' "edge" -- **Provider API types**: Both short (`openai`, `anthropic`) and hyphenated (`openai-completions`, `anthropic-messages`, `google-generative-ai`) values are mapped correctly -- **thinkingDefault**: All enum values are handled including newer ones (`minimal`, `xhigh`, `adaptive`) -- **Matrix**: Uses `accessToken` field (not `botToken`) -- **SecretRef formats**: Plain strings, env templates (`${VAR}`), and `source: "env"` SecretRefs are resolved. `source: "file"` and `source: "exec"` SecretRefs produce a warning — add those keys manually after migration. - -## Conflict Handling - -By default, the migration **will not overwrite** existing Hermes data: - -- **SOUL.md** — skipped if one already exists in `~/.hermes/` -- **Memory entries** — skipped if memories already exist (to avoid duplicates) -- **Skills** — skipped if a skill with the same name already exists -- **API keys** — skipped if the key is already set in `~/.hermes/.env` - -To overwrite conflicts, use `--overwrite`. The migration creates backups before overwriting. - -For skills, you can also use `--skill-conflict rename` to import conflicting skills under a new name (e.g., `skill-name-imported`). - -## Migration Report - -Every migration produces a report showing: -- **Migrated items** — what was successfully imported -- **Conflicts** — items skipped because they already exist -- **Skipped items** — items not found in the source -- **Errors** — items that failed to import - -For executed migrations, the full report is saved to `~/.hermes/migration/openclaw//`. - -## Post-Migration Notes - -- **Skills require a new session** — imported skills take effect after restarting your agent or starting a new chat. -- **WhatsApp requires re-pairing** — WhatsApp uses QR-code pairing, not token-based auth. Run `hermes whatsapp` to pair. -- **Archive cleanup** — after migration, you'll be offered to rename `~/.openclaw/` to `.openclaw.pre-migration/` to prevent state confusion. You can also run `hermes claw cleanup` later. - -## Troubleshooting - -### "OpenClaw directory not found" -The migration looks for `~/.openclaw` by default, then tries `~/.clawdbot` and `~/.moltbot`. If your OpenClaw is installed elsewhere, use `--source`: -```bash -hermes claw migrate --source /path/to/.openclaw -``` - -### "Migration script not found" -The migration script ships with Hermes Agent. If you installed via pip (not git clone), the `optional-skills/` directory may not be present. Install the skill from the Skills Hub: -```bash -hermes skills install openclaw-migration -``` - -### Memory overflow -If your OpenClaw MEMORY.md or USER.md exceeds Hermes' character limits, excess entries are exported to an overflow file in the migration report directory. You can manually review and add the most important ones. - -### API keys not found -Keys might be stored in different places depending on your OpenClaw setup: -- `~/.openclaw/.env` file -- Inline in `openclaw.json` under `models.providers.*.apiKey` -- In `openclaw.json` under the `"env"` or `"env.vars"` sub-objects -- In `~/.openclaw/agents/main/agent/auth-profiles.json` - -The migration checks all four. 
If keys use `source: "file"` or `source: "exec"` SecretRefs, they can't be resolved automatically — add them via `hermes config set`. diff --git a/docs/plans/2026-03-16-pricing-accuracy-architecture-design.md b/docs/plans/2026-03-16-pricing-accuracy-architecture-design.md deleted file mode 100644 index a75f14ff5a..0000000000 --- a/docs/plans/2026-03-16-pricing-accuracy-architecture-design.md +++ /dev/null @@ -1,608 +0,0 @@ -# Pricing Accuracy Architecture - -Date: 2026-03-16 - -## Goal - -Hermes should only show dollar costs when they are backed by an official source for the user's actual billing path. - -This design replaces the current static, heuristic pricing flow in: - -- `run_agent.py` -- `agent/usage_pricing.py` -- `agent/insights.py` -- `cli.py` - -with a provider-aware pricing system that: - -- handles cache billing correctly -- distinguishes `actual` vs `estimated` vs `included` vs `unknown` -- reconciles post-hoc costs when providers expose authoritative billing data -- supports direct providers, OpenRouter, subscriptions, enterprise pricing, and custom endpoints - -## Problems In The Current Design - -Current Hermes behavior has four structural issues: - -1. It stores only `prompt_tokens` and `completion_tokens`, which is insufficient for providers that bill cache reads and cache writes separately. -2. It uses a static model price table and fuzzy heuristics, which can drift from current official pricing. -3. It assumes public API list pricing matches the user's real billing path. -4. It has no distinction between live estimates and reconciled billed cost. - -## Design Principles - -1. Normalize usage before pricing. -2. Never fold cached tokens into plain input cost. -3. Track certainty explicitly. -4. Treat the billing path as part of the model identity. -5. Prefer official machine-readable sources over scraped docs. -6. Use post-hoc provider cost APIs when available. -7. Show `n/a` rather than inventing precision. - -## High-Level Architecture - -The new system has four layers: - -1. `usage_normalization` - Converts raw provider usage into a canonical usage record. -2. `pricing_source_resolution` - Determines the billing path, source of truth, and applicable pricing source. -3. `cost_estimation_and_reconciliation` - Produces an immediate estimate when possible, then replaces or annotates it with actual billed cost later. -4. `presentation` - `/usage`, `/insights`, and the status bar display cost with certainty metadata. - -## Canonical Usage Record - -Add a canonical usage model that every provider path maps into before any pricing math happens. - -Suggested structure: - -```python -@dataclass -class CanonicalUsage: - provider: str - billing_provider: str - model: str - billing_route: str - - input_tokens: int = 0 - output_tokens: int = 0 - cache_read_tokens: int = 0 - cache_write_tokens: int = 0 - reasoning_tokens: int = 0 - request_count: int = 1 - - raw_usage: dict[str, Any] | None = None - raw_usage_fields: dict[str, str] | None = None - computed_fields: set[str] | None = None - - provider_request_id: str | None = None - provider_generation_id: str | None = None - provider_response_id: str | None = None -``` - -Rules: - -- `input_tokens` means non-cached input only. -- `cache_read_tokens` and `cache_write_tokens` are never merged into `input_tokens`. -- `output_tokens` excludes cache metrics. -- `reasoning_tokens` is telemetry unless a provider officially bills it separately. 
- -This is the same normalization pattern used by `opencode`, extended with provenance and reconciliation ids. - -## Provider Normalization Rules - -### OpenAI Direct - -Source usage fields: - -- `prompt_tokens` -- `completion_tokens` -- `prompt_tokens_details.cached_tokens` - -Normalization: - -- `cache_read_tokens = cached_tokens` -- `input_tokens = prompt_tokens - cached_tokens` -- `cache_write_tokens = 0` unless OpenAI exposes it in the relevant route -- `output_tokens = completion_tokens` - -### Anthropic Direct - -Source usage fields: - -- `input_tokens` -- `output_tokens` -- `cache_read_input_tokens` -- `cache_creation_input_tokens` - -Normalization: - -- `input_tokens = input_tokens` -- `output_tokens = output_tokens` -- `cache_read_tokens = cache_read_input_tokens` -- `cache_write_tokens = cache_creation_input_tokens` - -### OpenRouter - -Estimate-time usage normalization should use the response usage payload with the same rules as the underlying provider when possible. - -Reconciliation-time records should also store: - -- OpenRouter generation id -- native token fields when available -- `total_cost` -- `cache_discount` -- `upstream_inference_cost` -- `is_byok` - -### Gemini / Vertex - -Use official Gemini or Vertex usage fields where available. - -If cached content tokens are exposed: - -- map them to `cache_read_tokens` - -If a route exposes no cache creation metric: - -- store `cache_write_tokens = 0` -- preserve the raw usage payload for later extension - -### DeepSeek And Other Direct Providers - -Normalize only the fields that are officially exposed. - -If a provider does not expose cache buckets: - -- do not infer them unless the provider explicitly documents how to derive them - -### Subscription / Included-Cost Routes - -These still use the canonical usage model. - -Tokens are tracked normally. Cost depends on billing mode, not on whether usage exists. - -## Billing Route Model - -Hermes must stop keying pricing solely by `model`. 
- -Introduce a billing route descriptor: - -```python -@dataclass -class BillingRoute: - provider: str - base_url: str | None - model: str - billing_mode: str - organization_hint: str | None = None -``` - -`billing_mode` values: - -- `official_cost_api` -- `official_generation_api` -- `official_models_api` -- `official_docs_snapshot` -- `subscription_included` -- `user_override` -- `custom_contract` -- `unknown` - -Examples: - -- OpenAI direct API with Costs API access: `official_cost_api` -- Anthropic direct API with Usage & Cost API access: `official_cost_api` -- OpenRouter request before reconciliation: `official_models_api` -- OpenRouter request after generation lookup: `official_generation_api` -- GitHub Copilot style subscription route: `subscription_included` -- local OpenAI-compatible server: `unknown` -- enterprise contract with configured rates: `custom_contract` - -## Cost Status Model - -Every displayed cost should have: - -```python -@dataclass -class CostResult: - amount_usd: Decimal | None - status: Literal["actual", "estimated", "included", "unknown"] - source: Literal[ - "provider_cost_api", - "provider_generation_api", - "provider_models_api", - "official_docs_snapshot", - "user_override", - "custom_contract", - "none", - ] - label: str - fetched_at: datetime | None - pricing_version: str | None - notes: list[str] -``` - -Presentation rules: - -- `actual`: show dollar amount as final -- `estimated`: show dollar amount with estimate labeling -- `included`: show `included` or `$0.00 (included)` depending on UX choice -- `unknown`: show `n/a` - -## Official Source Hierarchy - -Resolve cost using this order: - -1. Request-level or account-level official billed cost -2. Official machine-readable model pricing -3. Official docs snapshot -4. User override or custom contract -5. Unknown - -The system must never skip to a lower level if a higher-confidence source exists for the current billing route. - -## Provider-Specific Truth Rules - -### OpenAI Direct - -Preferred truth: - -1. Costs API for reconciled spend -2. Official pricing page for live estimate - -### Anthropic Direct - -Preferred truth: - -1. Usage & Cost API for reconciled spend -2. Official pricing docs for live estimate - -### OpenRouter - -Preferred truth: - -1. `GET /api/v1/generation` for reconciled `total_cost` -2. `GET /api/v1/models` pricing for live estimate - -Do not use underlying provider public pricing as the source of truth for OpenRouter billing. - -### Gemini / Vertex - -Preferred truth: - -1. official billing export or billing API for reconciled spend when available for the route -2. official pricing docs for estimate - -### DeepSeek - -Preferred truth: - -1. official machine-readable cost source if available in the future -2. official pricing docs snapshot today - -### Subscription-Included Routes - -Preferred truth: - -1. explicit route config marking the model as included in subscription - -These should display `included`, not an API list-price estimate. - -### Custom Endpoint / Local Model - -Preferred truth: - -1. user override -2. custom contract config -3. unknown - -These should default to `unknown`. - -## Pricing Catalog - -Replace the current `MODEL_PRICING` dict with a richer pricing catalog. 
- -Suggested record: - -```python -@dataclass -class PricingEntry: - provider: str - route_pattern: str - model_pattern: str - - input_cost_per_million: Decimal | None = None - output_cost_per_million: Decimal | None = None - cache_read_cost_per_million: Decimal | None = None - cache_write_cost_per_million: Decimal | None = None - request_cost: Decimal | None = None - image_cost: Decimal | None = None - - source: str = "official_docs_snapshot" - source_url: str | None = None - fetched_at: datetime | None = None - pricing_version: str | None = None -``` - -The catalog should be route-aware: - -- `openai:gpt-5` -- `anthropic:claude-opus-4-6` -- `openrouter:anthropic/claude-opus-4.6` -- `copilot:gpt-4o` - -This avoids conflating direct-provider billing with aggregator billing. - -## Pricing Sync Architecture - -Introduce a pricing sync subsystem instead of manually maintaining a single hardcoded table. - -Suggested modules: - -- `agent/pricing/catalog.py` -- `agent/pricing/sources.py` -- `agent/pricing/sync.py` -- `agent/pricing/reconcile.py` -- `agent/pricing/types.py` - -### Sync Sources - -- OpenRouter models API -- official provider docs snapshots where no API exists -- user overrides from config - -### Sync Output - -Cache pricing entries locally with: - -- source URL -- fetch timestamp -- version/hash -- confidence/source type - -### Sync Frequency - -- startup warm cache -- background refresh every 6 to 24 hours depending on source -- manual `hermes pricing sync` - -## Reconciliation Architecture - -Live requests may produce only an estimate initially. Hermes should reconcile them later when a provider exposes actual billed cost. - -Suggested flow: - -1. Agent call completes. -2. Hermes stores canonical usage plus reconciliation ids. -3. Hermes computes an immediate estimate if a pricing source exists. -4. A reconciliation worker fetches actual cost when supported. -5. Session and message records are updated with `actual` cost. - -This can run: - -- inline for cheap lookups -- asynchronously for delayed provider accounting - -## Persistence Changes - -Session storage should stop storing only aggregate prompt/completion totals. 
- -Add fields for both usage and cost certainty: - -- `input_tokens` -- `output_tokens` -- `cache_read_tokens` -- `cache_write_tokens` -- `reasoning_tokens` -- `estimated_cost_usd` -- `actual_cost_usd` -- `cost_status` -- `cost_source` -- `pricing_version` -- `billing_provider` -- `billing_mode` - -If schema expansion is too large for one PR, add a new pricing events table: - -```text -session_cost_events - id - session_id - request_id - provider - model - billing_mode - input_tokens - output_tokens - cache_read_tokens - cache_write_tokens - estimated_cost_usd - actual_cost_usd - cost_status - cost_source - pricing_version - created_at - updated_at -``` - -## Hermes Touchpoints - -### `run_agent.py` - -Current responsibility: - -- parse raw provider usage -- update session token counters - -New responsibility: - -- build `CanonicalUsage` -- update canonical counters -- store reconciliation ids -- emit usage event to pricing subsystem - -### `agent/usage_pricing.py` - -Current responsibility: - -- static lookup table -- direct cost arithmetic - -New responsibility: - -- move or replace with pricing catalog facade -- no fuzzy model-family heuristics -- no direct pricing without billing-route context - -### `cli.py` - -Current responsibility: - -- compute session cost directly from prompt/completion totals - -New responsibility: - -- display `CostResult` -- show status badges: - - `actual` - - `estimated` - - `included` - - `n/a` - -### `agent/insights.py` - -Current responsibility: - -- recompute historical estimates from static pricing - -New responsibility: - -- aggregate stored pricing events -- prefer actual cost over estimate -- surface estimates only when reconciliation is unavailable - -## UX Rules - -### Status Bar - -Show one of: - -- `$1.42` -- `~$1.42` -- `included` -- `cost n/a` - -Where: - -- `$1.42` means `actual` -- `~$1.42` means `estimated` -- `included` means subscription-backed or explicitly zero-cost route -- `cost n/a` means unknown - -### `/usage` - -Show: - -- token buckets -- estimated cost -- actual cost if available -- cost status -- pricing source - -### `/insights` - -Aggregate: - -- actual cost totals -- estimated-only totals -- unknown-cost sessions count -- included-cost sessions count - -## Config And Overrides - -Add user-configurable pricing overrides in config: - -```yaml -pricing: - mode: hybrid - sync_on_startup: true - sync_interval_hours: 12 - overrides: - - provider: openrouter - model: anthropic/claude-opus-4.6 - billing_mode: custom_contract - input_cost_per_million: 4.25 - output_cost_per_million: 22.0 - cache_read_cost_per_million: 0.5 - cache_write_cost_per_million: 6.0 - included_routes: - - provider: copilot - model: "*" - - provider: codex-subscription - model: "*" -``` - -Overrides must win over catalog defaults for the matching billing route. 
- -## Rollout Plan - -### Phase 1 - -- add canonical usage model -- split cache token buckets in `run_agent.py` -- stop pricing cache-inflated prompt totals -- preserve current UI with improved backend math - -### Phase 2 - -- add route-aware pricing catalog -- integrate OpenRouter models API sync -- add `estimated` vs `included` vs `unknown` - -### Phase 3 - -- add reconciliation for OpenRouter generation cost -- add actual cost persistence -- update `/insights` to prefer actual cost - -### Phase 4 - -- add direct OpenAI and Anthropic reconciliation paths -- add user overrides and contract pricing -- add pricing sync CLI command - -## Testing Strategy - -Add tests for: - -- OpenAI cached token subtraction -- Anthropic cache read/write separation -- OpenRouter estimated vs actual reconciliation -- subscription-backed models showing `included` -- custom endpoints showing `n/a` -- override precedence -- stale catalog fallback behavior - -Current tests that assume heuristic pricing should be replaced with route-aware expectations. - -## Non-Goals - -- exact enterprise billing reconstruction without an official source or user override -- backfilling perfect historical cost for old sessions that lack cache bucket data -- scraping arbitrary provider web pages at request time - -## Recommendation - -Do not expand the existing `MODEL_PRICING` dict. - -That path cannot satisfy the product requirement. Hermes should instead migrate to: - -- canonical usage normalization -- route-aware pricing sources -- estimate-then-reconcile cost lifecycle -- explicit certainty states in the UI - -This is the minimum architecture that makes the statement "Hermes pricing is backed by official sources where possible, and otherwise clearly labeled" defensible. diff --git a/docs/plans/2026-04-01-ink-gateway-tui-migration-plan.md b/docs/plans/2026-04-01-ink-gateway-tui-migration-plan.md deleted file mode 100644 index 0210a878cb..0000000000 --- a/docs/plans/2026-04-01-ink-gateway-tui-migration-plan.md +++ /dev/null @@ -1,108 +0,0 @@ -# Ink Gateway TUI Migration — Post-mortem - -Planned: 2026-04-01 · Delivered: 2026-04 · Status: shipped, classic (prompt_toolkit) CLI still present - -## What Shipped - -Three layers, same repo, Python runtime unchanged. - -``` -ui-tui (Node/TS) ──stdio JSON-RPC──▶ tui_gateway (Py) ──▶ AIAgent (run_agent.py) -``` - -### Backend — `tui_gateway/` - -``` -tui_gateway/ -├── entry.py # subprocess entrypoint, stdio read/write loop -├── server.py # everything: sessions dict, @method handlers, _emit -├── render.py # stream renderer, diff rendering, message rendering -├── slash_worker.py # subprocess that runs hermes_cli slash commands -└── __init__.py -``` - -`server.py` owns the full runtime-control surface: session store (`_sessions: dict[str, dict]`), method registry (`@method("…")` decorator), event emitter (`_emit`), agent lifecycle (`_make_agent`, `_init_session`, `_wire_callbacks`), approval/sudo/clarify round-trips, and JSON-RPC dispatch. 
- -Protocol methods (`@method(...)` in `server.py`): - -- session: `session.{create, resume, list, close, interrupt, usage, history, compress, branch, title, save, undo}` -- prompt: `prompt.{submit, background, btw}` -- tools: `tools.{list, show, configure}` -- slash: `slash.exec`, `command.{dispatch, resolve}`, `commands.catalog`, `complete.{path, slash}` -- approvals: `approval.respond`, `sudo.respond`, `clarify.respond`, `secret.respond` -- config/state: `config.{get, set, show}`, `model.options`, `reload.mcp` -- ops: `shell.exec`, `cli.exec`, `terminal.resize`, `input.detect_drop`, `clipboard.paste`, `paste.collapse`, `image.attach`, `process.stop` -- misc: `agents.list`, `skills.manage`, `plugins.list`, `cron.manage`, `insights.get`, `rollback.{list, diff, restore}`, `browser.manage` - -Protocol events (`_emit(…)` → handled in `ui-tui/src/app/createGatewayEventHandler.ts`): - -- lifecycle: `gateway.{ready, stderr}`, `session.info`, `skin.changed` -- stream: `message.{start, delta, complete}`, `thinking.delta`, `reasoning.{delta, available}`, `status.update` -- tools: `tool.{start, progress, complete, generating}`, `subagent.{start, thinking, tool, progress, complete}` -- interactive: `approval.request`, `sudo.request`, `clarify.request`, `secret.request` -- async: `background.complete`, `btw.complete`, `error` - -### Frontend — `ui-tui/src/` - -``` -src/ -├── entry.tsx # node bootstrap: bootBanner → spawn python → dynamic-import Ink → render() -├── app.tsx # wraps -├── bootBanner.ts # raw-ANSI banner to stdout in ~2ms, pre-React -├── gatewayClient.ts # JSON-RPC client over child_process stdio -├── gatewayTypes.ts # typed RPC responses + GatewayEvent union -├── theme.ts # DEFAULT_THEME + fromSkin -│ -├── app/ # hooks + stores — the orchestration layer -│ ├── uiStore.ts # nanostore: sid, info, busy, usage, theme, status… -│ ├── turnStore.ts # nanostore: per-turn activity / reasoning / tools -│ ├── turnController.ts # imperative singleton for stream-time operations -│ ├── overlayStore.ts # nanostore: modal/overlay state -│ ├── useMainApp.ts # top-level composition hook -│ ├── useSessionLifecycle.ts # session.create/resume/close/reset -│ ├── useSubmission.ts # shell/slash/prompt dispatch + interpolation -│ ├── useConfigSync.ts # config.get + mtime poll -│ ├── useComposerState.ts # input buffer, paste snippets, editor mode -│ ├── useInputHandlers.ts # key bindings -│ ├── createGatewayEventHandler.ts # event-stream dispatcher -│ ├── createSlashHandler.ts # slash command router (registry + python fallback) -│ └── slash/commands/ # core.ts, ops.ts, session.ts — TS-owned slash commands -│ -├── components/ # AppLayout, AppChrome, AppOverlays, MessageLine, Thinking, Markdown, pickers, prompts, Banner, SessionPanel -├── config/ # env, limits, timing constants -├── content/ # charms, faces, fortunes, hotkeys, placeholders, verbs -├── domain/ # details, messages, paths, roles, slash, usage, viewport -├── protocol/ # interpolation, paste regex -├── hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory -└── lib/ # history, messages, osc52, rpc, text -``` - -### CLI entry points — `hermes_cli/main.py` - -- `hermes --tui` → `node dist/entry.js` (auto-builds when `.ts`/`.tsx` newer than `dist/entry.js`) -- `hermes --tui --dev` → `tsx src/entry.tsx` (skip build) -- `HERMES_TUI_DIR=…` → external prebuilt dist (nix, distro packaging) - -## Diverged From Original Plan - -| Plan | Reality | Why | -|---|---|---| -| `tui_gateway/{controller,session_state,events,protocol}.py` | all collapsed into 
`server.py` | no second consumer ever emerged, keeping one file cheaper than four | -| `ui-tui/src/main.tsx` | split into `entry.tsx` (bootstrap) + `app.tsx` (shell) | boot banner + early python spawn wanted a pre-React moment | -| `ui-tui/src/state/store.ts` | three nanostores (`uiStore`, `turnStore`, `overlayStore`) | separate lifetimes: ui persists, turn resets per reply, overlay is modal | -| `approval.requested` / `sudo.requested` / `clarify.requested` | `*.request` (no `-ed`) | cosmetic | -| `session.cancel` | dropped | `session.interrupt` covers it | -| `HERMES_EXPERIMENTAL_TUI=1`, `display.experimental_tui: true`, `/tui on/off/status` | none shipped | `--tui` went from opt-in to first-class without an experimental phase | - -## Post-migration Additions (not in original plan) - -- **Async `session.create`** — returns sid in ~1ms, agent builds on a background thread, `session.info` broadcasts when ready; `_wait_agent()` gates every agent-touching handler via `_sess` -- **`bootBanner`** — raw-ANSI logo painted to stdout at T≈2ms, before Ink loads; `` wipes it seamlessly when React mounts -- **Selection uniform bg** — `theme.color.selectionBg` wired via `useSelection().setSelectionBgColor`; replaces SGR-inverse per-cell swap that fragmented over amber/gold fg -- **Slash command registry** — TS-owned commands in `app/slash/commands/{core,ops,session}.ts`, everything else falls through to `slash.exec` (python worker) -- **Turn store + controller split** — imperative singleton (`turnController`) holds refs/timers, nanostore (`turnStore`) holds render-visible state - -## What's Still Open - -- **Classic CLI not deleted.** `cli.py` still has ~80 `prompt_toolkit` references; classic REPL is still the default when `--tui` is absent. The original plan's "Cut 4 · prompt_toolkit removal later" hasn't happened. -- **No config-file opt-in.** `HERMES_EXPERIMENTAL_TUI` and `display.experimental_tui` were never built; only the CLI flag exists. Fine for now — if we want "default to TUI", a single line in `main.py` flips it. diff --git a/docs/skins/example-skin.yaml b/docs/skins/example-skin.yaml deleted file mode 100644 index fb0be89da6..0000000000 --- a/docs/skins/example-skin.yaml +++ /dev/null @@ -1,106 +0,0 @@ -# ============================================================================ -# Hermes Agent — Example Skin Template -# ============================================================================ -# -# Copy this file to ~/.hermes/skins/.yaml to create a custom skin. -# All fields are optional — missing values inherit from the default skin. -# Activate with: /skin or display.skin: in config.yaml -# -# Keys are marked: -# (both) — applies to both the classic CLI and the TUI -# (classic) — classic CLI only (see hermes --tui in user-guide/tui.md) -# (tui) — TUI only -# -# See hermes_cli/skin_engine.py for the full schema reference. -# ============================================================================ - -# Required: unique skin name (used in /skin command and config) -name: example -description: An example custom skin — copy and modify this template - -# ── Colors ────────────────────────────────────────────────────────────────── -# Hex color values. These control the visual palette. -colors: - # Banner panel (the startup welcome box) — (both) - banner_border: "#CD7F32" # Panel border - banner_title: "#FFD700" # Panel title text - banner_accent: "#FFBF00" # Section headers (Available Tools, Skills, etc.) 
- banner_dim: "#B8860B" # Dim/muted text (separators, model info) - banner_text: "#FFF8DC" # Body text (tool names, skill names) - - # UI elements — (both) - ui_accent: "#FFBF00" # General accent (falls back to banner_accent) - ui_label: "#4dd0e1" # Labels - ui_ok: "#4caf50" # Success indicators - ui_error: "#ef5350" # Error indicators - ui_warn: "#ffa726" # Warning indicators - - # Input area - prompt: "#FFF8DC" # Prompt text / `❯` glyph color (both) - input_rule: "#CD7F32" # Horizontal rule above input (classic) - - # Response box — (classic) - response_border: "#FFD700" # Response box border - - # Session display — (both) - session_label: "#DAA520" # "Session: " label - session_border: "#8B8682" # Session ID text - - # TUI / CLI surfaces — (classic: status bar, voice badge, completion meta) - status_bar_bg: "#1a1a2e" # Status / usage bar background (classic) - voice_status_bg: "#1a1a2e" # Voice-mode badge background (classic) - completion_menu_bg: "#1a1a2e" # Completion list background (both) - completion_menu_current_bg: "#333355" # Active completion row background (both) - completion_menu_meta_bg: "#1a1a2e" # Completion meta column bg (classic) - completion_menu_meta_current_bg: "#333355" # Active meta bg (classic) - - # Drag-to-select background — (tui) - selection_bg: "#3a3a55" # Uniform selection highlight in the TUI - -# ── Spinner ───────────────────────────────────────────────────────────────── -# (classic) — the TUI uses its own animated indicators; spinner config here -# is only read by the classic prompt_toolkit CLI. -spinner: - # Faces shown while waiting for the API response - waiting_faces: - - "(。◕‿◕。)" - - "(◕‿◕✿)" - - "٩(◕‿◕。)۶" - - # Faces shown during extended thinking/reasoning - thinking_faces: - - "(。•́︿•̀。)" - - "(◔_◔)" - - "(¬‿¬)" - - # Verbs used in spinner messages (e.g., "pondering your request...") - thinking_verbs: - - "pondering" - - "contemplating" - - "musing" - - "ruminating" - - # Optional: left/right decorations around the spinner - # Each entry is a [left, right] pair. Omit entirely for no wings. - # wings: - # - ["⟪⚔", "⚔⟫"] - # - ["⟪▲", "▲⟫"] - -# ── Branding ──────────────────────────────────────────────────────────────── -# Text strings used throughout the interface. -branding: - agent_name: "Hermes Agent" # (both) Banner title, about display - welcome: "Welcome! Type your message or /help for commands." # (both) - goodbye: "Goodbye! ⚕" # (both) Exit message - response_label: " ⚕ Hermes " # (classic) Response box header label - prompt_symbol: "❯ " # (both) Input prompt glyph - help_header: "(^_^)? Available Commands" # (both) /help overlay title - -# ── Tool Output ───────────────────────────────────────────────────────────── -# Character used as the prefix for tool output lines. (both) -# Default is "┊" (thin dotted vertical line). Some alternatives: -# "╎" (light triple dash vertical) -# "▏" (left one-eighth block) -# "│" (box drawing light vertical) -# "┃" (box drawing heavy vertical) -tool_prefix: "┊" diff --git a/docs/specs/container-cli-review-fixes.md b/docs/specs/container-cli-review-fixes.md deleted file mode 100644 index 0eb9070dbf..0000000000 --- a/docs/specs/container-cli-review-fixes.md +++ /dev/null @@ -1,329 +0,0 @@ -# Container-Aware CLI Review Fixes Spec - -**PR:** NousResearch/hermes-agent#7543 -**Review:** cursor[bot] bugbot review (4094049442) + two prior rounds -**Date:** 2026-04-12 -**Branch:** `feat/container-aware-cli-clean` - -## Review Issues Summary - -Six issues were raised across three bugbot review rounds. 
All six were fixed across three intermediate commits (79e8cd12, 38277a6a, 726cf90f). This spec addresses remaining design concerns surfaced by those reviews and simplifies the implementation based on interview decisions. - -| # | Issue | Severity | Status | -|---|-------|----------|--------| -| 1 | `os.execvp` retry loop unreachable | Medium | Fixed in 79e8cd12 (switched to subprocess.run) | -| 2 | Redundant `shutil.which("sudo")` | Medium | Fixed in 38277a6a (reuses `sudo` var) | -| 3 | Missing `chown -h` on symlink update | Low | Fixed in 38277a6a | -| 4 | Container routing after `parse_args()` | High | Fixed in 726cf90f | -| 5 | Hardcoded `/home/${user}` | Medium | Fixed in 726cf90f | -| 6 | Group membership not gated on `container.enable` | Low | Fixed in 726cf90f | - -The mechanical fixes are in place but the overall design needs revision. The retry loop, error swallowing, and process model have deeper issues than what the bugbot flagged. - ---- - -## Spec: Revised `_exec_in_container` - -### Design Principles - -1. **Let it crash.** No silent fallbacks. If `.container-mode` exists but something goes wrong, the error propagates naturally (Python traceback). The only case where container routing is skipped is when `.container-mode` doesn't exist or `HERMES_DEV=1`. -2. **No retries.** Probe once for sudo, exec once. If it fails, docker/podman's stderr reaches the user verbatim. -3. **Completely transparent.** No error wrapping, no prefixes, no spinners. Docker's output goes straight through. -4. **`os.execvp` on the happy path.** Replace the Python process entirely so there's no idle parent during interactive sessions. Note: `execvp` never returns on success (process is replaced) and raises `OSError` on failure (it does not return a value). The container process's exit code becomes the process exit code by definition — no explicit propagation needed. -5. **One human-readable exception to "let it crash".** `subprocess.TimeoutExpired` from the sudo probe gets a specific catch with a readable message, since a raw traceback for "your Docker daemon is slow" is confusing. All other exceptions propagate naturally. - -### Execution Flow - -``` -1. get_container_exec_info() - - HERMES_DEV=1 → return None (skip routing) - - Inside container → return None (skip routing) - - .container-mode doesn't exist → return None (skip routing) - - .container-mode exists → parse and return dict - - .container-mode exists but malformed/unreadable → LET IT CRASH (no try/except) - -2. _exec_in_container(container_info, sys.argv[1:]) - a. shutil.which(backend) → if None, print "{backend} not found on PATH" and sys.exit(1) - b. Sudo probe: subprocess.run([runtime, "inspect", "--format", "ok", container_name], timeout=15) - - If succeeds → needs_sudo = False - - If fails → try subprocess.run([sudo, "-n", runtime, "inspect", ...], timeout=15) - - If succeeds → needs_sudo = True - - If fails → print error with sudoers hint (including why -n is required) and sys.exit(1) - - If TimeoutExpired → catch specifically, print human-readable message about slow daemon and sys.exit(1) - c. Build exec_cmd: [sudo? + runtime, "exec", tty_flags, "-u", exec_user, env_flags, container, hermes_bin, *cli_args] - d.
os.execvp(exec_cmd[0], exec_cmd) - - On success: process is replaced — Python is gone, container exit code IS the process exit code - - On OSError: let it crash (natural traceback) -``` - -### Changes to `hermes_cli/main.py` - -#### `_exec_in_container` — rewrite - -Remove: -- The entire retry loop (`max_retries`, `for attempt in range(...)`) -- Spinner logic (`"Waiting for container..."`, dots) -- Exit code classification (125/126/127 handling) -- `subprocess.run` for the exec call (keep it only for the sudo probe) -- Special TTY vs non-TTY retry counts -- The `time` import (no longer needed) - -Change: -- Use `os.execvp(exec_cmd[0], exec_cmd)` as the final call -- Keep the `subprocess` import only for the sudo probe -- Keep TTY detection for the `-it` vs `-i` flag -- Keep env var forwarding (TERM, COLORTERM, LANG, LC_ALL) -- Keep the sudo probe as-is (it's the one "smart" part) -- Bump probe `timeout` from 5s to 15s — cold podman on a loaded machine needs headroom -- Catch `subprocess.TimeoutExpired` specifically on both probe calls — print a readable message about the daemon being unresponsive instead of a raw traceback -- Expand the sudoers hint error message to explain *why* `-n` (non-interactive) is required: a password prompt would hang the CLI or break piped commands - -The function becomes roughly: - -```python -def _exec_in_container(container_info: dict, cli_args: list): - """Replace the current process with a command inside the managed container. - - Probes whether sudo is needed (rootful containers), then os.execvp - into the container. If exec fails, the OS error propagates naturally. - """ - import shutil - import subprocess - - backend = container_info["backend"] - container_name = container_info["container_name"] - exec_user = container_info["exec_user"] - hermes_bin = container_info["hermes_bin"] - - runtime = shutil.which(backend) - if not runtime: - print(f"Error: {backend} not found on PATH. Cannot route to container.", - file=sys.stderr) - sys.exit(1) - - # Probe whether we need sudo to see the rootful container. - # Timeout is 15s — cold podman on a loaded machine can take a while. - # TimeoutExpired is caught specifically for a human-readable message; - # all other exceptions propagate naturally. - needs_sudo = False - sudo = None - try: - probe = subprocess.run( - [runtime, "inspect", "--format", "ok", container_name], - capture_output=True, text=True, timeout=15, - ) - except subprocess.TimeoutExpired: - print( - f"Error: timed out waiting for {backend} to respond.\n" - f"The {backend} daemon may be unresponsive or starting up.", - file=sys.stderr, - ) - sys.exit(1) - - if probe.returncode != 0: - sudo = shutil.which("sudo") - if sudo: - try: - probe2 = subprocess.run( - [sudo, "-n", runtime, "inspect", "--format", "ok", container_name], - capture_output=True, text=True, timeout=15, - ) - except subprocess.TimeoutExpired: - print( - f"Error: timed out waiting for sudo {backend} to respond.", - file=sys.stderr, - ) - sys.exit(1) - - if probe2.returncode == 0: - needs_sudo = True - else: - print( - f"Error: container '{container_name}' not found via {backend}.\n" - f"\n" - f"The NixOS service runs the container as root. Your user cannot\n" - f"see it because {backend} uses per-user namespaces.\n" - f"\n" - f"Fix: grant passwordless sudo for {backend}. 
The -n (non-interactive)\n" - f"flag is required because the CLI calls sudo non-interactively —\n" - f"a password prompt would hang or break piped commands:\n" - f"\n" - f' security.sudo.extraRules = [{{\n' - f' users = [ "{os.getenv("USER", "your-user")}" ];\n' - f' commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n' - f' }}];\n' - f"\n" - f"Or run: sudo hermes {' '.join(cli_args)}", - file=sys.stderr, - ) - sys.exit(1) - else: - print( - f"Error: container '{container_name}' not found via {backend}.\n" - f"The container may be running under root. Try: sudo hermes {' '.join(cli_args)}", - file=sys.stderr, - ) - sys.exit(1) - - is_tty = sys.stdin.isatty() - tty_flags = ["-it"] if is_tty else ["-i"] - - env_flags = [] - for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"): - val = os.environ.get(var) - if val: - env_flags.extend(["-e", f"{var}={val}"]) - - cmd_prefix = [sudo, "-n", runtime] if needs_sudo else [runtime] - exec_cmd = ( - cmd_prefix + ["exec"] - + tty_flags - + ["-u", exec_user] - + env_flags - + [container_name, hermes_bin] - + cli_args - ) - - # execvp replaces this process entirely — it never returns on success. - # On failure it raises OSError, which propagates naturally. - os.execvp(exec_cmd[0], exec_cmd) -``` - -#### Container routing call site in `main()` — remove try/except - -Current: -```python -try: - from hermes_cli.config import get_container_exec_info - container_info = get_container_exec_info() - if container_info: - _exec_in_container(container_info, sys.argv[1:]) - sys.exit(1) # exec failed if we reach here -except SystemExit: - raise -except Exception: - pass # Container routing unavailable, proceed locally -``` - -Revised: -```python -from hermes_cli.config import get_container_exec_info -container_info = get_container_exec_info() -if container_info: - _exec_in_container(container_info, sys.argv[1:]) - # Unreachable: os.execvp never returns on success (process is replaced) - # and raises OSError on failure (which propagates as a traceback). - # This line exists only as a defensive assertion. - sys.exit(1) -``` - -No try/except. If `.container-mode` doesn't exist, `get_container_exec_info()` returns `None` and we skip routing. If it exists but is broken, the exception propagates with a natural traceback. - -Note: `sys.exit(1)` after `_exec_in_container` is dead code in all paths — `os.execvp` either replaces the process or raises. It's kept as a belt-and-suspenders assertion with a comment marking it unreachable, not as actual error handling. - -### Changes to `hermes_cli/config.py` - -#### `get_container_exec_info` — remove inner try/except - -Current code catches `(OSError, IOError)` and returns `None`. This silently hides permission errors, corrupt files, etc. - -Change: Remove the try/except around file reading. Keep the early returns for `HERMES_DEV=1` and `_is_inside_container()`. The `FileNotFoundError` from `open()` when `.container-mode` doesn't exist should still return `None` (this is the "container mode not enabled" case). All other exceptions propagate. - -```python -def get_container_exec_info() -> Optional[dict]: - if os.environ.get("HERMES_DEV") == "1": - return None - if _is_inside_container(): - return None - - container_mode_file = get_hermes_home() / ".container-mode" - - try: - with open(container_mode_file, "r") as f: - # ... parse key=value lines ... - except FileNotFoundError: - return None - # All other exceptions (PermissionError, malformed data, etc.) propagate - - return { ... 
} -``` - ---- - -## Spec: NixOS Module Changes - -### Symlink creation — simplify to two branches - -Current: 4 branches (symlink exists, directory exists, other file, doesn't exist). - -Revised: 2 branches. - -```bash -if [ -d "${symlinkPath}" ] && [ ! -L "${symlinkPath}" ]; then - # Real directory — back it up, then create symlink - _backup="${symlinkPath}.bak.$(date +%s)" - echo "hermes-agent: backing up existing ${symlinkPath} to $_backup" - mv "${symlinkPath}" "$_backup" -fi -# For everything else (symlink, doesn't exist, etc.) — just force-create -ln -sfn "${target}" "${symlinkPath}" -chown -h ${user}:${cfg.group} "${symlinkPath}" -``` - -`ln -sfn` handles: existing symlink (replaces), doesn't exist (creates), and after the `mv` above (creates). The only case that needs special handling is a real directory, because `ln -sfn` cannot atomically replace a directory. - -Note: there is a theoretical race between the `[ -d ... ]` check and the `mv` (something could create/remove the directory in between). In practice this is a NixOS activation script running as root during `nixos-rebuild switch` — no other process should be touching `~/.hermes` at that moment. Not worth adding locking for. - -### Sudoers — document, don't auto-configure - -Do NOT add `security.sudo.extraRules` to the module. Document the sudoers requirement in the module's description/comments and in the error message the CLI prints when sudo probe fails. - -### Group membership gating — keep as-is - -The fix in 726cf90f (`cfg.container.enable && cfg.container.hostUsers != []`) is correct. Leftover group membership when container mode is disabled is harmless. No cleanup needed. - ---- - -## Spec: Test Rewrite - -The existing test file (`tests/hermes_cli/test_container_aware_cli.py`) has 16 tests. With the simplified exec model, several are obsolete. 
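Before the test inventory, a minimal end-to-end sketch of the narrowed `get_container_exec_info` as it would sit in `hermes_cli/config.py` (the tests below exercise exactly these paths). The early returns, the `.container-mode` location, the key=value format, and the `FileNotFoundError` narrowing come from this spec; the blank-line/comment tolerance, the `read_text()` call, and the default field values are illustrative assumptions, not the final implementation:

```python
import os
from typing import Optional


def get_container_exec_info() -> Optional[dict]:
    if os.environ.get("HERMES_DEV") == "1":
        return None
    if _is_inside_container():
        return None

    container_mode_file = get_hermes_home() / ".container-mode"
    try:
        raw = container_mode_file.read_text()
    except FileNotFoundError:
        return None  # container mode not enabled: the only silent skip
    # PermissionError, UnicodeDecodeError, etc. propagate -- let it crash.

    fields: dict = {}
    for line in raw.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue  # assumed: blank lines and comments are tolerated
        key, _, value = line.partition("=")
        fields[key.strip()] = value.strip()

    # Keys match what _exec_in_container consumes; the defaults here are
    # placeholders (test_get_container_exec_info_defaults pins the real ones).
    return {
        "backend": fields.get("backend", "podman"),
        "container_name": fields.get("container_name", "hermes-agent"),
        "exec_user": fields.get("exec_user", "hermes"),
        "hermes_bin": fields.get("hermes_bin", "hermes"),
    }
```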
- -### Tests to keep (update as needed) - -- `test_is_inside_container_dockerenv` — unchanged -- `test_is_inside_container_containerenv` — unchanged -- `test_is_inside_container_cgroup_docker` — unchanged -- `test_is_inside_container_false_on_host` — unchanged -- `test_get_container_exec_info_returns_metadata` — unchanged -- `test_get_container_exec_info_none_inside_container` — unchanged -- `test_get_container_exec_info_none_without_file` — unchanged -- `test_get_container_exec_info_skipped_when_hermes_dev` — unchanged -- `test_get_container_exec_info_not_skipped_when_hermes_dev_zero` — unchanged -- `test_get_container_exec_info_defaults` — unchanged -- `test_get_container_exec_info_docker_backend` — unchanged - -### Tests to add - -- `test_get_container_exec_info_crashes_on_permission_error` — verify that `PermissionError` propagates (no silent `None` return) -- `test_exec_in_container_calls_execvp` — verify `os.execvp` is called with correct args (runtime, tty flags, user, env, container, binary, cli args) -- `test_exec_in_container_sudo_probe_sets_prefix` — verify that when first probe fails and sudo probe succeeds, `os.execvp` is called with `sudo -n` prefix -- `test_exec_in_container_no_runtime_hard_fails` — keep existing, verify `sys.exit(1)` when `shutil.which` returns None -- `test_exec_in_container_non_tty_uses_i_only` — update to check `os.execvp` args instead of `subprocess.run` args -- `test_exec_in_container_probe_timeout_prints_message` — verify that `subprocess.TimeoutExpired` from the probe produces a human-readable error and `sys.exit(1)`, not a raw traceback -- `test_exec_in_container_container_not_running_no_sudo` — verify the path where runtime exists (`shutil.which` returns a path) but probe returns non-zero and no sudo is available. Should print the "container may be running under root" error. This is distinct from `no_runtime_hard_fails` which covers `shutil.which` returning None. - -### Tests to delete - -- `test_exec_in_container_tty_retries_on_container_failure` — retry loop removed -- `test_exec_in_container_non_tty_retries_silently_exits_126` — retry loop removed -- `test_exec_in_container_propagates_hermes_exit_code` — no subprocess.run to check exit codes; execvp replaces the process. Note: exit code propagation still works correctly — when `os.execvp` succeeds, the container's process *becomes* this process, so its exit code is the process exit code by OS semantics. No application code needed, no test needed. A comment in the function docstring documents this intent for future readers. 
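A sketch of the two core `os.execvp` tests listed above, to pin down the expected argv. The module path (`hermes_cli/main.py`), the probe semantics, and the `container_info` keys come from this spec; the stub plumbing (`SimpleNamespace`, the `/usr/bin/...` paths, the sample `hermes_bin` value) is illustrative rather than final test code:

```python
from types import SimpleNamespace

from hermes_cli import main as cli_main

CONTAINER_INFO = {
    "backend": "podman",
    "container_name": "hermes",           # illustrative values
    "exec_user": "hermes",
    "hermes_bin": "/opt/hermes/bin/hermes",
}


def _patch_common(monkeypatch, run_stub, tty):
    calls = {}
    monkeypatch.setattr("shutil.which", lambda name: f"/usr/bin/{name}")
    monkeypatch.setattr("subprocess.run", run_stub)
    monkeypatch.setattr(cli_main.sys, "stdin", SimpleNamespace(isatty=lambda: tty))
    monkeypatch.setattr(
        cli_main.os, "execvp",
        lambda file, args: calls.update(file=file, args=args),
    )
    return calls


def test_exec_in_container_calls_execvp(monkeypatch):
    # Probe succeeds immediately -> no sudo prefix; non-TTY -> "-i".
    calls = _patch_common(
        monkeypatch, lambda *a, **kw: SimpleNamespace(returncode=0), tty=False
    )
    cli_main._exec_in_container(CONTAINER_INFO, ["--tui"])
    assert calls["args"][:2] == ["/usr/bin/podman", "exec"]
    assert "-i" in calls["args"] and "-it" not in calls["args"]
    assert calls["args"][-2:] == ["/opt/hermes/bin/hermes", "--tui"]


def test_exec_in_container_sudo_probe_sets_prefix(monkeypatch):
    def run_stub(cmd, **kw):
        # Plain probe fails; the `sudo -n` probe succeeds.
        return SimpleNamespace(returncode=0 if cmd[0] == "/usr/bin/sudo" else 1)

    calls = _patch_common(monkeypatch, run_stub, tty=True)
    cli_main._exec_in_container(CONTAINER_INFO, ["--tui"])
    assert calls["args"][:3] == ["/usr/bin/sudo", "-n", "/usr/bin/podman"]
    assert "-it" in calls["args"]
```

Because `_exec_in_container` imports `shutil` and `subprocess` inside the function body, patching `shutil.which` and `subprocess.run` at module level is sufficient: the in-function re-import resolves to the same patched module objects.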
- ---- - -## Out of Scope - -- Auto-configuring sudoers rules in the NixOS module -- Any changes to `get_container_exec_info` parsing logic beyond the try/except narrowing -- Changes to `.container-mode` file format -- Changes to the `HERMES_DEV=1` bypass -- Changes to container detection logic (`_is_inside_container`) diff --git a/environments/tool_context.py b/environments/tool_context.py index 10f537d724..550c5e851c 100644 --- a/environments/tool_context.py +++ b/environments/tool_context.py @@ -53,7 +53,6 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) try: loop = asyncio.get_running_loop() # We're in an async context -- need to run in thread - import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: future = pool.submit( handle_function_call, tool_name, arguments, task_id diff --git a/gateway/config.py b/gateway/config.py index 2d74073234..67ebf73461 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -576,6 +576,14 @@ def load_gateway_config() -> GatewayConfig: bridged["free_response_channels"] = platform_cfg["free_response_channels"] if "mention_patterns" in platform_cfg: bridged["mention_patterns"] = platform_cfg["mention_patterns"] + if "dm_policy" in platform_cfg: + bridged["dm_policy"] = platform_cfg["dm_policy"] + if "allow_from" in platform_cfg: + bridged["allow_from"] = platform_cfg["allow_from"] + if "group_policy" in platform_cfg: + bridged["group_policy"] = platform_cfg["group_policy"] + if "group_allow_from" in platform_cfg: + bridged["group_allow_from"] = platform_cfg["group_allow_from"] if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg: bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"] if "channel_prompts" in platform_cfg: @@ -608,6 +616,8 @@ def load_gateway_config() -> GatewayConfig: if isinstance(frc, list): frc = ",".join(str(v) for v in frc) os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc) + if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"): + os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower() # Discord settings → env vars (env vars take precedence) discord_cfg = yaml_cfg.get("discord", {}) @@ -662,8 +672,7 @@ def load_gateway_config() -> GatewayConfig: if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"): os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower() if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"): - import json as _json - os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"]) + os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"]) frc = telegram_cfg.get("free_response_chats") if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"): if isinstance(frc, list): @@ -700,6 +709,20 @@ def load_gateway_config() -> GatewayConfig: if isinstance(frc, list): frc = ",".join(str(v) for v in frc) os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc) + if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"): + os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower() + af = whatsapp_cfg.get("allow_from") + if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"): + if isinstance(af, list): + af = ",".join(str(v) for v in af) + os.environ["WHATSAPP_ALLOWED_USERS"] = str(af) + if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"): + os.environ["WHATSAPP_GROUP_POLICY"] = 
str(whatsapp_cfg["group_policy"]).lower() + gaf = whatsapp_cfg.get("group_allow_from") + if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"): + if isinstance(gaf, list): + gaf = ",".join(str(v) for v in gaf) + os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf) # DingTalk settings → env vars (env vars take precedence) dingtalk_cfg = yaml_cfg.get("dingtalk", {}) @@ -1237,7 +1260,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None: if legacy_home: qq_home = legacy_home qq_home_name_env = "QQ_HOME_CHANNEL_NAME" - import logging logging.getLogger(__name__).warning( "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL " "in your .env for consistency with the platform key." diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 9687472f57..a6b52ff323 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -117,6 +117,160 @@ def _normalize_chat_content( return "" +# Content part type aliases used by the OpenAI Chat Completions and Responses +# APIs. We accept both spellings on input and emit a single canonical internal +# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the +# rest of the agent pipeline already understands. +_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"}) +_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"}) +_FILE_PART_TYPES = frozenset({"file", "input_file"}) + + +def _normalize_multimodal_content(content: Any) -> Any: + """Validate and normalize multimodal content for the API server. + + Returns a plain string when the content is text-only, or a list of + ``{"type": "text"|"image_url", ...}`` parts when images are present. + The output shape is the native OpenAI Chat Completions vision format, + which the agent pipeline accepts verbatim (OpenAI-wire providers) or + converts (``_preprocess_anthropic_content`` for Anthropic). + + Raises ``ValueError`` with an OpenAI-style code on invalid input: + * ``unsupported_content_type`` — file/input_file/file_id parts, or + non-image ``data:`` URLs. + * ``invalid_image_url`` — missing URL or unsupported scheme. + * ``invalid_content_part`` — malformed text/image objects. + + Callers translate the ValueError into a 400 response. + """ + # Scalar passthrough mirrors ``_normalize_chat_content``. + if content is None: + return "" + if isinstance(content, str): + return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content + if not isinstance(content, list): + # Mirror the legacy text-normalizer's fallback so callers that + # pre-existed image support still get a string back. + return _normalize_chat_content(content) + + items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content + normalized_parts: List[Dict[str, Any]] = [] + text_accum_len = 0 + + for part in items: + if isinstance(part, str): + if part: + trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH] + normalized_parts.append({"type": "text", "text": trimmed}) + text_accum_len += len(trimmed) + continue + + if not isinstance(part, dict): + # Ignore unknown scalars for forward compatibility with future + # Responses API additions (e.g. ``refusal``). The same policy + # the text normalizer applies. 
+ continue + + raw_type = part.get("type") + part_type = str(raw_type or "").strip().lower() + + if part_type in _TEXT_PART_TYPES: + text = part.get("text") + if text is None: + continue + if not isinstance(text, str): + text = str(text) + if text: + trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH] + normalized_parts.append({"type": "text", "text": trimmed}) + text_accum_len += len(trimmed) + continue + + if part_type in _IMAGE_PART_TYPES: + detail = part.get("detail") + image_ref = part.get("image_url") + # OpenAI Responses sends ``input_image`` with a top-level + # ``image_url`` string; Chat Completions sends ``image_url`` as + # ``{"url": "...", "detail": "..."}``. Support both. + if isinstance(image_ref, dict): + url_value = image_ref.get("url") + detail = image_ref.get("detail", detail) + else: + url_value = image_ref + if not isinstance(url_value, str) or not url_value.strip(): + raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.") + url_value = url_value.strip() + lowered = url_value.lower() + if lowered.startswith("data:"): + if not lowered.startswith("data:image/") or "," not in url_value: + raise ValueError( + "unsupported_content_type:Only image data URLs are supported. " + "Non-image data payloads are not supported." + ) + elif not (lowered.startswith("http://") or lowered.startswith("https://")): + raise ValueError( + "invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs." + ) + image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}} + if detail is not None: + if not isinstance(detail, str) or not detail.strip(): + raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.") + image_part["image_url"]["detail"] = detail.strip() + normalized_parts.append(image_part) + continue + + if part_type in _FILE_PART_TYPES: + raise ValueError( + "unsupported_content_type:Inline image inputs are supported, " + "but uploaded files and document inputs are not supported on this endpoint." + ) + + # Unknown part type — reject explicitly so clients get a clear error + # instead of a silently dropped turn. + raise ValueError( + f"unsupported_content_type:Unsupported content part type {raw_type!r}. " + "Only text and image_url/input_image parts are supported." + ) + + if not normalized_parts: + return "" + + # Text-only: collapse to a plain string so downstream logging/trajectory + # code sees the native shape and prompt caching on text-only turns is + # unaffected. + if all(p.get("type") == "text" for p in normalized_parts): + return "\n".join(p["text"] for p in normalized_parts if p.get("text")) + + return normalized_parts + + +def _content_has_visible_payload(content: Any) -> bool: + """True when content has any text or image attachment. 
Used to reject empty turns.""" + if isinstance(content, str): + return bool(content.strip()) + if isinstance(content, list): + for part in content: + if isinstance(part, dict): + ptype = str(part.get("type") or "").strip().lower() + if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip(): + return True + if ptype in _IMAGE_PART_TYPES: + return True + return False + + +def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response": + """Translate a ``_normalize_multimodal_content`` ValueError into a 400 response.""" + raw = str(exc) + code, _, message = raw.partition(":") + if not message: + code, message = "invalid_content_part", raw + return web.json_response( + _openai_error(message, code=code, param=param), + status=400, + ) + + def check_api_server_requirements() -> bool: """Check if API server dependencies are available.""" return AIOHTTP_AVAILABLE @@ -169,7 +323,6 @@ class ResponseStore: ).fetchone() if row is None: return None - import time self._conn.execute( "UPDATE responses SET accessed_at = ? WHERE response_id = ?", (time.time(), response_id), @@ -179,7 +332,6 @@ class ResponseStore: def put(self, response_id: str, data: Dict[str, Any]) -> None: """Store a response, evicting the oldest if at capacity.""" - import time self._conn.execute( "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)", (response_id, json.dumps(data, default=str), time.time()), @@ -315,12 +467,12 @@ class _IdempotencyCache: def __init__(self, max_items: int = 1000, ttl_seconds: int = 300): from collections import OrderedDict self._store = OrderedDict() + self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {} self._ttl = ttl_seconds self._max = max_items def _purge(self): - import time as _t - now = _t.time() + now = time.time() expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl] for k in expired: self._store.pop(k, None) @@ -332,11 +484,27 @@ class _IdempotencyCache: item = self._store.get(key) if item and item["fp"] == fingerprint: return item["resp"] - resp = await compute_coro() - import time as _t - self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()} - self._purge() - return resp + + inflight_key = (key, fingerprint) + task = self._inflight.get(inflight_key) + if task is None: + async def _compute_and_store(): + resp = await compute_coro() + import time as _t + self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()} + self._purge() + return resp + + task = asyncio.create_task(_compute_and_store()) + self._inflight[inflight_key] = task + + def _clear_inflight(done_task: "asyncio.Task[Any]") -> None: + if self._inflight.get(inflight_key) is done_task: + self._inflight.pop(inflight_key, None) + + task.add_done_callback(_clear_inflight) + + return await asyncio.shield(task) _idem_cache = _IdempotencyCache() @@ -366,6 +534,30 @@ def _derive_chat_session_id( return f"api-{digest}" +_CRON_AVAILABLE = False +try: + from cron.jobs import ( + list_jobs as _cron_list, + get_job as _cron_get, + create_job as _cron_create, + update_job as _cron_update, + remove_job as _cron_remove, + pause_job as _cron_pause, + resume_job as _cron_resume, + trigger_job as _cron_trigger, + ) + _CRON_AVAILABLE = True +except ImportError: + _cron_list = None + _cron_get = None + _cron_create = None + _cron_update = None + _cron_remove = None + _cron_pause = None + _cron_resume = None + _cron_trigger = None + + class APIServerAdapter(BasePlatformAdapter): """ OpenAI-compatible HTTP API server adapter. 
@@ -637,26 +829,32 @@ class APIServerAdapter(BasePlatformAdapter): system_prompt = None conversation_messages: List[Dict[str, str]] = [] - for msg in messages: + for idx, msg in enumerate(messages): role = msg.get("role", "") - content = _normalize_chat_content(msg.get("content", "")) + raw_content = msg.get("content", "") if role == "system": - # Accumulate system messages + # System messages don't support images (Anthropic rejects, OpenAI + # text-model systems don't render them). Flatten to text. + content = _normalize_chat_content(raw_content) if system_prompt is None: system_prompt = content else: system_prompt = system_prompt + "\n" + content elif role in ("user", "assistant"): + try: + content = _normalize_multimodal_content(raw_content) + except ValueError as exc: + return _multimodal_validation_error(exc, param=f"messages[{idx}].content") conversation_messages.append({"role": role, "content": content}) # Extract the last user message as the primary input - user_message = "" + user_message: Any = "" history = [] if conversation_messages: user_message = conversation_messages[-1].get("content", "") history = conversation_messages[:-1] - if not user_message: + if not _content_has_visible_payload(user_message): return web.json_response( {"error": {"message": "No user message found in messages", "type": "invalid_request_error"}}, status=400, @@ -1424,16 +1622,19 @@ class APIServerAdapter(BasePlatformAdapter): # No error if conversation doesn't exist yet — it's a new conversation # Normalize input to message list - input_messages: List[Dict[str, str]] = [] + input_messages: List[Dict[str, Any]] = [] if isinstance(raw_input, str): input_messages = [{"role": "user", "content": raw_input}] elif isinstance(raw_input, list): - for item in raw_input: + for idx, item in enumerate(raw_input): if isinstance(item, str): input_messages.append({"role": "user", "content": item}) elif isinstance(item, dict): role = item.get("role", "user") - content = _normalize_chat_content(item.get("content", "")) + try: + content = _normalize_multimodal_content(item.get("content", "")) + except ValueError as exc: + return _multimodal_validation_error(exc, param=f"input[{idx}].content") input_messages.append({"role": role, "content": content}) else: return web.json_response(_openai_error("'input' must be a string or array"), status=400) @@ -1442,7 +1643,7 @@ class APIServerAdapter(BasePlatformAdapter): # This lets stateless clients supply their own history instead of # relying on server-side response chaining via previous_response_id. # Precedence: explicit conversation_history > previous_response_id. 
- conversation_history: List[Dict[str, str]] = [] + conversation_history: List[Dict[str, Any]] = [] raw_history = body.get("conversation_history") if raw_history: if not isinstance(raw_history, list): @@ -1456,7 +1657,11 @@ class APIServerAdapter(BasePlatformAdapter): _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"), status=400, ) - conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])}) + try: + entry_content = _normalize_multimodal_content(entry["content"]) + except ValueError as exc: + return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content") + conversation_history.append({"role": str(entry["role"]), "content": entry_content}) if previous_response_id: logger.debug("Both conversation_history and previous_response_id provided; using conversation_history") @@ -1476,8 +1681,8 @@ class APIServerAdapter(BasePlatformAdapter): conversation_history.append(msg) # Last input message is the user_message - user_message = input_messages[-1].get("content", "") if input_messages else "" - if not user_message: + user_message: Any = input_messages[-1].get("content", "") if input_messages else "" + if not _content_has_visible_payload(user_message): return web.json_response(_openai_error("No user message found in input"), status=400) # Truncation support @@ -1682,44 +1887,16 @@ class APIServerAdapter(BasePlatformAdapter): # Cron jobs API # ------------------------------------------------------------------ - # Check cron module availability once (not per-request) - _CRON_AVAILABLE = False - try: - from cron.jobs import ( - list_jobs as _cron_list, - get_job as _cron_get, - create_job as _cron_create, - update_job as _cron_update, - remove_job as _cron_remove, - pause_job as _cron_pause, - resume_job as _cron_resume, - trigger_job as _cron_trigger, - ) - # Wrap as staticmethod to prevent descriptor binding — these are plain - # module functions, not instance methods. Without this, self._cron_*() - # injects ``self`` as the first positional argument and every call - # raises TypeError. 
- _cron_list = staticmethod(_cron_list) - _cron_get = staticmethod(_cron_get) - _cron_create = staticmethod(_cron_create) - _cron_update = staticmethod(_cron_update) - _cron_remove = staticmethod(_cron_remove) - _cron_pause = staticmethod(_cron_pause) - _cron_resume = staticmethod(_cron_resume) - _cron_trigger = staticmethod(_cron_trigger) - _CRON_AVAILABLE = True - except ImportError: - pass - _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}") # Allowed fields for update — prevents clients injecting arbitrary keys _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"} _MAX_NAME_LENGTH = 200 _MAX_PROMPT_LENGTH = 5000 - def _check_jobs_available(self) -> Optional["web.Response"]: + @staticmethod + def _check_jobs_available() -> Optional["web.Response"]: """Return error response if cron module isn't available.""" - if not self._CRON_AVAILABLE: + if not _CRON_AVAILABLE: return web.json_response( {"error": "Cron module not available"}, status=501, ) @@ -1744,7 +1921,7 @@ class APIServerAdapter(BasePlatformAdapter): return cron_err try: include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1") - jobs = self._cron_list(include_disabled=include_disabled) + jobs = _cron_list(include_disabled=include_disabled) return web.json_response({"jobs": jobs}) except Exception as e: return web.json_response({"error": str(e)}, status=500) @@ -1792,7 +1969,7 @@ class APIServerAdapter(BasePlatformAdapter): if repeat is not None: kwargs["repeat"] = repeat - job = self._cron_create(**kwargs) + job = _cron_create(**kwargs) return web.json_response({"job": job}) except Exception as e: return web.json_response({"error": str(e)}, status=500) @@ -1809,7 +1986,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_get(job_id) + job = _cron_get(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -1842,7 +2019,7 @@ class APIServerAdapter(BasePlatformAdapter): return web.json_response( {"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400, ) - job = self._cron_update(job_id, sanitized) + job = _cron_update(job_id, sanitized) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -1861,7 +2038,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - success = self._cron_remove(job_id) + success = _cron_remove(job_id) if not success: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"ok": True}) @@ -1880,7 +2057,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_pause(job_id) + job = _cron_pause(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -1899,7 +2076,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_resume(job_id) + job = _cron_resume(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -1918,7 +2095,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_trigger(job_id) + job = _cron_trigger(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) diff --git a/gateway/platforms/base.py 
b/gateway/platforms/base.py index 65f7226e10..56bb3c5cb4 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -6,6 +6,7 @@ and implement the required methods. """ import asyncio +import inspect import ipaddress import logging import os @@ -18,6 +19,8 @@ import uuid from abc import ABC, abstractmethod from urllib.parse import urlsplit +from utils import normalize_proxy_url + logger = logging.getLogger(__name__) @@ -158,13 +161,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None: if platform_env_var: value = (os.environ.get(platform_env_var) or "").strip() if value: - return value + return normalize_proxy_url(value) for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = (os.environ.get(key) or "").strip() if value: - return value - return _detect_macos_system_proxy() + return normalize_proxy_url(value) + return normalize_proxy_url(_detect_macos_system_proxy()) def proxy_kwargs_for_bot(proxy_url: str | None) -> dict: @@ -390,12 +393,9 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}") - import asyncio import httpx - import logging as _logging - _log = _logging.getLogger(__name__) + _log = logging.getLogger(__name__) - last_exc = None async with httpx.AsyncClient( timeout=30.0, follow_redirects=True, @@ -413,7 +413,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> response.raise_for_status() return cache_image_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < retries: @@ -429,7 +428,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> await asyncio.sleep(wait) continue raise - raise last_exc def cleanup_image_cache(max_age_hours: int = 24) -> int: @@ -509,12 +507,9 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}") - import asyncio import httpx - import logging as _logging - _log = _logging.getLogger(__name__) + _log = logging.getLogger(__name__) - last_exc = None async with httpx.AsyncClient( timeout=30.0, follow_redirects=True, @@ -532,7 +527,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> response.raise_for_status() return cache_audio_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < retries: @@ -548,7 +542,39 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> await asyncio.sleep(wait) continue raise - raise last_exc + + +# --------------------------------------------------------------------------- +# Video cache utilities +# +# Same pattern as image/audio cache -- videos from platforms are downloaded +# here so the agent can reference them by local file path. 
+# --------------------------------------------------------------------------- + +VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache") + +SUPPORTED_VIDEO_TYPES = { + ".mp4": "video/mp4", + ".mov": "video/quicktime", + ".webm": "video/webm", + ".mkv": "video/x-matroska", + ".avi": "video/x-msvideo", +} + + +def get_video_cache_dir() -> Path: + """Return the video cache directory, creating it if it doesn't exist.""" + VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True) + return VIDEO_CACHE_DIR + + +def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str: + """Save raw video bytes to the cache and return the absolute file path.""" + cache_dir = get_video_cache_dir() + filename = f"video_{uuid.uuid4().hex[:12]}{ext}" + filepath = cache_dir / filename + filepath.write_bytes(data) + return str(filepath) # --------------------------------------------------------------------------- @@ -880,10 +906,11 @@ class BasePlatformAdapter(ABC): # working on a task after --replace or manual restarts. self._background_tasks: set[asyncio.Task] = set() # One-shot callbacks to fire after the main response is delivered. - # Keyed by session_key. GatewayRunner uses this to defer - # background-review notifications ("💾 Skill created") until the - # primary reply has been sent. - self._post_delivery_callbacks: Dict[str, Callable] = {} + # Keyed by session_key. Values are either a bare callback (legacy) or + # a ``(generation, callback)`` tuple so GatewayRunner can make deferred + # deliveries generation-aware and avoid stale runs clearing callbacks + # registered by a fresher run for the same session. + self._post_delivery_callbacks: Dict[str, Any] = {} self._expected_cancelled_tasks: set[asyncio.Task] = set() self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None # Chats where auto-TTS on voice input is disabled (set by /voice off) @@ -1316,7 +1343,7 @@ class BasePlatformAdapter(ABC): # Extract MEDIA: tags, allowing optional whitespace after the colon # and quoted/backticked paths for LLM-formatted outputs. media_pattern = re.compile( - r'''[`"']?MEDIA:\s*(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' + r'''[`"']?MEDIA:\s*(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' ) for match in media_pattern.finditer(content): path = match.group("path").strip() @@ -1401,7 +1428,13 @@ class BasePlatformAdapter(ABC): return paths, cleaned - async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None: + async def _keep_typing( + self, + chat_id: str, + interval: float = 2.0, + metadata=None, + stop_event: asyncio.Event | None = None, + ) -> None: """ Continuously send typing indicator until cancelled. 
@@ -1415,9 +1448,18 @@ class BasePlatformAdapter(ABC): """ try: while True: + if stop_event is not None and stop_event.is_set(): + return if chat_id not in self._typing_paused: await self.send_typing(chat_id, metadata=metadata) - await asyncio.sleep(interval) + if stop_event is None: + await asyncio.sleep(interval) + continue + try: + await asyncio.wait_for(stop_event.wait(), timeout=interval) + except asyncio.TimeoutError: + continue + return except asyncio.CancelledError: pass # Normal cancellation when handler completes finally: @@ -1444,6 +1486,59 @@ class BasePlatformAdapter(ABC): """Resume typing indicator for a chat after approval resolves.""" self._typing_paused.discard(chat_id) + async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None: + """Signal the active session loop to stop and clear typing immediately.""" + if session_key: + interrupt_event = self._active_sessions.get(session_key) + if interrupt_event is not None: + interrupt_event.set() + try: + await self.stop_typing(chat_id) + except Exception: + pass + + def register_post_delivery_callback( + self, + session_key: str, + callback: Callable, + *, + generation: int | None = None, + ) -> None: + """Register a deferred callback to fire after the main response. + + ``generation`` lets callers tie the callback to a specific gateway run + generation so stale runs cannot clear callbacks owned by a fresher run. + """ + if not session_key or not callable(callback): + return + if generation is None: + self._post_delivery_callbacks[session_key] = callback + else: + self._post_delivery_callbacks[session_key] = (int(generation), callback) + + def pop_post_delivery_callback( + self, + session_key: str, + *, + generation: int | None = None, + ) -> Callable | None: + """Pop a deferred callback, optionally requiring generation ownership.""" + if not session_key: + return None + entry = self._post_delivery_callbacks.get(session_key) + if entry is None: + return None + if isinstance(entry, tuple) and len(entry) == 2: + entry_generation, callback = entry + if generation is not None and int(entry_generation) != int(generation): + return None + self._post_delivery_callbacks.pop(session_key, None) + return callback if callable(callback) else None + if generation is not None: + return None + self._post_delivery_callbacks.pop(session_key, None) + return entry if callable(entry) else None + # ── Processing lifecycle hooks ────────────────────────────────────────── # Subclasses override these to react to message processing events # (e.g. Discord adds 👀/✅/❌ reactions). @@ -1684,8 +1779,6 @@ class BasePlatformAdapter(ABC): HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode) HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode) """ - import random - mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower() if mode == "off": return 0.0 @@ -1714,10 +1807,23 @@ class BasePlatformAdapter(ABC): # Fall back to a new Event only if the entry was removed externally. 
interrupt_event = self._active_sessions.get(session_key) or asyncio.Event() self._active_sessions[session_key] = interrupt_event + callback_generation = getattr(interrupt_event, "_hermes_run_generation", None) # Start continuous typing indicator (refreshes every 2 seconds) _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None - typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata)) + _keep_typing_kwargs = {"metadata": _thread_metadata} + try: + _keep_typing_sig = inspect.signature(self._keep_typing) + except (TypeError, ValueError): + _keep_typing_sig = None + if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters: + _keep_typing_kwargs["stop_event"] = interrupt_event + typing_task = asyncio.create_task( + self._keep_typing( + event.source.chat_id, + **_keep_typing_kwargs, + ) + ) try: await self._run_processing_hook("on_processing_start", event) @@ -1976,7 +2082,14 @@ class BasePlatformAdapter(ABC): finally: # Fire any one-shot post-delivery callback registered for this # session (e.g. deferred background-review notifications). - _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None) + _callback_generation = callback_generation + if hasattr(self, "pop_post_delivery_callback"): + _post_cb = self.pop_post_delivery_callback( + session_key, + generation=_callback_generation, + ) + else: + _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None) if callable(_post_cb): try: _post_cb() @@ -2022,10 +2135,10 @@ class BasePlatformAdapter(ABC): pass # Leave _active_sessions[session_key] populated — the drain # task's own lifecycle will clean it up. - return - # Clean up session tracking - if session_key in self._active_sessions: - del self._active_sessions[session_key] + else: + # Clean up session tracking + if session_key in self._active_sessions: + del self._active_sessions[session_key] async def cancel_background_tasks(self) -> None: """Cancel any in-flight background message-processing tasks. @@ -2033,12 +2146,26 @@ class BasePlatformAdapter(ABC): Used during gateway shutdown/replacement so active sessions from the old process do not keep running after adapters are being torn down. """ - tasks = [task for task in self._background_tasks if not task.done()] - for task in tasks: - self._expected_cancelled_tasks.add(task) - task.cancel() - if tasks: + # Loop until no new tasks appear. Without this, a message + # arriving during the `await asyncio.gather` below would spawn + # a fresh _process_message_background task (added to + # self._background_tasks at line ~1668 via handle_message), + # and the _background_tasks.clear() at the end of this method + # would drop the reference — the task runs untracked against a + # disconnecting adapter, logs send-failures, and may linger + # until it completes on its own. Retrying the drain until the + # task set stabilizes closes the window. + MAX_DRAIN_ROUNDS = 5 + for _ in range(MAX_DRAIN_ROUNDS): + tasks = [task for task in self._background_tasks if not task.done()] + if not tasks: + break + for task in tasks: + self._expected_cancelled_tasks.add(task) + task.cancel() await asyncio.gather(*tasks, return_exceptions=True) + # Loop: late-arrival tasks spawned during the gather above + # will be in self._background_tasks now. Re-check. 
self._background_tasks.clear() self._expected_cancelled_tasks.clear() self._pending_messages.clear() diff --git a/gateway/platforms/bluebubbles.py b/gateway/platforms/bluebubbles.py index a8a2929698..39d4e537eb 100644 --- a/gateway/platforms/bluebubbles.py +++ b/gateway/platforms/bluebubbles.py @@ -75,7 +75,7 @@ def _redact(text: str) -> str: def check_bluebubbles_requirements() -> bool: try: import aiohttp # noqa: F401 - import httpx as _httpx # noqa: F401 + import httpx # noqa: F401 except ImportError: return False return True diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index b1585637ff..d43e18d73d 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -498,6 +498,7 @@ class DiscordAdapter(BasePlatformAdapter): self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering # Voice channel state (per-guild) self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient + self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave # Text batching: merge rapid successive messages (Telegram-style) self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6")) self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")) @@ -540,7 +541,6 @@ class DiscordAdapter(BasePlatformAdapter): # ctypes.util.find_library fails on macOS with Homebrew-installed libs, # so fall back to known Homebrew paths if needed. if not opus_path: - import sys _homebrew_paths = ( "/opt/homebrew/lib/libopus.dylib", # Apple Silicon "/usr/local/lib/libopus.dylib", # Intel Mac @@ -636,6 +636,15 @@ class DiscordAdapter(BasePlatformAdapter): @self._client.event async def on_message(message: DiscordMessage): + # Block until _resolve_allowed_usernames has swapped + # any raw usernames in DISCORD_ALLOWED_USERS for numeric + # IDs (otherwise on_message's author.id lookup can miss). + if not adapter_self._ready_event.is_set(): + try: + await asyncio.wait_for(adapter_self._ready_event.wait(), timeout=30.0) + except asyncio.TimeoutError: + pass + # Dedup: Discord RESUME replays events after reconnects (#4777) if adapter_self._dedup.is_duplicate(str(message.id)): return @@ -1071,6 +1080,8 @@ class DiscordAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Discord message.""" if not self._client: @@ -1237,51 +1248,53 @@ class DiscordAdapter(BasePlatformAdapter): return False guild_id = channel.guild.id - # Already connected in this guild? - existing = self._voice_clients.get(guild_id) - if existing and existing.is_connected(): - if existing.channel.id == channel.id: + async with self._voice_locks.setdefault(guild_id, asyncio.Lock()): + # Already connected in this guild? 
+ existing = self._voice_clients.get(guild_id) + if existing and existing.is_connected(): + if existing.channel.id == channel.id: + self._reset_voice_timeout(guild_id) + return True + await existing.move_to(channel) self._reset_voice_timeout(guild_id) return True - await existing.move_to(channel) + + vc = await channel.connect() + self._voice_clients[guild_id] = vc self._reset_voice_timeout(guild_id) + + # Start voice receiver (Phase 2: listen to users) + try: + receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids) + receiver.start() + self._voice_receivers[guild_id] = receiver + self._voice_listen_tasks[guild_id] = asyncio.ensure_future( + self._voice_listen_loop(guild_id) + ) + except Exception as e: + logger.warning("Voice receiver failed to start: %s", e) + return True - vc = await channel.connect() - self._voice_clients[guild_id] = vc - self._reset_voice_timeout(guild_id) - - # Start voice receiver (Phase 2: listen to users) - try: - receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids) - receiver.start() - self._voice_receivers[guild_id] = receiver - self._voice_listen_tasks[guild_id] = asyncio.ensure_future( - self._voice_listen_loop(guild_id) - ) - except Exception as e: - logger.warning("Voice receiver failed to start: %s", e) - - return True - async def leave_voice_channel(self, guild_id: int) -> None: """Disconnect from the voice channel in a guild.""" - # Stop voice receiver first - receiver = self._voice_receivers.pop(guild_id, None) - if receiver: - receiver.stop() - listen_task = self._voice_listen_tasks.pop(guild_id, None) - if listen_task: - listen_task.cancel() + async with self._voice_locks.setdefault(guild_id, asyncio.Lock()): + # Stop voice receiver first + receiver = self._voice_receivers.pop(guild_id, None) + if receiver: + receiver.stop() + listen_task = self._voice_listen_tasks.pop(guild_id, None) + if listen_task: + listen_task.cancel() - vc = self._voice_clients.pop(guild_id, None) - if vc and vc.is_connected(): - await vc.disconnect() - task = self._voice_timeout_tasks.pop(guild_id, None) - if task: - task.cancel() - self._voice_text_channels.pop(guild_id, None) - self._voice_sources.pop(guild_id, None) + vc = self._voice_clients.pop(guild_id, None) + if vc and vc.is_connected(): + await vc.disconnect() + task = self._voice_timeout_tasks.pop(guild_id, None) + if task: + task.cancel() + self._voice_text_channels.pop(guild_id, None) + self._voice_sources.pop(guild_id, None) # Maximum seconds to wait for voice playback before giving up PLAYBACK_TIMEOUT = 120 @@ -1408,8 +1421,7 @@ class DiscordAdapter(BasePlatformAdapter): speaking_user_ids: set = set() receiver = self._voice_receivers.get(guild_id) if receiver: - import time as _time - now = _time.monotonic() + now = time.monotonic() with receiver._lock: for ssrc, last_t in receiver._last_packet_time.items(): # Consider "speaking" if audio received within last 2 seconds @@ -2948,6 +2960,17 @@ class DiscordAdapter(BasePlatformAdapter): parent_channel_id = self._get_parent_channel_id(message.channel) is_voice_linked_channel = False + + # Save mention-stripped text before auto-threading since create_thread() + # can clobber message.content, breaking /command detection in channels. 
+ raw_content = message.content.strip() + normalized_content = raw_content + mention_prefix = False + if self._client.user and self._client.user in message.mentions: + mention_prefix = True + normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip() + normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip() + message.content = normalized_content if not isinstance(message.channel, discord.DMChannel): channel_ids = {str(message.channel.id)} if parent_channel_id: @@ -2985,13 +3008,8 @@ class DiscordAdapter(BasePlatformAdapter): in_bot_thread = is_thread and thread_id in self._threads if require_mention and not is_free_channel and not in_bot_thread: - if self._client.user not in message.mentions: + if self._client.user not in message.mentions and not mention_prefix: return - - if self._client.user and self._client.user in message.mentions: - message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip() - message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip() - # Auto-thread: when enabled, automatically create a thread for every # @mention in a text channel so each conversation is isolated (like Slack). # Messages already inside threads or DMs are unaffected. @@ -3013,7 +3031,7 @@ class DiscordAdapter(BasePlatformAdapter): # Determine message type msg_type = MessageType.TEXT - if message.content.startswith("/"): + if normalized_content.startswith("/"): msg_type = MessageType.COMMAND elif message.attachments: # Check attachment types @@ -3153,7 +3171,9 @@ class DiscordAdapter(BasePlatformAdapter): att.filename, e, exc_info=True, ) - event_text = message.content + # Use normalized_content (saved before auto-threading) instead of message.content, + # to detect /slash commands in channel messages. + event_text = normalized_content if pending_text_injection: event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection @@ -3265,7 +3285,20 @@ class DiscordAdapter(BasePlatformAdapter): "[Discord] Flushing text batch %s (%d chars)", key, len(event.text or ""), ) - await self.handle_message(event) + # Shield the downstream dispatch so that a subsequent chunk + # arriving while handle_message is mid-flight cannot cancel + # the running agent turn. _enqueue_text_event always cancels + # the prior flush task when a new chunk lands; without this + # shield, CancelledError would propagate from our task down + # into handle_message → the agent's streaming request, + # aborting the response the user was waiting on. The new + # chunk is handled by the fresh flush task regardless. + await asyncio.shield(self.handle_message(event)) + except asyncio.CancelledError: + # Only reached if cancel landed before the pop — the shielded + # handle_message is unaffected either way. Let the task exit + # cleanly so the finally block cleans up. 
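Aside: the shield in the flush path above is the standard way to let a debounce task be cancelled without aborting work already handed downstream. A stripped-down sketch of the same shape, assuming a hypothetical `_handle(text)` coroutine in place of `handle_message`:

```python
import asyncio
from typing import List, Optional

class Batcher:
    """Debounce incoming chunks; a late chunk cancels the pending flush
    task but must never cancel a handler that is already running."""

    def __init__(self, delay: float = 0.2) -> None:
        self.delay = delay
        self._buf: List[str] = []
        self._task: Optional[asyncio.Task] = None

    async def _handle(self, text: str) -> None:
        await asyncio.sleep(0.5)  # stand-in for the agent call
        print("handled:", text)

    async def _flush(self) -> None:
        try:
            await asyncio.sleep(self.delay)
            text, self._buf = " ".join(self._buf), []
            # Shield so cancellation of *this task* (by a newer chunk)
            # cannot propagate into the in-flight handler.
            await asyncio.shield(self._handle(text))
        except asyncio.CancelledError:
            pass  # cancelled before the pop fired; the new task owns the buffer

    def feed(self, chunk: str) -> None:
        self._buf.append(chunk)
        if self._task and not self._task.done():
            self._task.cancel()
        self._task = asyncio.create_task(self._flush())

async def main() -> None:
    b = Batcher()
    b.feed("hello")
    await asyncio.sleep(0.05)
    b.feed("world")          # cancels the first flush task, not the handler
    await asyncio.sleep(2)   # prints: handled: hello world

asyncio.run(main())
```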
+ pass finally: if self._pending_text_batch_tasks.get(key) is current_task: self._pending_text_batch_tasks.pop(key, None) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 351337e827..85cebe5381 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -8,7 +8,8 @@ Supports: - Gateway allowlist integration via FEISHU_ALLOWED_USERS - Persistent dedup state across restarts - Per-chat serial message processing (matches openclaw createChatQueue) -- Persistent ACK emoji reaction on inbound messages +- Processing status reactions: Typing while working, removed on success, + swapped for CrossMark on failure - Reaction events routed as synthetic text events (matches openclaw) - Interactive card button-click events routed as synthetic COMMAND events - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker) @@ -29,6 +30,7 @@ import re import threading import time import uuid +from collections import OrderedDict from dataclasses import dataclass, field from datetime import datetime from pathlib import Path @@ -98,6 +100,7 @@ from gateway.platforms.base import ( BasePlatformAdapter, MessageEvent, MessageType, + ProcessingOutcome, SendResult, SUPPORTED_DOCUMENT_TYPES, cache_document_from_bytes, @@ -119,6 +122,8 @@ _MARKDOWN_HINT_RE = re.compile( re.MULTILINE, ) _MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)") +_MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$") +_MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$") _MENTION_RE = re.compile(r"@_user_\d+") _MULTISPACE_RE = re.compile(r"[ \t]{2,}") _POST_CONTENT_INVALID_RE = re.compile(r"content format of the post type is incorrect", re.IGNORECASE) @@ -188,7 +193,17 @@ _APPROVAL_LABEL_MAP: Dict[str, str] = { } _FEISHU_BOT_MSG_TRACK_SIZE = 512 # LRU size for tracking sent message IDs _FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003}) # reply target withdrawn/missing → create fallback -_FEISHU_ACK_EMOJI = "OK" + +# Feishu reactions render as prominent badges, unlike Discord/Telegram's +# small footer emoji — a success badge on every message would add noise, so +# we only mark start (Typing) and failure (CrossMark); the reply itself is +# the success signal. +_FEISHU_REACTION_IN_PROGRESS = "Typing" +_FEISHU_REACTION_FAILURE = "CrossMark" +# Bound on the (message_id → reaction_id) handle cache. Happy-path entries +# drain on completion; the cap is a safeguard against unbounded growth from +# delete-failures, not a capacity plan. +_FEISHU_PROCESSING_REACTION_CACHE_SIZE = 1024 # QR onboarding constants _ONBOARD_ACCOUNTS_URLS = { @@ -430,23 +445,66 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int: def _build_markdown_post_payload(content: str) -> str: + rows = _build_markdown_post_rows(content) return json.dumps( { "zh_cn": { - "content": [ - [ - { - "tag": "md", - "text": content, - } - ] - ], + "content": rows, } }, ensure_ascii=False, ) +def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]: + """Build Feishu post rows while isolating fenced code blocks. + + Feishu's `md` renderer can swallow trailing content when a fenced code block + appears inside one large markdown element. Split the reply at real fence + lines so prose before/after the code block remains visible while code stays + in a dedicated row. 
+ """ + if not content: + return [[{"tag": "md", "text": ""}]] + if "```" not in content: + return [[{"tag": "md", "text": content}]] + + rows: List[List[Dict[str, str]]] = [] + current: List[str] = [] + in_code_block = False + + def _flush_current() -> None: + nonlocal current + if not current: + return + segment = "\n".join(current) + if segment.strip(): + rows.append([{"tag": "md", "text": segment}]) + current = [] + + for raw_line in content.splitlines(): + stripped_line = raw_line.strip() + is_fence = bool( + _MARKDOWN_FENCE_CLOSE_RE.match(stripped_line) + if in_code_block + else _MARKDOWN_FENCE_OPEN_RE.match(stripped_line) + ) + + if is_fence: + if not in_code_block: + _flush_current() + current.append(raw_line) + in_code_block = not in_code_block + if not in_code_block: + _flush_current() + continue + + current.append(raw_line) + + _flush_current() + return rows or [[{"tag": "md", "text": content}]] + + def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult: resolved = _resolve_post_payload(payload) if not resolved: @@ -1096,6 +1154,9 @@ class FeishuAdapter(BasePlatformAdapter): # Exec approval button state (approval_id → {session_key, message_id, chat_id}) self._approval_state: Dict[int, Dict[str, str]] = {} self._approval_counter = itertools.count(1) + # Feishu reaction deletion requires the opaque reaction_id returned + # by create, so we cache it per message_id. + self._pending_processing_reactions: "OrderedDict[str, str]" = OrderedDict() self._load_seen_message_ids() @staticmethod @@ -1423,6 +1484,8 @@ class FeishuAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Feishu text/post message.""" if not self._client: @@ -1925,8 +1988,8 @@ class FeishuAdapter(BasePlatformAdapter): if not message_id or self._is_duplicate(message_id): logger.debug("[Feishu] Dropping duplicate/missing message_id: %s", message_id) return - if getattr(sender, "sender_type", "") == "bot": - logger.debug("[Feishu] Dropping bot-originated event: %s", message_id) + if self._is_self_sent_bot_message(event): + logger.debug("[Feishu] Dropping self-sent bot event: %s", message_id) return chat_type = getattr(message, "chat_type", "p2p") @@ -2003,12 +2066,12 @@ class FeishuAdapter(BasePlatformAdapter): operator_type, emoji_type, ) - # Only process reactions from real users. Ignore app/bot-generated reactions - # and Hermes' own ACK emoji to avoid feedback loops. + # Drop bot/app-origin reactions to break the feedback loop from our + # own lifecycle reactions. A human reacting with the same emoji (e.g. + # clicking Typing on a bot message) is still routed through. loop = self._loop if ( operator_type in {"bot", "app"} - or emoji_type == _FEISHU_ACK_EMOJI or not message_id or loop is None or bool(getattr(loop, "is_closed", lambda: False)()) @@ -2232,33 +2295,35 @@ class FeishuAdapter(BasePlatformAdapter): async def _handle_message_with_guards(self, event: MessageEvent) -> None: """Dispatch a single event through the agent pipeline with per-chat serialization - and a persistent ACK emoji reaction before processing starts. + before handing the event off to the agent. - - Per-chat lock: ensures messages in the same chat are processed one at a time - (matches openclaw's createChatQueue serial queue behaviour). - - ACK indicator: adds a CHECK reaction to the triggering message before handing - off to the agent and leaves it in place as a receipt marker. 
+ Per-chat lock ensures messages in the same chat are processed one at a + time (matches openclaw's createChatQueue serial queue behaviour). """ chat_id = getattr(event.source, "chat_id", "") or "" if event.source else "" chat_lock = self._get_chat_lock(chat_id) async with chat_lock: - message_id = event.message_id - if message_id: - await self._add_ack_reaction(message_id) await self.handle_message(event) - async def _add_ack_reaction(self, message_id: str) -> Optional[str]: - """Add a persistent ACK emoji reaction to signal the message was received.""" - if not self._client or not message_id: + # ========================================================================= + # Processing status reactions + # ========================================================================= + + def _reactions_enabled(self) -> bool: + return os.getenv("FEISHU_REACTIONS", "true").strip().lower() not in ("false", "0", "no") + + async def _add_reaction(self, message_id: str, emoji_type: str) -> Optional[str]: + """Return the reaction_id on success, else None. The id is needed later for deletion.""" + if not self._client or not message_id or not emoji_type: return None try: - from lark_oapi.api.im.v1 import ( # lazy import — keeps optional dep optional + from lark_oapi.api.im.v1 import ( CreateMessageReactionRequest, CreateMessageReactionRequestBody, ) body = ( CreateMessageReactionRequestBody.builder() - .reaction_type({"emoji_type": _FEISHU_ACK_EMOJI}) + .reaction_type({"emoji_type": emoji_type}) .build() ) request = ( @@ -2271,16 +2336,93 @@ class FeishuAdapter(BasePlatformAdapter): if response and getattr(response, "success", lambda: False)(): data = getattr(response, "data", None) return getattr(data, "reaction_id", None) - logger.warning( - "[Feishu] Failed to add ack reaction to %s: code=%s msg=%s", + logger.debug( + "[Feishu] Add reaction %s on %s rejected: code=%s msg=%s", + emoji_type, message_id, getattr(response, "code", None), getattr(response, "msg", None), ) except Exception: - logger.warning("[Feishu] Failed to add ack reaction to %s", message_id, exc_info=True) + logger.warning( + "[Feishu] Add reaction %s on %s raised", + emoji_type, + message_id, + exc_info=True, + ) return None + async def _remove_reaction(self, message_id: str, reaction_id: str) -> bool: + if not self._client or not message_id or not reaction_id: + return False + try: + from lark_oapi.api.im.v1 import DeleteMessageReactionRequest + request = ( + DeleteMessageReactionRequest.builder() + .message_id(message_id) + .reaction_id(reaction_id) + .build() + ) + response = await asyncio.to_thread(self._client.im.v1.message_reaction.delete, request) + if response and getattr(response, "success", lambda: False)(): + return True + logger.debug( + "[Feishu] Remove reaction %s on %s rejected: code=%s msg=%s", + reaction_id, + message_id, + getattr(response, "code", None), + getattr(response, "msg", None), + ) + except Exception: + logger.warning( + "[Feishu] Remove reaction %s on %s raised", + reaction_id, + message_id, + exc_info=True, + ) + return False + + def _remember_processing_reaction(self, message_id: str, reaction_id: str) -> None: + cache = self._pending_processing_reactions + cache[message_id] = reaction_id + cache.move_to_end(message_id) + while len(cache) > _FEISHU_PROCESSING_REACTION_CACHE_SIZE: + cache.popitem(last=False) + + def _pop_processing_reaction(self, message_id: str) -> Optional[str]: + return self._pending_processing_reactions.pop(message_id, None) + + async def on_processing_start(self, event: 
MessageEvent) -> None: + if not self._reactions_enabled(): + return + message_id = event.message_id + if not message_id or message_id in self._pending_processing_reactions: + return + reaction_id = await self._add_reaction(message_id, _FEISHU_REACTION_IN_PROGRESS) + if reaction_id: + self._remember_processing_reaction(message_id, reaction_id) + + async def on_processing_complete( + self, event: MessageEvent, outcome: ProcessingOutcome + ) -> None: + if not self._reactions_enabled(): + return + message_id = event.message_id + if not message_id: + return + + start_reaction_id = self._pending_processing_reactions.get(message_id) + if start_reaction_id: + if not await self._remove_reaction(message_id, start_reaction_id): + # Don't stack a second badge on top of a Typing we couldn't + # remove — UI would read as both "working" and "done/failed" + # simultaneously. Keep the handle so LRU eventually evicts it. + return + self._pop_processing_reaction(message_id) + + if outcome is ProcessingOutcome.FAILURE: + await self._add_reaction(message_id, _FEISHU_REACTION_FAILURE) + # ========================================================================= # Webhook server and security # ========================================================================= @@ -3249,6 +3391,23 @@ class FeishuAdapter(BasePlatformAdapter): return self._post_mentions_bot(normalized.mentioned_ids) return False + def _is_self_sent_bot_message(self, event: Any) -> bool: + """Return True only for Feishu events emitted by this Hermes bot.""" + sender = getattr(event, "sender", None) + sender_type = str(getattr(sender, "sender_type", "") or "").strip().lower() + if sender_type not in {"bot", "app"}: + return False + + sender_id = getattr(sender, "sender_id", None) + sender_open_id = str(getattr(sender_id, "open_id", "") or "").strip() + sender_user_id = str(getattr(sender_id, "user_id", "") or "").strip() + + if self._bot_open_id and sender_open_id == self._bot_open_id: + return True + if self._bot_user_id and sender_user_id == self._bot_user_id: + return True + return False + def _message_mentions_bot(self, mentions: List[Any]) -> bool: """Check whether any mention targets the configured or inferred bot identity.""" for mention in mentions: @@ -3276,10 +3435,55 @@ class FeishuAdapter(BasePlatformAdapter): return False async def _hydrate_bot_identity(self) -> None: - """Best-effort discovery of bot identity for precise group mention gating.""" + """Best-effort discovery of bot identity for precise group mention gating + and self-sent bot event filtering. + + Populates ``_bot_open_id`` and ``_bot_name`` from /open-apis/bot/v3/info + (no extra scopes required beyond the tenant access token). Falls back to + the application info endpoint for ``_bot_name`` only when the first probe + doesn't return it. Each field is hydrated independently — a value already + supplied via env vars (FEISHU_BOT_OPEN_ID / FEISHU_BOT_USER_ID / + FEISHU_BOT_NAME) is preserved and skips its probe. + """ if not self._client: return - if any((self._bot_open_id, self._bot_user_id, self._bot_name)): + if self._bot_open_id and self._bot_name: + # Everything the self-send filter and precise mention gate need is + # already in place; nothing to probe. + return + + # Primary probe: /open-apis/bot/v3/info — returns bot_name + open_id, no + # extra scopes required. This is the same endpoint the onboarding wizard + # uses via probe_bot(). 
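Aside: the reaction-handle cache above is a plain `OrderedDict` capped by evicting from the oldest end — entries normally drain when processing completes, so the cap only guards against delete failures leaking handles forever. The core of that pattern in isolation:

```python
from collections import OrderedDict
from typing import Optional

class HandleCache:
    """Bounded message_id -> reaction_id map; oldest entries evict first."""

    def __init__(self, max_size: int = 1024) -> None:
        self._max = max_size
        self._data: "OrderedDict[str, str]" = OrderedDict()

    def remember(self, message_id: str, reaction_id: str) -> None:
        self._data[message_id] = reaction_id
        self._data.move_to_end(message_id)     # treat rewrites as fresh
        while len(self._data) > self._max:
            self._data.popitem(last=False)     # evict least-recent insert

    def pop(self, message_id: str) -> Optional[str]:
        # Happy path: completion consumes the handle, keeping the cache small.
        return self._data.pop(message_id, None)

cache = HandleCache(max_size=2)
cache.remember("m1", "r1")
cache.remember("m2", "r2")
cache.remember("m3", "r3")   # cap of 2 exceeded: m1 evicted
assert cache.pop("m1") is None and cache.pop("m3") == "r3"
```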
+ if not self._bot_open_id or not self._bot_name: + try: + resp = await asyncio.to_thread( + self._client.request, + method="GET", + url="/open-apis/bot/v3/info", + body=None, + raw_response=True, + ) + content = getattr(resp, "content", None) + if content: + payload = json.loads(content) + parsed = _parse_bot_response(payload) or {} + open_id = (parsed.get("bot_open_id") or "").strip() + bot_name = (parsed.get("bot_name") or "").strip() + if open_id and not self._bot_open_id: + self._bot_open_id = open_id + if bot_name and not self._bot_name: + self._bot_name = bot_name + except Exception: + logger.debug( + "[Feishu] /bot/v3/info probe failed during hydration", + exc_info=True, + ) + + # Fallback probe for _bot_name only: application info endpoint. Needs + # admin:app.info:readonly or application:application:self_manage scope, + # so it's best-effort. + if self._bot_name: return try: request = self._build_get_application_request(app_id=self._app_id, lang="en_us") @@ -3288,17 +3492,17 @@ class FeishuAdapter(BasePlatformAdapter): code = getattr(response, "code", None) if code == 99991672: logger.warning( - "[Feishu] Unable to hydrate bot identity from application info. " + "[Feishu] Unable to hydrate bot name from application info. " "Grant admin:app.info:readonly or application:application:self_manage " "so group @mention gating can resolve the bot name precisely." ) return app = getattr(getattr(response, "data", None), "app", None) app_name = (getattr(app, "app_name", None) or "").strip() - if app_name: + if app_name and not self._bot_name: self._bot_name = app_name except Exception: - logger.debug("[Feishu] Failed to hydrate bot identity", exc_info=True) + logger.debug("[Feishu] Failed to hydrate bot name from application info", exc_info=True) # ========================================================================= # Deduplication — seen message ID cache (persistent) diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index cdd67b337d..a5f9352b55 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -825,7 +825,7 @@ class MatrixAdapter(BasePlatformAdapter): async def edit_message( - self, chat_id: str, message_id: str, content: str + self, chat_id: str, message_id: str, content: str, *, finalize: bool = False ) -> SendResult: """Edit an existing message (via m.replace).""" diff --git a/gateway/platforms/mattermost.py b/gateway/platforms/mattermost.py index 18367a8e44..0e6c9631d7 100644 --- a/gateway/platforms/mattermost.py +++ b/gateway/platforms/mattermost.py @@ -304,7 +304,7 @@ class MattermostAdapter(BasePlatformAdapter): ) async def edit_message( - self, chat_id: str, message_id: str, content: str + self, chat_id: str, message_id: str, content: str, *, finalize: bool = False ) -> SendResult: """Edit an existing post.""" formatted = self.format_message(content) @@ -410,7 +410,6 @@ class MattermostAdapter(BasePlatformAdapter): logger.warning("Mattermost: blocked unsafe URL (SSRF protection)") return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to) - import asyncio import aiohttp last_exc = None diff --git a/gateway/platforms/qqbot/__init__.py b/gateway/platforms/qqbot/__init__.py index 7119dd979e..130269b5f2 100644 --- a/gateway/platforms/qqbot/__init__.py +++ b/gateway/platforms/qqbot/__init__.py @@ -26,9 +26,8 @@ from .adapter import ( # noqa: F401 # -- Onboard (QR-code scan-to-configure) ----------------------------------- from .onboard import ( # noqa: F401 BindStatus, - create_bind_task, - poll_bind_result, 
build_connect_url, + qr_register, ) from .crypto import decrypt_secret, generate_bind_key # noqa: F401 @@ -44,9 +43,8 @@ __all__ = [ "_ssrf_redirect_guard", # onboard "BindStatus", - "create_bind_task", - "poll_bind_result", "build_connect_url", + "qr_register", # crypto "decrypt_secret", "generate_bind_key", diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index ced7442711..df3987f2eb 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -1086,11 +1086,8 @@ class QQAdapter(BasePlatformAdapter): return MessageType.VIDEO if "image" in first_type or "photo" in first_type: return MessageType.PHOTO - # Unknown content type with an attachment — don't assume PHOTO - # to prevent non-image files from being sent to vision analysis. logger.debug( - "[%s] Unknown media content_type '%s', defaulting to TEXT", - self._log_tag, + "Unknown media content_type '%s', defaulting to TEXT", first_type, ) return MessageType.TEXT @@ -1826,14 +1823,12 @@ class QQAdapter(BasePlatformAdapter): body["file_name"] = file_name # Retry transient upload failures - last_exc = None for attempt in range(3): try: return await self._api_request( "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT ) except RuntimeError as exc: - last_exc = exc err_msg = str(exc) if any( kw in err_msg @@ -1842,8 +1837,8 @@ class QQAdapter(BasePlatformAdapter): raise if attempt < 2: await asyncio.sleep(1.5 * (attempt + 1)) - - raise last_exc # type: ignore[misc] + else: + raise # Maximum time (seconds) to wait for reconnection before giving up on send. _RECONNECT_WAIT_SECONDS = 15.0 diff --git a/gateway/platforms/qqbot/onboard.py b/gateway/platforms/qqbot/onboard.py index 65750b3f10..b48c39a4f8 100644 --- a/gateway/platforms/qqbot/onboard.py +++ b/gateway/platforms/qqbot/onboard.py @@ -1,6 +1,10 @@ """ QQBot scan-to-configure (QR code onboard) module. +Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public +entry-point ``qr_register()`` that handles the full flow (create task → +display QR code → poll → decrypt credentials). + Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to generate a QR-code URL and poll for scan completion. On success the caller receives the bot's *app_id*, *client_secret* (decrypted locally), and the @@ -12,18 +16,20 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/ from __future__ import annotations import logging +import time from enum import IntEnum -from typing import Tuple +from typing import Optional, Tuple from urllib.parse import quote from .constants import ( ONBOARD_API_TIMEOUT, ONBOARD_CREATE_PATH, + ONBOARD_POLL_INTERVAL, ONBOARD_POLL_PATH, PORTAL_HOST, QR_URL_TEMPLATE, ) -from .crypto import generate_bind_key +from .crypto import decrypt_secret, generate_bind_key from .utils import get_api_headers logger = logging.getLogger(__name__) @@ -35,7 +41,7 @@ logger = logging.getLogger(__name__) class BindStatus(IntEnum): - """Status codes returned by ``poll_bind_result``.""" + """Status codes returned by ``_poll_bind_result``.""" NONE = 0 PENDING = 1 @@ -44,18 +50,40 @@ class BindStatus(IntEnum): # --------------------------------------------------------------------------- -# Public API +# QR rendering +# --------------------------------------------------------------------------- + +try: + import qrcode as _qrcode_mod +except (ImportError, TypeError): + _qrcode_mod = None # type: ignore[assignment] + + +def _render_qr(url: str) -> bool: + """Try to render a QR code in the terminal. 
Returns True if successful.""" + if _qrcode_mod is None: + return False + try: + qr = _qrcode_mod.QRCode( + error_correction=_qrcode_mod.constants.ERROR_CORRECT_M, + border=2, + ) + qr.add_data(url) + qr.make(fit=True) + qr.print_ascii(invert=True) + return True + except Exception: + return False + + +# --------------------------------------------------------------------------- +# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern) # --------------------------------------------------------------------------- -async def create_bind_task( - timeout: float = ONBOARD_API_TIMEOUT, -) -> Tuple[str, str]: +def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]: """Create a bind task and return *(task_id, aes_key_base64)*. - The AES key is generated locally and sent to the server so it can - encrypt the bot credentials before returning them. - Raises: RuntimeError: If the API returns a non-zero ``retcode``. """ @@ -64,8 +92,8 @@ async def create_bind_task( url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}" key = generate_bind_key() - async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: - resp = await client.post(url, json={"key": key}, headers=get_api_headers()) + with httpx.Client(timeout=timeout, follow_redirects=True) as client: + resp = client.post(url, json={"key": key}, headers=get_api_headers()) resp.raise_for_status() data = resp.json() @@ -80,7 +108,7 @@ async def create_bind_task( return task_id, key -async def poll_bind_result( +def _poll_bind_result( task_id: str, timeout: float = ONBOARD_API_TIMEOUT, ) -> Tuple[BindStatus, str, str, str]: @@ -89,12 +117,6 @@ async def poll_bind_result( Returns: A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``. - * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with - :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the - key from :func:`create_bind_task`. - * ``user_openid`` is the OpenID of the person who scanned the code - (available when ``status == COMPLETED``). - Raises: RuntimeError: If the API returns a non-zero ``retcode``. """ @@ -102,8 +124,8 @@ async def poll_bind_result( url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}" - async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: - resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers()) + with httpx.Client(timeout=timeout, follow_redirects=True) as client: + resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers()) resp.raise_for_status() data = resp.json() @@ -122,3 +144,77 @@ async def poll_bind_result( def build_connect_url(task_id: str) -> str: """Build the QR-code target URL for a given *task_id*.""" return QR_URL_TEMPLATE.format(task_id=quote(task_id)) + + +# --------------------------------------------------------------------------- +# Public entry-point +# --------------------------------------------------------------------------- + +_MAX_REFRESHES = 3 + + +def qr_register(timeout_seconds: int = 600) -> Optional[dict]: + """Run the QQBot scan-to-configure QR registration flow. + + Mirrors ``feishu.qr_register()``: handles create → display → poll → + decrypt in one call. Unexpected errors propagate to the caller. + + :returns: + ``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on + success, or ``None`` on failure / expiry / cancellation. 
+ """ + deadline = time.monotonic() + timeout_seconds + + for refresh_count in range(_MAX_REFRESHES + 1): + # ── Create bind task ── + try: + task_id, aes_key = _create_bind_task() + except Exception as exc: + logger.warning("[QQBot onboard] Failed to create bind task: %s", exc) + return None + + url = build_connect_url(task_id) + + # ── Display QR code + URL ── + print() + if _render_qr(url): + print(f" Scan the QR code above, or open this URL directly:\n {url}") + else: + print(f" Open this URL in QQ on your phone:\n {url}") + print(" Tip: pip install qrcode to display a scannable QR code here") + print() + + # ── Poll loop ── + while time.monotonic() < deadline: + try: + status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id) + except Exception: + time.sleep(ONBOARD_POLL_INTERVAL) + continue + + if status == BindStatus.COMPLETED: + client_secret = decrypt_secret(encrypted_secret, aes_key) + print() + print(f" QR scan complete! (App ID: {app_id})") + if user_openid: + print(f" Scanner's OpenID: {user_openid}") + return { + "app_id": app_id, + "client_secret": client_secret, + "user_openid": user_openid, + } + + if status == BindStatus.EXPIRED: + if refresh_count >= _MAX_REFRESHES: + logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES) + return None + print(f"\n QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})") + break # next for-loop iteration creates a new task + + time.sleep(ONBOARD_POLL_INTERVAL) + else: + # deadline reached without completing + logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds) + return None + + return None diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index 4df4193bc0..9a0a6256a4 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -18,6 +18,7 @@ import logging import os import random import time +import uuid from datetime import datetime, timezone from pathlib import Path from typing import Dict, List, Optional, Any @@ -127,6 +128,27 @@ def _render_mentions(text: str, mentions: list) -> str: return text +def _is_signal_service_id(value: str) -> bool: + """Return True if *value* already looks like a Signal service identifier.""" + if not value: + return False + if value.startswith("PNI:") or value.startswith("u:"): + return True + try: + uuid.UUID(value) + return True + except (ValueError, AttributeError, TypeError): + return False + + +def _looks_like_e164_number(value: str) -> bool: + """Return True for a plausible E.164 phone number.""" + if not value or not value.startswith("+"): + return False + digits = value[1:] + return digits.isdigit() and 7 <= len(digits) <= 15 + + def check_signal_requirements() -> bool: """Check if Signal is configured (has URL and account).""" return bool(os.getenv("SIGNAL_HTTP_URL") and os.getenv("SIGNAL_ACCOUNT")) @@ -179,6 +201,12 @@ class SignalAdapter(BasePlatformAdapter): # in Note to Self / self-chat mode (mirrors WhatsApp recentlySentIds) self._recent_sent_timestamps: set = set() self._max_recent_timestamps = 50 + # Signal increasingly exposes ACI/PNI UUIDs as stable recipient IDs. + # Keep a best-effort mapping so outbound sends can upgrade from a + # phone number to the corresponding UUID when signal-cli prefers it. 
+ self._recipient_uuid_by_number: Dict[str, str] = {} + self._recipient_number_by_uuid: Dict[str, str] = {} + self._recipient_cache_lock = asyncio.Lock() logger.info("Signal adapter initialized: url=%s account=%s groups=%s", self.http_url, redact_phone(self.account), @@ -195,31 +223,40 @@ class SignalAdapter(BasePlatformAdapter): return False # Acquire scoped lock to prevent duplicate Signal listeners for the same phone + lock_acquired = False try: if not self._acquire_platform_lock('signal-phone', self.account, 'Signal account'): return False + lock_acquired = True except Exception as e: logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e) self.client = httpx.AsyncClient(timeout=30.0) - - # Health check — verify signal-cli daemon is reachable try: - resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0) - if resp.status_code != 200: - logger.error("Signal: health check failed (status %d)", resp.status_code) + # Health check — verify signal-cli daemon is reachable + try: + resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0) + if resp.status_code != 200: + logger.error("Signal: health check failed (status %d)", resp.status_code) + return False + except Exception as e: + logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e) return False - except Exception as e: - logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e) - return False - self._running = True - self._last_sse_activity = time.time() - self._sse_task = asyncio.create_task(self._sse_listener()) - self._health_monitor_task = asyncio.create_task(self._health_monitor()) + self._running = True + self._last_sse_activity = time.time() + self._sse_task = asyncio.create_task(self._sse_listener()) + self._health_monitor_task = asyncio.create_task(self._health_monitor()) - logger.info("Signal: connected to %s", self.http_url) - return True + logger.info("Signal: connected to %s", self.http_url) + return True + finally: + if not self._running: + if self.client: + await self.client.aclose() + self.client = None + if lock_acquired: + self._release_platform_lock() async def disconnect(self) -> None: """Stop SSE listener and clean up.""" @@ -400,6 +437,7 @@ class SignalAdapter(BasePlatformAdapter): ) sender_name = envelope_data.get("sourceName", "") sender_uuid = envelope_data.get("sourceUuid", "") + self._remember_recipient_identifiers(sender, sender_uuid) if not sender: logger.debug("Signal: ignoring envelope with no sender") @@ -518,6 +556,64 @@ class SignalAdapter(BasePlatformAdapter): await self.handle_message(event) + def _remember_recipient_identifiers(self, number: Optional[str], service_id: Optional[str]) -> None: + """Cache any number↔UUID mapping observed from Signal envelopes.""" + if not number or not service_id or not _is_signal_service_id(service_id): + return + self._recipient_uuid_by_number[number] = service_id + self._recipient_number_by_uuid[service_id] = number + + def _extract_contact_uuid(self, contact: Any, phone_number: str) -> Optional[str]: + """Best-effort extraction of a Signal service ID from listContacts output.""" + if not isinstance(contact, dict): + return None + + number = contact.get("number") + recipient = contact.get("recipient") + service_id = contact.get("uuid") or contact.get("serviceId") + if not service_id: + profile = contact.get("profile") + if isinstance(profile, dict): + service_id = profile.get("serviceId") or profile.get("uuid") + + if service_id and _is_signal_service_id(service_id): + 
matches_number = number == phone_number or recipient == phone_number + if matches_number: + return service_id + return None + + async def _resolve_recipient(self, chat_id: str) -> str: + """Return the preferred Signal recipient identifier for a direct chat.""" + if ( + not chat_id + or chat_id.startswith("group:") + or _is_signal_service_id(chat_id) + or not _looks_like_e164_number(chat_id) + ): + return chat_id + + cached = self._recipient_uuid_by_number.get(chat_id) + if cached: + return cached + + async with self._recipient_cache_lock: + cached = self._recipient_uuid_by_number.get(chat_id) + if cached: + return cached + + contacts = await self._rpc("listContacts", { + "account": self.account, + "allRecipients": True, + }) + if isinstance(contacts, list): + for contact in contacts: + number = contact.get("number") if isinstance(contact, dict) else None + service_id = self._extract_contact_uuid(contact, chat_id) + if number and service_id: + self._remember_recipient_identifiers(number, service_id) + + return self._recipient_uuid_by_number.get(chat_id, chat_id) + # ------------------------------------------------------------------ # Attachment Handling # ------------------------------------------------------------------ @@ -633,7 +729,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] result = await self._rpc("send", params) @@ -684,7 +780,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] fails = self._typing_failures.get(chat_id, 0) result = await self._rpc( @@ -745,7 +841,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] result = await self._rpc("send", params) if result is not None: @@ -784,7 +880,7 @@ class SignalAdapter(BasePlatformAdapter): if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: - params["recipient"] = [chat_id] + params["recipient"] = [await self._resolve_recipient(chat_id)] result = await self._rpc("send", params) if result is not None: diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index ba444c53e8..191689a5ae 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -38,6 +38,7 @@ from gateway.platforms.base import ( BasePlatformAdapter, MessageEvent, MessageType, + ProcessingOutcome, SendResult, SUPPORTED_DOCUMENT_TYPES, safe_url_for_log, @@ -113,6 +114,11 @@ class SlackAdapter(BasePlatformAdapter): # Cache for _fetch_thread_context results: cache_key → _ThreadContextCache self._thread_context_cache: Dict[str, _ThreadContextCache] = {} self._THREAD_CACHE_TTL = 60.0 + # Track message IDs that should get reaction lifecycle (DMs / @mentions). + self._reacting_message_ids: set = set() + # Track active assistant thread status indicators so stop_typing can + # clear them (chat_id → thread_ts). 
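Aside: the recipient upgrade above uses the classic double-checked pattern for async caches — a lock-free fast path, then a re-check under the lock before paying for the RPC — so concurrent sends to the same number trigger at most one `listContacts` call. The shape in isolation, with a hypothetical `_fetch()` standing in for the RPC:

```python
import asyncio
from typing import Dict

class AsyncCache:
    def __init__(self) -> None:
        self._cache: Dict[str, str] = {}
        self._lock = asyncio.Lock()
        self.fetch_calls = 0

    async def _fetch(self, key: str) -> str:
        self.fetch_calls += 1                 # stand-in for the listContacts RPC
        await asyncio.sleep(0.05)
        return f"uuid-for-{key}"

    async def resolve(self, key: str) -> str:
        if (hit := self._cache.get(key)) is not None:
            return hit                        # fast path, no lock
        async with self._lock:
            if (hit := self._cache.get(key)) is not None:
                return hit                    # another task filled it while we waited
            value = await self._fetch(key)
            self._cache[key] = value
            return value

async def main() -> None:
    cache = AsyncCache()
    results = await asyncio.gather(*(cache.resolve("+15551234567") for _ in range(5)))
    assert len(set(results)) == 1 and cache.fetch_calls == 1

asyncio.run(main())
```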
+ self._active_status_threads: Dict[str, str] = {} async def connect(self) -> bool: """Connect to Slack via Socket Mode.""" @@ -150,9 +156,11 @@ class SlackAdapter(BasePlatformAdapter): except Exception as e: logger.warning("[Slack] Failed to read %s: %s", tokens_file, e) + lock_acquired = False try: if not self._acquire_platform_lock('slack-app-token', app_token, 'Slack app token'): return False + lock_acquired = True # First token is the primary — used for AsyncApp / Socket Mode primary_token = bot_tokens[0] @@ -228,6 +236,9 @@ class SlackAdapter(BasePlatformAdapter): except Exception as e: # pragma: no cover - defensive logging logger.error("[Slack] Connection failed: %s", e, exc_info=True) return False + finally: + if lock_acquired and not self._running: + self._release_platform_lock() async def disconnect(self) -> None: """Disconnect from Slack.""" @@ -316,6 +327,8 @@ class SlackAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Slack message.""" if not self._app: @@ -355,6 +368,7 @@ class SlackAdapter(BasePlatformAdapter): if not thread_ts: return # Can only set status in a thread context + self._active_status_threads[chat_id] = thread_ts try: await self._get_client(chat_id).assistant_threads_setStatus( channel_id=chat_id, @@ -366,6 +380,22 @@ class SlackAdapter(BasePlatformAdapter): # in an assistant-enabled context. Falls back to reactions. logger.debug("[Slack] assistant.threads.setStatus failed: %s", e) + async def stop_typing(self, chat_id: str) -> None: + """Clear the assistant thread status indicator.""" + if not self._app: + return + thread_ts = self._active_status_threads.pop(chat_id, None) + if not thread_ts: + return + try: + await self._get_client(chat_id).assistant_threads_setStatus( + channel_id=chat_id, + thread_ts=thread_ts, + status="", + ) + except Exception as e: + logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e) + def _dm_top_level_threads_as_sessions(self) -> bool: """Whether top-level Slack DMs get per-message session threads. 
@@ -577,6 +607,38 @@ class SlackAdapter(BasePlatformAdapter): logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e) return False + def _reactions_enabled(self) -> bool: + """Check if message reactions are enabled via config/env.""" + return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no") + + async def on_processing_start(self, event: MessageEvent) -> None: + """Add an in-progress reaction when message processing begins.""" + if not self._reactions_enabled(): + return + ts = getattr(event, "message_id", None) + if not ts or ts not in self._reacting_message_ids: + return + channel_id = getattr(event.source, "chat_id", None) + if channel_id: + await self._add_reaction(channel_id, ts, "eyes") + + async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None: + """Swap the in-progress reaction for a final success/failure reaction.""" + if not self._reactions_enabled(): + return + ts = getattr(event, "message_id", None) + if not ts or ts not in self._reacting_message_ids: + return + self._reacting_message_ids.discard(ts) + channel_id = getattr(event.source, "chat_id", None) + if not channel_id: + return + await self._remove_reaction(channel_id, ts, "eyes") + if outcome == ProcessingOutcome.SUCCESS: + await self._add_reaction(channel_id, ts, "white_check_mark") + elif outcome == ProcessingOutcome.FAILURE: + await self._add_reaction(channel_id, ts, "x") + # ----- User identity resolution ----- async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str: @@ -1206,17 +1268,12 @@ class SlackAdapter(BasePlatformAdapter): # Only react when bot is directly addressed (DM or @mention). # In listen-all channels (require_mention=false), reacting to every # casual message would be noisy. 
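Aside: the Slack flow below moves reactions out of the event handler and into processing-lifecycle hooks — the handler only records that a message *should* get reactions, and `on_processing_start`/`on_processing_complete` do the actual add/swap based on the outcome. That state machine, reduced to its moving parts with a stubbed reaction call:

```python
import asyncio
from enum import Enum

class Outcome(Enum):
    SUCCESS = "success"
    FAILURE = "failure"

class ReactionLifecycle:
    def __init__(self) -> None:
        self._reacting: set = set()   # message timestamps opted in by the handler

    async def _react(self, ts: str, emoji: str, add: bool = True) -> None:
        print(("add" if add else "remove"), emoji, "on", ts)  # stub API call

    def mark(self, ts: str) -> None:
        # Event-handler path: DMs / @mentions opt in; listen-all traffic doesn't.
        self._reacting.add(ts)

    async def on_start(self, ts: str) -> None:
        if ts in self._reacting:
            await self._react(ts, "eyes")

    async def on_complete(self, ts: str, outcome: Outcome) -> None:
        if ts not in self._reacting:
            return
        self._reacting.discard(ts)
        await self._react(ts, "eyes", add=False)
        await self._react(ts, "white_check_mark" if outcome is Outcome.SUCCESS else "x")

async def main() -> None:
    lc = ReactionLifecycle()
    lc.mark("1700000000.000100")
    await lc.on_start("1700000000.000100")
    await lc.on_complete("1700000000.000100", Outcome.SUCCESS)

asyncio.run(main())
```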
- _should_react = is_dm or is_mentioned - + _should_react = (is_dm or is_mentioned) and self._reactions_enabled() if _should_react: - await self._add_reaction(channel_id, ts, "eyes") + self._reacting_message_ids.add(ts) await self.handle_message(msg_event) - if _should_react: - await self._remove_reaction(channel_id, ts, "eyes") - await self._add_reaction(channel_id, ts, "white_check_mark") - # ----- Approval button support (Block Kit) ----- async def send_exec_approval( @@ -1593,11 +1650,9 @@ class SlackAdapter(BasePlatformAdapter): async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str: """Download a Slack file using the bot token for auth, with retry.""" - import asyncio import httpx bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token - last_exc = None async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: for attempt in range(3): @@ -1627,7 +1682,6 @@ class SlackAdapter(BasePlatformAdapter): from gateway.platforms.base import cache_image_from_bytes return cache_image_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < 2: @@ -1636,15 +1690,12 @@ class SlackAdapter(BasePlatformAdapter): await asyncio.sleep(1.5 * (attempt + 1)) continue raise - raise last_exc async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes: """Download a Slack file and return raw bytes, with retry.""" - import asyncio import httpx bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token - last_exc = None async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: for attempt in range(3): @@ -1656,7 +1707,6 @@ class SlackAdapter(BasePlatformAdapter): response.raise_for_status() return response.content except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < 2: @@ -1665,7 +1715,6 @@ class SlackAdapter(BasePlatformAdapter): await asyncio.sleep(1.5 * (attempt + 1)) continue raise - raise last_exc # ── Channel mention gating ───────────────────────────────────────────── diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index f71614054c..bec0d690a3 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -11,6 +11,7 @@ import asyncio import json import logging import os +import tempfile import html as _html import re from typing import Dict, List, Optional, Any @@ -70,8 +71,10 @@ from gateway.platforms.base import ( SendResult, cache_image_from_bytes, cache_audio_from_bytes, + cache_video_from_bytes, cache_document_from_bytes, resolve_proxy_url, + SUPPORTED_VIDEO_TYPES, SUPPORTED_DOCUMENT_TYPES, utf16_len, _prefix_within_utf16_limit, @@ -493,6 +496,13 @@ class TelegramAdapter(BasePlatformAdapter): "[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)", self.name, name, chat_id, ) + elif "not a forum" in error_text or "forums_disabled" in error_text: + logger.warning( + "[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. 
" + "The user must open the DM with this bot in Telegram, tap the bot name " + "at the top, and enable 'Topics' in chat settings before topics can be created.", + self.name, name, chat_id, + ) else: logger.warning( "[%s] Failed to create DM topic '%s' in chat %s: %s", @@ -534,8 +544,23 @@ class TelegramAdapter(BasePlatformAdapter): break if changed: - with open(config_path, "w") as f: - _yaml.dump(config, f, default_flow_style=False, sort_keys=False) + fd, tmp_path = tempfile.mkstemp( + dir=str(config_path.parent), + suffix=".tmp", + prefix=".config_", + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + _yaml.dump(config, f, default_flow_style=False, sort_keys=False) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp_path, config_path) + except BaseException: + try: + os.unlink(tmp_path) + except OSError: + pass + raise logger.info( "[%s] Persisted thread_id=%s for topic '%s' in config.yaml", self.name, thread_id, topic_name, @@ -769,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter): # Telegram pushes updates to our HTTP endpoint. This # enables cloud platforms (Fly.io, Railway) to auto-wake # suspended machines on inbound HTTP traffic. + # + # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it, + # python-telegram-bot passes secret_token=None and the + # webhook endpoint accepts any HTTP POST — attackers can + # inject forged updates as if from Telegram. Refuse to + # start rather than silently run in fail-open mode. + # See GHSA-3vpc-7q5r-276h. webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443")) - webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None + webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() + if not webhook_secret: + raise RuntimeError( + "TELEGRAM_WEBHOOK_SECRET is required when " + "TELEGRAM_WEBHOOK_URL is set. Without it, the " + "webhook endpoint accepts forged updates from " + "anyone who can reach it — see " + "https://github.com/NousResearch/hermes-agent/" + "security/advisories/GHSA-3vpc-7q5r-276h.\n\n" + "Generate a secret and set it in your .env:\n" + " export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n" + "Then register it with Telegram when setting the " + "webhook via setWebhook's secret_token parameter." + ) from urllib.parse import urlparse webhook_path = urlparse(webhook_url).path or "/telegram" @@ -1081,6 +1126,8 @@ class TelegramAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent Telegram message.""" if not self._bot: @@ -1657,6 +1704,21 @@ class TelegramAdapter(BasePlatformAdapter): except Exception as exc: logger.error("Failed to write update response from callback: %s", exc) + def _missing_media_path_error(self, label: str, path: str) -> str: + """Build an actionable file-not-found error for gateway MEDIA delivery. + + Paths like /workspace/... or /output/... often only exist inside the + Docker sandbox, while the gateway process runs on the host. + """ + error = f"{label} file not found: {path}" + if path.startswith(("/workspace/", "/output/", "/outputs/")): + error += ( + " (path may only exist inside the Docker sandbox. 
" + "Bind-mount a host directory and emit the host-visible " + "path in MEDIA: for gateway file delivery.)" + ) + return error + async def send_voice( self, chat_id: str, @@ -1671,9 +1733,8 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error="Not connected") try: - import os if not os.path.exists(audio_path): - return SendResult(success=False, error=f"Audio file not found: {audio_path}") + return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path)) with open(audio_path, "rb") as audio_file: # .ogg files -> send as voice (round playable bubble) @@ -1720,9 +1781,8 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error="Not connected") try: - import os if not os.path.exists(image_path): - return SendResult(success=False, error=f"Image file not found: {image_path}") + return SendResult(success=False, error=self._missing_media_path_error("Image", image_path)) _thread = self._metadata_thread_id(metadata) with open(image_path, "rb") as image_file: @@ -1759,7 +1819,7 @@ class TelegramAdapter(BasePlatformAdapter): try: if not os.path.exists(file_path): - return SendResult(success=False, error=f"File not found: {file_path}") + return SendResult(success=False, error=self._missing_media_path_error("File", file_path)) display_name = file_name or os.path.basename(file_path) _thread = self._metadata_thread_id(metadata) @@ -1793,7 +1853,7 @@ class TelegramAdapter(BasePlatformAdapter): try: if not os.path.exists(video_path): - return SendResult(success=False, error=f"Video file not found: {video_path}") + return SendResult(success=False, error=self._missing_media_path_error("Video", video_path)) _thread = self._metadata_thread_id(metadata) with open(video_path, "rb") as f: @@ -2033,7 +2093,7 @@ class TelegramAdapter(BasePlatformAdapter): url = m.group(2).replace('\\', '\\\\').replace(')', '\\)') return _ph(f'[{display}]({url})') - text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text) + text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text) # 4) Convert markdown headers (## Title) → bold *Title* def _convert_header(m): @@ -2241,22 +2301,27 @@ class TelegramAdapter(BasePlatformAdapter): bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower() bot_id = getattr(self._bot, "id", None) + expected = f"@{bot_username}" if bot_username else None def _iter_sources(): yield getattr(message, "text", None) or "", getattr(message, "entities", None) or [] yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or [] + # Telegram parses mentions server-side and emits MessageEntity objects + # (type=mention for @username, type=text_mention for @FirstName targeting + # a user without a public username). Only those entities are authoritative — + # raw substring matches like "foo@hermes_bot.example" are not mentions + # (bug #12545). Entities also correctly handle @handles inside URLs, code + # blocks, and quoted text, where a regex scan would over-match. 
for source_text, entities in _iter_sources(): - if bot_username and f"@{bot_username}" in source_text.lower(): - return True for entity in entities: entity_type = str(getattr(entity, "type", "")).split(".")[-1].lower() - if entity_type == "mention" and bot_username: + if entity_type == "mention" and expected: offset = int(getattr(entity, "offset", -1)) length = int(getattr(entity, "length", 0)) if offset < 0 or length <= 0: continue - if source_text[offset:offset + length].strip().lower() == f"@{bot_username}": + if source_text[offset:offset + length].strip().lower() == expected: return True elif entity_type == "text_mention": user = getattr(entity, "user", None) @@ -2288,10 +2353,16 @@ class TelegramAdapter(BasePlatformAdapter): DMs remain unrestricted. Group/supergroup messages are accepted when: - the chat is explicitly allowlisted in ``free_response_chats`` - ``require_mention`` is disabled - - the message is a command - the message replies to the bot - the bot is @mentioned - the text/caption matches a configured regex wake-word pattern + + When ``require_mention`` is enabled, slash commands are not given + special treatment — they must pass the same mention/reply checks + as any other group message. Users can still trigger commands via + the Telegram bot menu (``/command@botname``) or by explicitly + mentioning the bot (``@botname /command``), both of which are + recognised as mentions by :meth:`_message_mentions_bot`. """ if not self._is_group_chat(message): return True @@ -2306,8 +2377,6 @@ class TelegramAdapter(BasePlatformAdapter): return True if not self._telegram_require_mention(): return True - if is_command: - return True if self._is_reply_to_bot(message): return True if self._message_mentions_bot(message): @@ -2590,6 +2659,23 @@ class TelegramAdapter(BasePlatformAdapter): except Exception as e: logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True) + elif msg.video: + try: + file_obj = await msg.video.get_file() + video_bytes = await file_obj.download_as_bytearray() + ext = ".mp4" + if getattr(file_obj, "file_path", None): + for candidate in SUPPORTED_VIDEO_TYPES: + if file_obj.file_path.lower().endswith(candidate): + ext = candidate + break + cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext) + event.media_urls = [cached_path] + event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")] + logger.info("[Telegram] Cached user video at %s", cached_path) + except Exception as e: + logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True) + # Download document files to cache for agent processing elif msg.document: doc = msg.document @@ -2606,6 +2692,21 @@ class TelegramAdapter(BasePlatformAdapter): mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()} ext = mime_to_ext.get(doc.mime_type, "") + if not ext and doc.mime_type: + video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()} + ext = video_mime_to_ext.get(doc.mime_type, "") + + if ext in SUPPORTED_VIDEO_TYPES: + file_obj = await doc.get_file() + video_bytes = await file_obj.download_as_bytearray() + cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext) + event.media_urls = [cached_path] + event.media_types = [SUPPORTED_VIDEO_TYPES[ext]] + event.message_type = MessageType.VIDEO + logger.info("[Telegram] Cached user video document at %s", cached_path) + await self.handle_message(event) + return + # Check if supported if ext not in SUPPORTED_DOCUMENT_TYPES: supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys())) @@ -2744,13 
+2845,11 @@ class TelegramAdapter(BasePlatformAdapter): logger.info("[Telegram] Analyzing sticker at %s", cached_path) from tools.vision_tools import vision_analyze_tool - import json as _json - result_json = await vision_analyze_tool( image_url=cached_path, user_prompt=STICKER_VISION_PROMPT, ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "a sticker") diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py index c37445b17e..e3a736a451 100644 --- a/gateway/platforms/webhook.py +++ b/gateway/platforms/webhook.py @@ -13,6 +13,10 @@ Each route defines: - skills: optional list of skills to load for the agent - deliver: where to send the response (github_comment, telegram, etc.) - deliver_extra: additional delivery config (repo, pr_number, chat_id) + - deliver_only: if true, skip the agent — the rendered prompt IS the + message that gets delivered. Use for external push notifications + (Supabase, monitoring alerts, inter-agent pings) where zero LLM cost + and sub-second delivery matter more than agent reasoning. Security: - HMAC secret is required per route (validated at startup) @@ -122,6 +126,19 @@ class WebhookAdapter(BasePlatformAdapter): f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'." ) + # deliver_only routes bypass the agent — the POST body becomes a + # direct push notification via the configured delivery target. + # Validate up-front so misconfiguration surfaces at startup rather + # than on the first webhook POST. + if route.get("deliver_only"): + deliver = route.get("deliver", "log") + if not deliver or deliver == "log": + raise ValueError( + f"[webhook] Route '{name}' has deliver_only=true but " + f"deliver is '{deliver}'. Direct delivery requires a " + f"real target (telegram, discord, slack, github_comment, etc.)." 
+ ) + app = web.Application() app.router.add_get("/health", self._handle_health) app.router.add_post("/webhooks/{route_name}", self._handle_webhook) @@ -296,24 +313,14 @@ class WebhookAdapter(BasePlatformAdapter): {"error": "Payload too large"}, status=413 ) - # ── Rate limiting ──────────────────────────────────────── - now = time.time() - window = self._rate_counts.setdefault(route_name, []) - window[:] = [t for t in window if now - t < 60] - if len(window) >= self._rate_limit: - return web.json_response( - {"error": "Rate limit exceeded"}, status=429 - ) - window.append(now) - - # Read body + # Read body (must be done before any validation) try: raw_body = await request.read() except Exception as e: logger.error("[webhook] Failed to read body: %s", e) return web.json_response({"error": "Bad request"}, status=400) - # Validate HMAC signature (skip for INSECURE_NO_AUTH testing mode) + # Validate HMAC signature FIRST (skip for INSECURE_NO_AUTH testing mode) secret = route_config.get("secret", self._global_secret) if secret and secret != _INSECURE_NO_AUTH: if not self._validate_signature(request, raw_body, secret): @@ -324,6 +331,16 @@ class WebhookAdapter(BasePlatformAdapter): {"error": "Invalid signature"}, status=401 ) + # ── Rate limiting (after auth) ─────────────────────────── + now = time.time() + window = self._rate_counts.setdefault(route_name, []) + window[:] = [t for t in window if now - t < 60] + if len(window) >= self._rate_limit: + return web.json_response( + {"error": "Rate limit exceeded"}, status=429 + ) + window.append(now) + # Parse payload try: payload = json.loads(raw_body) @@ -419,6 +436,64 @@ class WebhookAdapter(BasePlatformAdapter): ) self._seen_deliveries[delivery_id] = now + # ── Direct delivery mode (deliver_only) ───────────────── + # Skip the agent entirely — the rendered prompt IS the message we + # deliver. Use case: external services (Supabase, monitoring, + # cron jobs, other agents) that need to push a plain notification + # to a user's chat with zero LLM cost. Reuses the same HMAC auth, + # rate limiting, idempotency, and template rendering as agent mode. + if route_config.get("deliver_only"): + delivery = { + "deliver": route_config.get("deliver", "log"), + "deliver_extra": self._render_delivery_extra( + route_config.get("deliver_extra", {}), payload + ), + "payload": payload, + } + logger.info( + "[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s", + event_type, + route_name, + delivery["deliver"], + len(prompt), + delivery_id, + ) + try: + result = await self._direct_deliver(prompt, delivery) + except Exception: + logger.exception( + "[webhook] direct-deliver failed route=%s delivery=%s", + route_name, + delivery_id, + ) + return web.json_response( + {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id}, + status=502, + ) + + if result.success: + return web.json_response( + { + "status": "delivered", + "route": route_name, + "target": delivery["deliver"], + "delivery_id": delivery_id, + }, + status=200, + ) + # Delivery attempted but target rejected it — surface as 502 + # with a generic error (don't leak adapter-level detail). 
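Aside: moving the rate limiter after signature validation, as the reordering above does, means unauthenticated traffic can no longer exhaust an authenticated caller's 429 budget. A sketch of the resulting check order, using stdlib HMAC with a constant-time compare:

```python
import hashlib
import hmac
import time
from typing import Dict, List

SECRET = b"route-secret"
RATE_LIMIT = 60          # requests per rolling minute, per route
_windows: Dict[str, List[float]] = {}

def handle(route: str, body: bytes, signature: str) -> int:
    """Return an HTTP status code. Order: auth first, then rate limit."""
    expected = hmac.new(SECRET, body, hashlib.sha256).hexdigest()
    if not hmac.compare_digest(expected, signature):
        return 401       # forged requests never touch the rate window

    now = time.time()
    window = _windows.setdefault(route, [])
    window[:] = [t for t in window if now - t < 60]
    if len(window) >= RATE_LIMIT:
        return 429
    window.append(now)
    return 200

body = b'{"event": "ping"}'
good = hmac.new(SECRET, body, hashlib.sha256).hexdigest()
print(handle("alerts", body, good))      # 200
print(handle("alerts", body, "bogus"))   # 401, window untouched
```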
+ logger.warning( + "[webhook] direct-deliver target rejected route=%s target=%s error=%s", + route_name, + delivery["deliver"], + result.error, + ) + return web.json_response( + {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id}, + status=502, + ) + # Use delivery_id in session key so concurrent webhooks on the # same route get independent agent runs (not queued/interrupted). session_chat_id = f"webhook:{route_name}:{delivery_id}" @@ -572,6 +647,34 @@ class WebhookAdapter(BasePlatformAdapter): # Response delivery # ------------------------------------------------------------------ + async def _direct_deliver( + self, content: str, delivery: dict + ) -> SendResult: + """Deliver *content* directly without invoking the agent. + + Used by ``deliver_only`` routes: the rendered template becomes the + literal message body, and we dispatch to the same delivery helpers + that the agent-mode ``send()`` flow uses. All target types that + work in agent mode work here — Telegram, Discord, Slack, GitHub + PR comments, etc. + """ + deliver_type = delivery.get("deliver", "log") + + if deliver_type == "log": + # Shouldn't reach here — startup validation rejects deliver_only + # with deliver=log — but guard defensively. + logger.info("[webhook] direct-deliver log-only: %s", content[:200]) + return SendResult(success=True) + + if deliver_type == "github_comment": + return await self._deliver_github_comment(content, delivery) + + # Fall through to the cross-platform dispatcher, which validates the + # target name and routes via the gateway runner. + return await self._deliver_cross_platform( + deliver_type, content, delivery + ) + async def _deliver_github_comment( self, content: str, delivery: dict ) -> SendResult: diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index 9e5dd04e0d..a6506d18a9 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -624,13 +624,16 @@ class WeComAdapter(BasePlatformAdapter): msgtype = str(body.get("msgtype") or "").lower() if msgtype == "mixed": - mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {} - items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else [] + _raw_mixed = body.get("mixed") + mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {} + _raw_items = mixed.get("msg_item") + items = _raw_items if isinstance(_raw_items, list) else [] for item in items: if not isinstance(item, dict): continue if str(item.get("msgtype") or "").lower() == "text": - text_block = item.get("text") if isinstance(item.get("text"), dict) else {} + _raw_text = item.get("text") + text_block = _raw_text if isinstance(_raw_text, dict) else {} content = str(text_block.get("content") or "").strip() if content: text_parts.append(content) @@ -672,8 +675,10 @@ class WeComAdapter(BasePlatformAdapter): msgtype = str(body.get("msgtype") or "").lower() if msgtype == "mixed": - mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {} - items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else [] + _raw_mixed = body.get("mixed") + mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {} + _raw_items = mixed.get("msg_item") + items = _raw_items if isinstance(_raw_items, list) else [] for item in items: if not isinstance(item, dict): continue @@ -1459,3 +1464,134 @@ class WeComAdapter(BasePlatformAdapter): "name": chat_id, "type": "group" if chat_id and chat_id.lower().startswith("group") else "dm", } + + +# 
------------------------------------------------------------------ +# QR code scan flow for obtaining bot credentials +# ------------------------------------------------------------------ + +_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate" +_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result" +_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode=" +_QR_POLL_INTERVAL = 3 # seconds +_QR_POLL_TIMEOUT = 300 # 5 minutes + + +def qr_scan_for_bot_info( + *, + timeout_seconds: int = _QR_POLL_TIMEOUT, +) -> Optional[Dict[str, str]]: + """Run the WeCom QR scan flow to obtain bot_id and secret. + + Fetches a QR code from WeCom, renders it in the terminal, and polls + until the user scans it or the timeout expires. + + Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on + failure or timeout. + + Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints + used here are not part of WeCom's public developer API — they back the + admin-console web UI's bot-creation flow and may change without notice. + The same pattern is used by the feishu/dingtalk QR setup wizards. + """ + try: + import urllib.request + import urllib.parse + except ImportError: # pragma: no cover + logger.error("urllib is required for WeCom QR scan") + return None + + generate_url = f"{_QR_GENERATE_URL}?source=hermes" + + # ── Step 1: Fetch QR code ── + print(" Connecting to WeCom...", end="", flush=True) + try: + req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"}) + with urllib.request.urlopen(req, timeout=15) as resp: + raw = json.loads(resp.read().decode("utf-8")) + except Exception as exc: + logger.error("WeCom QR: failed to fetch QR code: %s", exc) + print(f" failed: {exc}") + return None + + data = raw.get("data") or {} + scode = str(data.get("scode") or "").strip() + auth_url = str(data.get("auth_url") or "").strip() + + if not scode or not auth_url: + logger.error("WeCom QR: unexpected response format: %s", raw) + print(" failed: unexpected response format") + return None + + print(" done.") + + # ── Step 2: Render QR code in terminal ── + print() + qr_rendered = False + try: + import qrcode as _qrcode + qr = _qrcode.QRCode() + qr.add_data(auth_url) + qr.make(fit=True) + qr.print_ascii(invert=True) + qr_rendered = True + except ImportError: + pass + except Exception: + pass + + page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}" + if qr_rendered: + print(f"\n Scan the QR code above, or open this URL directly:\n {page_url}") + else: + print(f" Open this URL in WeCom on your phone:\n\n {page_url}\n") + print(" Tip: pip install qrcode to display a scannable QR code here next time") + print() + print(" Fetching configuration results...", end="", flush=True) + + # ── Step 3: Poll for result ── + import time + deadline = time.time() + timeout_seconds + query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}" + poll_count = 0 + + while time.time() < deadline: + try: + req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"}) + with urllib.request.urlopen(req, timeout=10) as resp: + result = json.loads(resp.read().decode("utf-8")) + except Exception as exc: + logger.debug("WeCom QR poll error: %s", exc) + time.sleep(_QR_POLL_INTERVAL) + continue + + poll_count += 1 + # Print a dot on every poll so progress is visible within 3s. 
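+        # (At the 3 s poll interval that is at most ~100 dots over the
+        #  default 5-minute timeout.)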
+ print(".", end="", flush=True) + + result_data = result.get("data") or {} + status = str(result_data.get("status") or "").lower() + + if status == "success": + print() # newline after "Fetching configuration results..." dots + bot_info = result_data.get("bot_info") or {} + bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip() + secret = str(bot_info.get("secret") or "").strip() + if bot_id and secret: + return {"bot_id": bot_id, "secret": secret} + logger.warning( + "WeCom QR: scan reported success but bot_info missing or incomplete: %s", + result_data, + ) + print( + " QR scan reported success but no bot credentials were returned.\n" + " This usually means the bot was not actually created on the WeCom side.\n" + " Falling back to manual credential entry." + ) + return None + + time.sleep(_QR_POLL_INTERVAL) + + print() # newline after dots + print(f" QR scan timed out ({timeout_seconds // 60} minutes). Please try again.") + return None diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index d1de5b8568..a82417a601 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -66,6 +66,37 @@ def _kill_port_process(port: int) -> None: except Exception: pass + +def _terminate_bridge_process(proc, *, force: bool = False) -> None: + """Terminate the bridge process using process-tree semantics where possible.""" + if _IS_WINDOWS: + cmd = ["taskkill", "/PID", str(proc.pid), "/T"] + if force: + cmd.append("/F") + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=10, + ) + except FileNotFoundError: + if force: + proc.kill() + else: + proc.terminate() + return + + if result.returncode != 0: + details = (result.stderr or result.stdout or "").strip() + raise OSError(details or f"taskkill failed for PID {proc.pid}") + return + + import signal + + sig = signal.SIGTERM if not force else signal.SIGKILL + os.killpg(os.getpgid(proc.pid), sig) + import sys sys.path.insert(0, str(Path(__file__).resolve().parents[2])) @@ -118,6 +149,10 @@ class WhatsAppAdapter(BasePlatformAdapter): - bridge_script: Path to the Node.js bridge script - bridge_port: Port for HTTP communication (default: 3000) - session_path: Path to store WhatsApp session data + - dm_policy: "open" | "allowlist" | "disabled" — how DMs are handled (default: "open") + - allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist") + - group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open") + - group_allow_from: List of group JIDs allowed (when group_policy="allowlist") """ # WhatsApp message limits — practical UX limit, not protocol max. 
@@ -140,6 +175,10 @@ class WhatsAppAdapter(BasePlatformAdapter): get_hermes_dir("platforms/whatsapp/session", "whatsapp/session") )) self._reply_prefix: Optional[str] = config.extra.get("reply_prefix") + self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower() + self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom")) + self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower() + self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom")) self._mention_patterns = self._compile_mention_patterns() self._message_queue: asyncio.Queue = asyncio.Queue() self._bridge_log_fh = None @@ -163,6 +202,33 @@ class WhatsAppAdapter(BasePlatformAdapter): return {str(part).strip() for part in raw if str(part).strip()} return {part.strip() for part in str(raw).split(",") if part.strip()} + @staticmethod + def _coerce_allow_list(raw) -> set[str]: + """Parse allow_from / group_allow_from from config or env var.""" + if raw is None: + return set() + if isinstance(raw, list): + return {str(part).strip() for part in raw if str(part).strip()} + return {part.strip() for part in str(raw).split(",") if part.strip()} + + def _is_dm_allowed(self, sender_id: str) -> bool: + """Check whether a DM from the given sender should be processed.""" + if self._dm_policy == "disabled": + return False + if self._dm_policy == "allowlist": + return sender_id in self._allow_from + # "open" — all DMs allowed + return True + + def _is_group_allowed(self, chat_id: str) -> bool: + """Check whether a group chat should be processed.""" + if self._group_policy == "disabled": + return False + if self._group_policy == "allowlist": + return chat_id in self._group_allow_from + # "open" — all groups allowed + return True + def _compile_mention_patterns(self): patterns = self.config.extra.get("mention_patterns") if patterns is None: @@ -255,8 +321,18 @@ class WhatsAppAdapter(BasePlatformAdapter): return cleaned.strip() or text def _should_process_message(self, data: Dict[str, Any]) -> bool: - if not data.get("isGroup"): + is_group = data.get("isGroup", False) + if is_group: + chat_id = str(data.get("chatId") or "") + if not self._is_group_allowed(chat_id): + return False + else: + sender_id = str(data.get("senderId") or data.get("from") or "") + if not self._is_dm_allowed(sender_id): + return False + # DMs that pass the policy gate are always processed return True + # Group messages: check mention / free-response settings chat_id = str(data.get("chatId") or "") if chat_id in self._whatsapp_free_response_chats(): return True @@ -289,39 +365,40 @@ class WhatsAppAdapter(BasePlatformAdapter): logger.info("[%s] Bridge found at %s", self.name, bridge_path) # Acquire scoped lock to prevent duplicate sessions + lock_acquired = False try: if not self._acquire_platform_lock('whatsapp-session', str(self._session_path), 'WhatsApp session'): return False + lock_acquired = True except Exception as e: logger.warning("[%s] Could not acquire session lock (non-fatal): %s", self.name, e) - # Auto-install npm dependencies if node_modules doesn't exist - bridge_dir = bridge_path.parent - if not (bridge_dir / "node_modules").exists(): - print(f"[{self.name}] Installing WhatsApp bridge dependencies...") - try: - install_result = subprocess.run( - ["npm", "install", "--silent"], - cwd=str(bridge_dir), - capture_output=True, - text=True, - 
timeout=60, - ) - if install_result.returncode != 0: - print(f"[{self.name}] npm install failed: {install_result.stderr}") - return False - print(f"[{self.name}] Dependencies installed") - except Exception as e: - print(f"[{self.name}] Failed to install dependencies: {e}") - return False - try: + # Auto-install npm dependencies if node_modules doesn't exist + bridge_dir = bridge_path.parent + if not (bridge_dir / "node_modules").exists(): + print(f"[{self.name}] Installing WhatsApp bridge dependencies...") + try: + install_result = subprocess.run( + ["npm", "install", "--silent"], + cwd=str(bridge_dir), + capture_output=True, + text=True, + timeout=60, + ) + if install_result.returncode != 0: + print(f"[{self.name}] npm install failed: {install_result.stderr}") + return False + print(f"[{self.name}] Dependencies installed") + except Exception as e: + print(f"[{self.name}] Failed to install dependencies: {e}") + return False + # Ensure session directory exists self._session_path.mkdir(parents=True, exist_ok=True) # Check if bridge is already running and connected import aiohttp - import asyncio try: async with aiohttp.ClientSession() as session: async with session.get( @@ -452,10 +529,13 @@ class WhatsAppAdapter(BasePlatformAdapter): return True except Exception as e: - self._release_platform_lock() logger.error("[%s] Failed to start bridge: %s", self.name, e, exc_info=True) - self._close_bridge_log() return False + finally: + if not self._running: + if lock_acquired: + self._release_platform_lock() + self._close_bridge_log() def _close_bridge_log(self) -> None: """Close the bridge log file handle if open.""" @@ -487,22 +567,14 @@ class WhatsAppAdapter(BasePlatformAdapter): """Stop the WhatsApp bridge and clean up any orphaned processes.""" if self._bridge_process: try: - # Kill the entire process group so child node processes die too - import signal try: - if _IS_WINDOWS: - self._bridge_process.terminate() - else: - os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM) + _terminate_bridge_process(self._bridge_process, force=False) except (ProcessLookupError, PermissionError): self._bridge_process.terminate() await asyncio.sleep(1) if self._bridge_process.poll() is None: try: - if _IS_WINDOWS: - self._bridge_process.kill() - else: - os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL) + _terminate_bridge_process(self._bridge_process, force=True) except (ProcessLookupError, PermissionError): self._bridge_process.kill() except Exception as e: @@ -655,6 +727,8 @@ class WhatsAppAdapter(BasePlatformAdapter): chat_id: str, message_id: str, content: str, + *, + finalize: bool = False, ) -> SendResult: """Edit a previously sent message via the WhatsApp bridge.""" if not self._running or not self._http_session: @@ -766,6 +840,17 @@ class WhatsAppAdapter(BasePlatformAdapter): """Send a video natively via bridge — plays inline in WhatsApp.""" return await self._send_media_to_bridge(chat_id, video_path, "video", caption) + async def send_voice( + self, + chat_id: str, + audio_path: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + **kwargs, + ) -> SendResult: + """Send an audio file as a WhatsApp voice message via bridge.""" + return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption) + async def send_document( self, chat_id: str, diff --git a/gateway/run.py b/gateway/run.py index b72e95eb83..617a38418e 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -30,6 +30,8 @@ from pathlib import Path from datetime import datetime from typing 
import Dict, Optional, Any, List
 
+from agent.account_usage import fetch_account_usage, render_account_usage_lines
+
 # --- Agent cache tuning ---------------------------------------------------
 # Bounds the per-session AIAgent cache to prevent unbounded growth in
 # long-lived gateways (each AIAgent holds LLM clients, tool schemas,
@@ -86,7 +88,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
 # Resolve Hermes home directory (respects HERMES_HOME override)
 from hermes_constants import get_hermes_home
-from utils import atomic_yaml_write, is_truthy_value
+from utils import atomic_yaml_write, base_url_host_matches, is_truthy_value
 _hermes_home = get_hermes_home()
 # Load environment variables from ~/.hermes/.env first.
@@ -96,6 +98,10 @@ from hermes_cli.env_loader import load_hermes_dotenv
 _env_path = _hermes_home / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')
+
+_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<mode>[^:]+))?$")
+_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}
+
 # Bridge config.yaml values into the environment so os.getenv() picks them up.
 # config.yaml is authoritative for terminal settings — overrides .env.
 _config_path = _hermes_home / 'config.yaml'
@@ -275,6 +281,7 @@ from gateway.session import (
     build_session_context,
     build_session_context_prompt,
     build_session_key,
+    is_shared_multi_user_session,
 )
 from gateway.delivery import DeliveryRouter
 from gateway.platforms.base import (
@@ -398,6 +405,33 @@ def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None:
     return adapter.get_pending_message(session_key)
 
+_INTERRUPT_REASON_STOP = "Stop requested"
+_INTERRUPT_REASON_RESET = "Session reset requested"
+_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)"
+_INTERRUPT_REASON_SSE_DISCONNECT = "SSE client disconnected"
+_INTERRUPT_REASON_GATEWAY_SHUTDOWN = "Gateway shutting down"
+_INTERRUPT_REASON_GATEWAY_RESTART = "Gateway restarting"
+
+_CONTROL_INTERRUPT_MESSAGES = frozenset(
+    {
+        _INTERRUPT_REASON_STOP.lower(),
+        _INTERRUPT_REASON_RESET.lower(),
+        _INTERRUPT_REASON_TIMEOUT.lower(),
+        _INTERRUPT_REASON_SSE_DISCONNECT.lower(),
+        _INTERRUPT_REASON_GATEWAY_SHUTDOWN.lower(),
+        _INTERRUPT_REASON_GATEWAY_RESTART.lower(),
+    }
+)
+
+
+def _is_control_interrupt_message(message: Optional[str]) -> bool:
+    """Return True when an interrupt message is internal control flow."""
+    if not message:
+        return False
+    normalized = " ".join(str(message).strip().split()).lower()
+    return normalized in _CONTROL_INTERRUPT_MESSAGES
+
+
 def _check_unavailable_skill(command_name: str) -> str | None:
     """Check if a command matches a known-but-inactive skill.
 
@@ -585,6 +619,7 @@ class GatewayRunner:
     def __init__(self, config: Optional[GatewayConfig] = None):
         self.config = config or load_gateway_config()
         self.adapters: Dict[Platform, BasePlatformAdapter] = {}
+        self._warn_if_docker_media_delivery_is_risky()
 
         # Load ephemeral config from config.yaml / env vars.
         # Both are injected at API-call time only and never persisted.
@@ -597,7 +632,6 @@ class GatewayRunner: self._restart_drain_timeout = self._load_restart_drain_timeout() self._provider_routing = self._load_provider_routing() self._fallback_model = self._load_fallback_model() - self._smart_model_routing = self._load_smart_model_routing() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -625,6 +659,7 @@ class GatewayRunner: self._running_agents_ts: Dict[str, float] = {} # start timestamp per session self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt self._busy_ack_ts: Dict[str, float] = {} # last busy-ack timestamp per session (debounce) + self._session_run_generation: Dict[str, int] = {} # Cache AIAgent instances per session to preserve prompt caching. # Without this, a new AIAgent is created per message, rebuilding the @@ -675,7 +710,26 @@ class GatewayRunner: self._session_db = SessionDB() except Exception as e: logger.debug("SQLite session store not available: %s", e) - + + # Opportunistic state.db maintenance: prune ended sessions older + # than sessions.retention_days + optional VACUUM. Tracks last-run + # in state_meta so it only actually executes once per + # sessions.min_interval_hours. Gateway is long-lived so blocking + # a few seconds once per day is acceptable; failures are logged + # but never raised. + if self._session_db is not None: + try: + from hermes_cli.config import load_config as _load_full_config + _sess_cfg = (_load_full_config().get("sessions") or {}) + if _sess_cfg.get("auto_prune", False): + self._session_db.maybe_auto_prune_and_vacuum( + retention_days=int(_sess_cfg.get("retention_days", 90)), + min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)), + vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)), + ) + except Exception as exc: + logger.debug("state.db auto-maintenance skipped: %s", exc) + # DM pairing store for code-based user authorization from gateway.pairing import PairingStore self.pairing_store = PairingStore() @@ -691,6 +745,53 @@ class GatewayRunner: self._background_tasks: set = set() + def _warn_if_docker_media_delivery_is_risky(self) -> None: + """Warn when Docker-backed gateways lack an explicit export mount. + + MEDIA delivery happens in the gateway process, so paths emitted by the model + must be readable from the host. A plain container-local path like + `/workspace/report.txt` or `/output/report.txt` often exists only inside + Docker, so users commonly need a dedicated export mount such as + `host-dir:/output`. 
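+
+        Illustrative example: with TERMINAL_DOCKER_VOLUMES set to
+        '["/home/user/.hermes/cache/documents:/output:rw"]', the volume
+        regex parses host=/home/user/.hermes/cache/documents,
+        container=/output, mode=rw, and the warning is suppressed.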
+ """ + if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker": + return + + connected = self.config.get_connected_platforms() + messaging_platforms = [p for p in connected if p not in {Platform.LOCAL, Platform.API_SERVER, Platform.WEBHOOK}] + if not messaging_platforms: + return + + raw_volumes = os.getenv("TERMINAL_DOCKER_VOLUMES", "").strip() + volumes: List[str] = [] + if raw_volumes: + try: + parsed = json.loads(raw_volumes) + if isinstance(parsed, list): + volumes = [str(v) for v in parsed if isinstance(v, str)] + except Exception: + logger.debug("Could not parse TERMINAL_DOCKER_VOLUMES for gateway media warning", exc_info=True) + + has_explicit_output_mount = False + for spec in volumes: + match = _DOCKER_VOLUME_SPEC_RE.match(spec) + if not match: + continue + container_path = match.group("container") + if container_path in _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS: + has_explicit_output_mount = True + break + + if has_explicit_output_mount: + return + + logger.warning( + "Docker backend is enabled for the messaging gateway but no explicit host-visible " + "output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. " + "This is fine if the model already emits host-visible paths, but MEDIA file delivery can fail " + "for container-local paths like '/workspace/...' or '/output/...'." + ) + # -- Setup skill availability ---------------------------------------- @@ -707,6 +808,10 @@ class GatewayRunner: _VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json" + def _voice_key(self, platform: Platform, chat_id: str) -> str: + """Return a platform-namespaced key for voice mode state.""" + return f"{platform.value}:{chat_id}" + def _load_voice_modes(self) -> Dict[str, str]: try: data = json.loads(self._VOICE_MODE_PATH.read_text()) @@ -717,11 +822,21 @@ class GatewayRunner: return {} valid_modes = {"off", "voice_only", "all"} - return { - str(chat_id): mode - for chat_id, mode in data.items() - if mode in valid_modes - } + result = {} + for chat_id, mode in data.items(): + if mode not in valid_modes: + continue + key = str(chat_id) + # Skip legacy unprefixed keys (warn and skip) + if ":" not in key: + logger.warning( + "Skipping legacy unprefixed voice mode key %r during migration. " + "Re-enable voice mode on that chat to rebuild the prefixed key.", + key, + ) + continue + result[key] = mode + return result def _save_voice_modes(self) -> None: try: @@ -747,9 +862,14 @@ class GatewayRunner: disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None) if not isinstance(disabled_chats, set): return + platform = getattr(adapter, "platform", None) + if not isinstance(platform, Platform): + return disabled_chats.clear() + prefix = f"{platform.value}:" disabled_chats.update( - chat_id for chat_id, mode in self._voice_mode.items() if mode == "off" + key[len(prefix):] for key, mode in self._voice_mode.items() + if mode == "off" and key.startswith(prefix) ) async def _safe_adapter_disconnect(self, adapter, platform) -> None: @@ -1002,11 +1122,16 @@ class GatewayRunner: return model, runtime_kwargs def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict: - from agent.smart_model_routing import resolve_turn_route + """Build the effective model/runtime config for a single turn. + + Always uses the session's primary model/provider. If `/fast` is + enabled and the model supports Priority Processing / Anthropic fast + mode, attach `request_overrides` so the API call is marked + accordingly. 
+ """ from hermes_cli.models import resolve_fast_mode_overrides - primary = { - "model": model, + runtime = { "api_key": runtime_kwargs.get("api_key"), "base_url": runtime_kwargs.get("base_url"), "provider": runtime_kwargs.get("provider"), @@ -1015,7 +1140,18 @@ class GatewayRunner: "args": list(runtime_kwargs.get("args") or []), "credential_pool": runtime_kwargs.get("credential_pool"), } - route = resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary) + route = { + "model": model, + "runtime": runtime, + "signature": ( + model, + runtime["provider"], + runtime["base_url"], + runtime["api_mode"], + runtime["command"], + tuple(runtime["args"]), + ), + } service_tier = getattr(self, "_service_tier", None) if not service_tier: @@ -1023,7 +1159,7 @@ class GatewayRunner: return route try: - overrides = resolve_fast_mode_overrides(route.get("model")) + overrides = resolve_fast_mode_overrides(route["model"]) except Exception: overrides = None route["request_overrides"] = overrides @@ -1152,7 +1288,6 @@ class GatewayRunner: the prefill_messages_file key in ~/.hermes/config.yaml. Relative paths are resolved from ~/.hermes/. """ - import json as _json file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") if not file_path: try: @@ -1174,7 +1309,7 @@ class GatewayRunner: return [] try: with open(path, "r", encoding="utf-8") as f: - data = _json.load(f) + data = json.load(f) if not isinstance(data, list): logger.warning("Prefill messages file must contain a JSON array: %s", path) return [] @@ -1381,20 +1516,6 @@ class GatewayRunner: pass return None - @staticmethod - def _load_smart_model_routing() -> dict: - """Load optional smart cheap-vs-strong model routing config.""" - try: - import yaml as _y - cfg_path = _hermes_home / "config.yaml" - if cfg_path.exists(): - with open(cfg_path, encoding="utf-8") as _f: - cfg = _y.safe_load(_f) or {} - return cfg.get("smart_model_routing", {}) or {} - except Exception: - pass - return {} - def _snapshot_running_agents(self) -> Dict[str, Any]: return { session_key: agent @@ -1567,12 +1688,32 @@ class GatewayRunner: notified: set = set() for session_key in active: - # Parse platform + chat_id from the session key. - _parsed = _parse_session_key(session_key) - if not _parsed: - continue - platform_str = _parsed["platform"] - chat_id = _parsed["chat_id"] + source = None + try: + if getattr(self, "session_store", None) is not None: + self.session_store._ensure_loaded() + entry = self.session_store._entries.get(session_key) + source = getattr(entry, "origin", None) if entry else None + except Exception as e: + logger.debug( + "Failed to load session origin for shutdown notification %s: %s", + session_key, + e, + ) + + if source is not None: + platform_str = source.platform.value + chat_id = source.chat_id + thread_id = source.thread_id + else: + # Fall back to parsing the session key when no persisted + # origin is available (legacy sessions/tests). + _parsed = _parse_session_key(session_key) + if not _parsed: + continue + platform_str = _parsed["platform"] + chat_id = _parsed["chat_id"] + thread_id = _parsed.get("thread_id") # Deduplicate: one notification per chat, even if multiple # sessions (different users/threads) share the same chat. @@ -1588,7 +1729,6 @@ class GatewayRunner: # Include thread_id if present so the message lands in the # correct forum topic / thread. 
- thread_id = _parsed.get("thread_id") metadata = {"thread_id": thread_id} if thread_id else None await adapter.send(chat_id, msg, metadata=metadata) @@ -1841,6 +1981,39 @@ class GatewayRunner: "or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)." ) + # Discover Python plugins before shell hooks so plugin block + # decisions take precedence in tie cases. The CLI startup path + # does this via an explicit call in hermes_cli/main.py; the + # gateway lazily imports run_agent inside per-request handlers, + # so the discover_plugins() side-effect in model_tools.py is NOT + # guaranteed to have run by the time we reach this point. + try: + from hermes_cli.plugins import discover_plugins + discover_plugins() + except Exception: + logger.debug( + "plugin discovery failed at gateway startup", exc_info=True, + ) + + # Register declarative shell hooks from cli-config.yaml. Gateway + # has no TTY, so consent has to come from one of the three opt-in + # channels (--accept-hooks on launch, HERMES_ACCEPT_HOOKS env var, + # or hooks_auto_accept: true in config.yaml). We pass + # accept_hooks=False here and let register_from_config resolve + # the effective value from env + config itself — the CLI-side + # registration already honored --accept-hooks, and re-reading + # hooks_auto_accept here would just duplicate that lookup. + # Failures are logged but must never block gateway startup. + try: + from hermes_cli.config import load_config + from agent.shell_hooks import register_from_config + register_from_config(load_config(), accept_hooks=False) + except Exception: + logger.debug( + "shell-hook registration failed at gateway startup", + exc_info=True, + ) + # Discover and load event hooks self.hooks.discover_and_load() @@ -2441,7 +2614,7 @@ class GatewayRunner: _sk[:20], _e, ) self._interrupt_running_agents( - "Gateway restarting" if self._restart_requested else "Gateway shutting down" + _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN ) interrupt_deadline = asyncio.get_running_loop().time() + 5.0 while self._running_agents and asyncio.get_running_loop().time() < interrupt_deadline: @@ -2862,10 +3035,59 @@ class GatewayRunner: return bool(check_ids & allowed_ids) def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str: - """Return how unauthorized DMs should be handled for a platform.""" + """Return how unauthorized DMs should be handled for a platform. + + Resolution order: + 1. Explicit per-platform ``unauthorized_dm_behavior`` in config — always wins. + 2. Explicit global ``unauthorized_dm_behavior`` in config — wins when no per-platform. + 3. When an allowlist (``PLATFORM_ALLOWED_USERS`` or ``GATEWAY_ALLOWED_USERS``) is + configured, default to ``"ignore"`` — the allowlist signals that the owner has + deliberately restricted access; spamming unknown contacts with pairing codes + is both noisy and a potential info-leak. (#9337) + 4. No allowlist and no explicit config → ``"pair"`` (open-gateway default). + """ config = getattr(self, "config", None) - if config and hasattr(config, "get_unauthorized_dm_behavior"): - return config.get_unauthorized_dm_behavior(platform) + + # Check for an explicit per-platform override first. 
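+        # Illustratively, an operator opts in per platform with:
+        #   platforms:
+        #     telegram:
+        #       unauthorized_dm_behavior: ignore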
+ if config and hasattr(config, "get_unauthorized_dm_behavior") and platform: + platform_cfg = config.platforms.get(platform) if hasattr(config, "platforms") else None + if platform_cfg and "unauthorized_dm_behavior" in getattr(platform_cfg, "extra", {}): + # Operator explicitly configured behavior for this platform — respect it. + return config.get_unauthorized_dm_behavior(platform) + + # Check for an explicit global config override. + if config and hasattr(config, "unauthorized_dm_behavior"): + if config.unauthorized_dm_behavior != "pair": # non-default → explicit override + return config.unauthorized_dm_behavior + + # No explicit override. Fall back to allowlist-aware default: + # if any allowlist is configured for this platform, silently drop + # unauthorized messages instead of sending pairing codes. + if platform: + platform_env_map = { + Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS", + Platform.DISCORD: "DISCORD_ALLOWED_USERS", + Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS", + Platform.SLACK: "SLACK_ALLOWED_USERS", + Platform.SIGNAL: "SIGNAL_ALLOWED_USERS", + Platform.EMAIL: "EMAIL_ALLOWED_USERS", + Platform.SMS: "SMS_ALLOWED_USERS", + Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS", + Platform.MATRIX: "MATRIX_ALLOWED_USERS", + Platform.DINGTALK: "DINGTALK_ALLOWED_USERS", + Platform.FEISHU: "FEISHU_ALLOWED_USERS", + Platform.WECOM: "WECOM_ALLOWED_USERS", + Platform.WECOM_CALLBACK: "WECOM_CALLBACK_ALLOWED_USERS", + Platform.WEIXIN: "WEIXIN_ALLOWED_USERS", + Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS", + Platform.QQBOT: "QQ_ALLOWED_USERS", + } + if os.getenv(platform_env_map.get(platform, ""), "").strip(): + return "ignore" + + if os.getenv("GATEWAY_ALLOWED_USERS", "").strip(): + return "ignore" + return "pair" async def _handle_message(self, event: MessageEvent) -> Optional[str]: @@ -3012,6 +3234,10 @@ class GatewayRunner: _quick_key[:30], _stale_age, _stale_idle, _raw_stale_timeout, _stale_detail, ) + self._invalidate_session_run_generation( + _quick_key, + reason="stale_running_agent_eviction", + ) self._release_running_agent_state(_quick_key) if _quick_key in self._running_agents: @@ -3035,15 +3261,12 @@ class GatewayRunner: # _interrupt_requested. Force-clean _running_agents so the session # is unlocked and subsequent messages are processed normally. if _cmd_def_inner and _cmd_def_inner.name == "stop": - running_agent = self._running_agents.get(_quick_key) - if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: - running_agent.interrupt("Stop requested") - # Force-clean: remove the session lock regardless of agent state - adapter = self.adapters.get(source.platform) - if adapter and hasattr(adapter, 'get_pending_message'): - adapter.get_pending_message(_quick_key) # consume and discard - self._pending_messages.pop(_quick_key, None) - self._release_running_agent_state(_quick_key) + await self._interrupt_and_clear_session( + _quick_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command", + ) logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20]) return "⚡ Stopped. You can continue this session." @@ -3055,17 +3278,15 @@ class GatewayRunner: # doesn't get re-processed as a user message after the # interrupt completes. 
if _cmd_def_inner and _cmd_def_inner.name == "new":
-                running_agent = self._running_agents.get(_quick_key)
-                if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
-                    running_agent.interrupt("Session reset requested")
                 # Clear any pending messages so the old text doesn't replay
-                adapter = self.adapters.get(source.platform)
-                if adapter and hasattr(adapter, 'get_pending_message'):
-                    adapter.get_pending_message(_quick_key)  # consume and discard
-                self._pending_messages.pop(_quick_key, None)
+                await self._interrupt_and_clear_session(
+                    _quick_key,
+                    source,
+                    interrupt_reason=_INTERRUPT_REASON_RESET,
+                    invalidation_reason="new_command",
+                )
                 # Clean up the running agent entry so the reset handler
                 # doesn't think an agent is still active.
-                self._release_running_agent_state(_quick_key)
                 return await self._handle_reset_command(event)
 
             # /queue — queue without interrupting
@@ -3075,10 +3296,9 @@
                     return "Usage: /queue <message>"
                 adapter = self.adapters.get(source.platform)
                 if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                         text=queued_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                         source=event.source,
                         message_id=event.message_id,
                         channel_prompt=event.channel_prompt,
@@ -3100,10 +3320,9 @@
                 # Agent hasn't started yet — queue as turn-boundary fallback.
                 adapter = self.adapters.get(source.platform)
                 if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                         text=steer_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                         source=event.source,
                         message_id=event.message_id,
                         channel_prompt=event.channel_prompt,
@@ -3123,10 +3342,9 @@
                 # Running agent is missing or lacks steer() — fall back to queue.
                 adapter = self.adapters.get(source.platform)
                 if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                         text=steer_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                         source=event.source,
                         message_id=event.message_id,
                         channel_prompt=event.channel_prompt,
@@ -3156,6 +3374,20 @@
             if _cmd_def_inner and _cmd_def_inner.name == "background":
                 return await self._handle_background_command(event)
 
+            # Session-level toggles that are safe to run mid-agent —
+            # /yolo can unblock a pending approval prompt, /verbose cycles
+            # the tool-progress display mode for the ongoing stream.
+            # Both modify session state without needing agent interaction
+            # and must not be queued (the safety net would discard them).
+            # /fast and /reasoning are config-only and take effect next
+            # message, so they fall through to the catch-all busy response
+            # below — users should wait and set them between turns.
+            if _cmd_def_inner and _cmd_def_inner.name in ("yolo", "verbose"):
+                if _cmd_def_inner.name == "yolo":
+                    return await self._handle_yolo_command(event)
+                if _cmd_def_inner.name == "verbose":
+                    return await self._handle_verbose_command(event)
+
             # Gateway-handled info/control commands with dedicated
             # running-agent handlers.
if _cmd_def_inner and _cmd_def_inner.name in _DEDICATED_HANDLERS: @@ -3461,9 +3693,8 @@ class GatewayRunner: plugin_handler = get_plugin_command_handler(command.replace("_", "-")) if plugin_handler: user_args = event.get_command_args().strip() - import asyncio as _aio result = plugin_handler(user_args) - if _aio.iscoroutine(result): + if asyncio.iscoroutine(result): result = await result return str(result) if result else None except Exception as e: @@ -3546,9 +3777,10 @@ class GatewayRunner: # same session — corrupting the transcript. self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL self._running_agents_ts[_quick_key] = time.time() + _run_generation = self._begin_session_run_generation(_quick_key) try: - return await self._handle_message_with_agent(event, source, _quick_key) + return await self._handle_message_with_agent(event, source, _quick_key, _run_generation) finally: # If _run_agent replaced the sentinel with a real agent and # then cleaned it up, this is a no-op. If we exited early @@ -3579,12 +3811,12 @@ class GatewayRunner: history = history or [] message_text = event.text or "" - _is_shared_thread = ( - source.chat_type != "dm" - and source.thread_id - and not getattr(self.config, "thread_sessions_per_user", False) + _is_shared_multi_user = is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False), ) - if _is_shared_thread and source.user_name: + if _is_shared_multi_user and source.user_name: message_text = f"[{source.user_name}] {message_text}" if event.media_urls: @@ -3644,9 +3876,7 @@ class GatewayRunner: for i, path in enumerate(event.media_urls): mtype = event.media_types[i] if i < len(event.media_types) else "" if mtype in ("", "application/octet-stream"): - import os as _os2 - - _ext = _os2.path.splitext(path)[1].lower() + _ext = os.path.splitext(path)[1].lower() if _ext in _TEXT_EXTENSIONS: mtype = "text/plain" else: @@ -3656,13 +3886,10 @@ class GatewayRunner: if not mtype.startswith(("application/", "text/")): continue - import os as _os - import re as _re - - basename = _os.path.basename(path) + basename = os.path.basename(path) parts = basename.split("_", 2) display_name = parts[2] if len(parts) >= 3 else basename - display_name = _re.sub(r'[^\w.\- ]', '_', display_name) + display_name = re.sub(r'[^\w.\- ]', '_', display_name) if mtype.startswith("text/"): context_note = ( @@ -3679,14 +3906,14 @@ class GatewayRunner: message_text = f"{context_note}\n\n{message_text}" if getattr(event, "reply_to_text", None) and event.reply_to_message_id: + # Always inject the reply-to pointer — even when the quoted text + # already appears in history. The prefix isn't deduplication, it's + # disambiguation: it tells the agent *which* prior message the user + # is referencing. History can contain the same or similar text + # multiple times, and without an explicit pointer the agent has to + # guess (or answer for both subjects). Token overhead is minimal. 
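+            # Illustratively, the agent then receives:
+            #   [Replying to: "deploy is failing on step 3"]
+            #
+            #   can you look into it?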
reply_snippet = event.reply_to_text[:500] - found_in_history = any( - reply_snippet[:200] in (msg.get("content") or "") - for msg in history - if msg.get("role") in ("assistant", "user", "tool") - ) - if not found_in_history: - message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' + message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' if "@" in message_text: try: @@ -3694,9 +3921,11 @@ class GatewayRunner: from agent.model_metadata import get_model_context_length _msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~")) + _msg_runtime = _resolve_runtime_agent_kwargs() _msg_ctx_len = get_model_context_length( self._model, - base_url=self._base_url or "", + base_url=self._base_url or _msg_runtime.get("base_url") or "", + api_key=_msg_runtime.get("api_key") or "", ) _ctx_result = await preprocess_context_references_async( message_text, @@ -3719,7 +3948,7 @@ class GatewayRunner: return message_text - async def _handle_message_with_agent(self, event, source, _quick_key: str): + async def _handle_message_with_agent(self, event, source, _quick_key: str, run_generation: int): """Inner handler that runs under the _running_agents sentinel guard.""" _msg_start_time = time.time() _platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform) @@ -4176,6 +4405,15 @@ class GatewayRunner: if message_text is None: return + # Bind this gateway run generation to the adapter's active-session + # event so deferred post-delivery callbacks can be released by the + # same run that registered them. + self._bind_adapter_run_generation( + self.adapters.get(source.platform), + session_key, + run_generation, + ) + try: # Emit agent:start hook hook_ctx = { @@ -4194,6 +4432,7 @@ class GatewayRunner: source=source, session_id=session_entry.session_id, session_key=session_key, + run_generation=run_generation, event_message_id=event.message_id, channel_prompt=event.channel_prompt, ) @@ -4206,6 +4445,22 @@ class GatewayRunner: except Exception: pass + if not self._is_session_run_current(_quick_key, run_generation): + logger.info( + "Discarding stale agent result for %s — generation %d is no longer current", + _quick_key[:20] if _quick_key else "?", + run_generation, + ) + _stale_adapter = self.adapters.get(source.platform) + if getattr(type(_stale_adapter), "pop_post_delivery_callback", None) is not None: + _stale_adapter.pop_post_delivery_callback( + _quick_key, + generation=run_generation, + ) + elif _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"): + _stale_adapter._post_delivery_callbacks.pop(_quick_key, None) + return None + response = agent_result.get("final_response") or "" # Convert the agent's internal "(empty)" sentinel into a @@ -4620,6 +4875,7 @@ class GatewayRunner: # Get existing session key session_key = self._session_key_for_source(source) + self._invalidate_session_run_generation(session_key, reason="session_reset") # Flush memories in the background (fire-and-forget) so the user # gets the "Session reset!" response immediately. @@ -4879,14 +5135,23 @@ class GatewayRunner: agent = self._running_agents.get(session_key) if agent is _AGENT_PENDING_SENTINEL: # Force-clean the sentinel so the session is unlocked. 
- self._release_running_agent_state(session_key) + await self._interrupt_and_clear_session( + session_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command_pending", + ) logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20]) return "⚡ Stopped. The agent hadn't started yet — you can continue this session." if agent: - agent.interrupt("Stop requested") # Force-clean the session lock so a truly hung agent doesn't # keep it locked forever. - self._release_running_agent_state(session_key) + await self._interrupt_and_clear_session( + session_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command_handler", + ) return "⚡ Stopped. You can continue this session." else: return "No active task to stop." @@ -4922,7 +5187,6 @@ class GatewayRunner: # Save the requester's routing info so the new gateway process can # notify them once it comes back online. try: - import json as _json notify_data = { "platform": event.source.platform.value if event.source.platform else None, "chat_id": event.source.chat_id, @@ -4930,7 +5194,7 @@ class GatewayRunner: if event.source.thread_id: notify_data["thread_id"] = event.source.thread_id (_hermes_home / ".restart_notify.json").write_text( - _json.dumps(notify_data) + json.dumps(notify_data) ) except Exception as e: logger.debug("Failed to write restart notify file: %s", e) @@ -4941,16 +5205,14 @@ class GatewayRunner: # marker persists so the new gateway can still detect a delayed # /restart redelivery from Telegram. Overwritten on every /restart. try: - import json as _json - import time as _time dedup_data = { "platform": event.source.platform.value if event.source.platform else None, - "requested_at": _time.time(), + "requested_at": time.time(), } if event.platform_update_id is not None: dedup_data["update_id"] = event.platform_update_id (_hermes_home / ".restart_last_processed.json").write_text( - _json.dumps(dedup_data) + json.dumps(dedup_data) ) except Exception as e: logger.debug("Failed to write restart dedup marker: %s", e) @@ -4998,12 +5260,10 @@ class GatewayRunner: return False try: - import json as _json - import time as _time marker_path = _hermes_home / ".restart_last_processed.json" if not marker_path.exists(): return False - data = _json.loads(marker_path.read_text()) + data = json.loads(marker_path.read_text()) except Exception: return False @@ -5017,7 +5277,7 @@ class GatewayRunner: # swallow a fresh /restart from the user. 
requested_at = data.get("requested_at") if isinstance(requested_at, (int, float)): - if _time.time() - requested_at > 300: + if time.time() - requested_at > 300: return False return event.platform_update_id <= recorded_uid @@ -5408,7 +5668,7 @@ class GatewayRunner: # Cache notice cache_enabled = ( - ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower()) + (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower()) or result.api_mode == "anthropic_messages" ) if cache_enabled: @@ -5664,11 +5924,13 @@ class GatewayRunner: """Handle /voice [on|off|tts|channel|leave|status] command.""" args = event.get_command_args().strip().lower() chat_id = event.source.chat_id + platform = event.source.platform + voice_key = self._voice_key(platform, chat_id) - adapter = self.adapters.get(event.source.platform) + adapter = self.adapters.get(platform) if args in ("on", "enable"): - self._voice_mode[chat_id] = "voice_only" + self._voice_mode[voice_key] = "voice_only" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) @@ -5678,13 +5940,13 @@ class GatewayRunner: "Use /voice tts to get voice replies for all messages." ) elif args in ("off", "disable"): - self._voice_mode[chat_id] = "off" + self._voice_mode[voice_key] = "off" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) return "Voice mode disabled. Text-only replies." elif args == "tts": - self._voice_mode[chat_id] = "all" + self._voice_mode[voice_key] = "all" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) @@ -5697,7 +5959,7 @@ class GatewayRunner: elif args == "leave": return await self._handle_voice_channel_leave(event) elif args == "status": - mode = self._voice_mode.get(chat_id, "off") + mode = self._voice_mode.get(voice_key, "off") labels = { "off": "Off (text only)", "voice_only": "On (voice reply to voice messages)", @@ -5721,15 +5983,15 @@ class GatewayRunner: return f"Voice mode: {labels.get(mode, mode)}" else: # Toggle: off → on, on/all → off - current = self._voice_mode.get(chat_id, "off") + current = self._voice_mode.get(voice_key, "off") if current == "off": - self._voice_mode[chat_id] = "voice_only" + self._voice_mode[voice_key] = "voice_only" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) return "Voice mode enabled." 
else: - self._voice_mode[chat_id] = "off" + self._voice_mode[voice_key] = "off" self._save_voice_modes() if adapter: self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) @@ -5775,7 +6037,7 @@ class GatewayRunner: adapter._voice_text_channels[guild_id] = int(event.source.chat_id) if hasattr(adapter, "_voice_sources"): adapter._voice_sources[guild_id] = event.source.to_dict() - self._voice_mode[event.source.chat_id] = "all" + self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all" self._save_voice_modes() self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False) return ( @@ -5802,7 +6064,7 @@ class GatewayRunner: except Exception as e: logger.warning("Error leaving voice channel: %s", e) # Always clean up state even if leave raised an exception - self._voice_mode[event.source.chat_id] = "off" + self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "off" self._save_voice_modes() self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True) if hasattr(adapter, "_voice_input_callback"): @@ -5814,7 +6076,7 @@ class GatewayRunner: Cleans up runner-side voice_mode state that the adapter cannot reach. """ - self._voice_mode[chat_id] = "off" + self._voice_mode[self._voice_key(Platform.DISCORD, chat_id)] = "off" self._save_voice_modes() adapter = self.adapters.get(Platform.DISCORD) self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) @@ -5900,7 +6162,7 @@ class GatewayRunner: return False chat_id = event.source.chat_id - voice_mode = self._voice_mode.get(chat_id, "off") + voice_mode = self._voice_mode.get(self._voice_key(event.source.platform, chat_id), "off") is_voice_input = (event.message_type == MessageType.VOICE) should = ( @@ -6213,6 +6475,11 @@ class GatewayRunner: session_id=task_id, platform=platform_key, user_id=source.user_id, + user_name=source.user_name, + chat_id=source.chat_id, + chat_name=source.chat_name, + chat_type=source.chat_type, + thread_id=source.thread_id, session_db=self._session_db, fallback_model=self._fallback_model, ) @@ -6973,6 +7240,7 @@ class GatewayRunner: tool_calls=msg.get("tool_calls"), tool_call_id=msg.get("tool_call_id"), reasoning=msg.get("reasoning"), + reasoning_content=msg.get("reasoning_content"), ) except Exception: pass # Best-effort copy @@ -7021,6 +7289,38 @@ class GatewayRunner: if cached: agent = cached[0] + # Resolve provider/base_url/api_key for the account-usage fetch. + # Prefer the live agent; fall back to persisted billing data on the + # SessionDB row so `/usage` still returns account info between turns + # when no agent is resident. + provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + if not provider and getattr(self, "_session_db", None) is not None: + try: + _entry_for_billing = self.session_store.get_or_create_session(source) + persisted = self._session_db.get_session(_entry_for_billing.session_id) or {} + except Exception: + persisted = {} + provider = provider or persisted.get("billing_provider") + base_url = base_url or persisted.get("billing_base_url") + + # Fetch account usage off the event loop so slow provider APIs don't + # block the gateway. Failures are non-fatal -- account_lines stays []. 
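+        # (asyncio.to_thread runs the blocking fetch on the default
+        #  thread-pool executor, so the await below never blocks the loop.)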
+ account_lines: list[str] = [] + if provider: + try: + account_snapshot = await asyncio.to_thread( + fetch_account_usage, + provider, + base_url=base_url, + api_key=api_key, + ) + except Exception: + account_snapshot = None + if account_snapshot: + account_lines = render_account_usage_lines(account_snapshot, markdown=True) + if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0: lines = [] @@ -7078,6 +7378,10 @@ class GatewayRunner: if ctx.compression_count: lines.append(f"Compressions: {ctx.compression_count}") + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) # No agent at all -- check session history for a rough count @@ -7087,23 +7391,26 @@ class GatewayRunner: from agent.model_metadata import estimate_messages_tokens_rough msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")] approx = estimate_messages_tokens_rough(msgs) - return ( - f"📊 **Session Info**\n" - f"Messages: {len(msgs)}\n" - f"Estimated context: ~{approx:,} tokens\n" - f"_(Detailed usage available after the first agent response)_" - ) + lines = [ + "📊 **Session Info**", + f"Messages: {len(msgs)}", + f"Estimated context: ~{approx:,} tokens", + "_(Detailed usage available after the first agent response)_", + ] + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) + if account_lines: + return "\n".join(account_lines) return "No usage data available for this session." async def _handle_insights_command(self, event: MessageEvent) -> str: """Handle /insights command -- show usage insights and analytics.""" - import asyncio as _asyncio - args = event.get_command_args().strip() # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash) - import re as _re - args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args) + args = re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args) days = 30 source = None @@ -7132,7 +7439,7 @@ class GatewayRunner: from hermes_state import SessionDB from agent.insights import InsightsEngine - loop = _asyncio.get_running_loop() + loop = asyncio.get_running_loop() def _run_insights(): db = SessionDB() @@ -7490,9 +7797,6 @@ class GatewayRunner: the messenger. The user's next message is intercepted by ``_handle_message`` and written to ``.update_response``. """ - import json - import re as _re - pending_path = _hermes_home / ".update_pending.json" claimed_path = _hermes_home / ".update_pending.claimed.json" output_path = _hermes_home / ".update_output.txt" @@ -7537,7 +7841,7 @@ class GatewayRunner: return def _strip_ansi(text: str) -> str: - return _re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text) + return re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text) bytes_sent = 0 last_stream_time = loop.time() @@ -7685,9 +7989,6 @@ class GatewayRunner: cannot resolve the adapter (e.g. after a gateway restart where the platform hasn't reconnected yet). 
""" - import json - import re as _re - pending_path = _hermes_home / ".update_pending.json" claimed_path = _hermes_home / ".update_pending.claimed.json" output_path = _hermes_home / ".update_output.txt" @@ -7733,7 +8034,7 @@ class GatewayRunner: if adapter and chat_id: # Strip ANSI escape codes for clean display - output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip() + output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip() if output: if len(output) > 3500: output = "…" + output[-3500:] @@ -7766,14 +8067,12 @@ class GatewayRunner: async def _send_restart_notification(self) -> None: """Notify the chat that initiated /restart that the gateway is back.""" - import json as _json - notify_path = _hermes_home / ".restart_notify.json" if not notify_path.exists(): return try: - data = _json.loads(notify_path.read_text()) + data = json.loads(notify_path.read_text()) platform_str = data.get("platform") chat_id = data.get("chat_id") thread_id = data.get("thread_id") @@ -7859,7 +8158,6 @@ class GatewayRunner: The enriched message string with vision descriptions prepended. """ from tools.vision_tools import vision_analyze_tool - import json as _json analysis_prompt = ( "Describe everything visible in this image in thorough detail. " @@ -7875,7 +8173,7 @@ class GatewayRunner: image_url=path, user_prompt=analysis_prompt, ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "") enriched_parts.append( @@ -7934,7 +8232,6 @@ class GatewayRunner: return disabled_note from tools.transcription_tools import transcribe_audio - import asyncio enriched_parts = [] for path in audio_paths: @@ -8070,7 +8367,6 @@ class GatewayRunner: if not adapter: return try: - from gateway.platforms.base import MessageEvent, MessageType synth_event = MessageEvent( text=synth_text, message_type=MessageType.TEXT, @@ -8175,7 +8471,6 @@ class GatewayRunner: break if adapter and source.chat_id: try: - from gateway.platforms.base import MessageEvent, MessageType synth_event = MessageEvent( text=synth_text, message_type=MessageType.TEXT, @@ -8333,6 +8628,84 @@ class GatewayRunner: if hasattr(self, "_busy_ack_ts"): self._busy_ack_ts.pop(session_key, None) + def _begin_session_run_generation(self, session_key: str) -> int: + """Claim a fresh run generation token for ``session_key``. + + Every top-level gateway turn gets a monotonically increasing token. + If a later command like /stop or /new invalidates that token while the + old worker is still unwinding, the late result can be recognized and + dropped instead of bleeding into the fresh session. 
+ """ + if not session_key: + return 0 + generations = self.__dict__.get("_session_run_generation") + if generations is None: + generations = {} + self._session_run_generation = generations + next_generation = int(generations.get(session_key, 0)) + 1 + generations[session_key] = next_generation + return next_generation + + def _invalidate_session_run_generation(self, session_key: str, *, reason: str = "") -> int: + """Invalidate any in-flight run token for ``session_key``.""" + generation = self._begin_session_run_generation(session_key) + if reason: + logger.info( + "Invalidated run generation for %s → %d (%s)", + session_key[:20], + generation, + reason, + ) + return generation + + def _is_session_run_current(self, session_key: str, generation: int) -> bool: + """Return True when ``generation`` is still current for ``session_key``.""" + if not session_key: + return True + generations = self.__dict__.get("_session_run_generation") or {} + return int(generations.get(session_key, 0)) == int(generation) + + def _bind_adapter_run_generation( + self, + adapter: Any, + session_key: str, + generation: int | None, + ) -> None: + """Bind a gateway run generation to the adapter's active-session event.""" + if not adapter or not session_key or generation is None: + return + try: + interrupt_event = getattr(adapter, "_active_sessions", {}).get(session_key) + if interrupt_event is not None: + setattr(interrupt_event, "_hermes_run_generation", int(generation)) + except Exception: + pass + + async def _interrupt_and_clear_session( + self, + session_key: str, + source: SessionSource, + *, + interrupt_reason: str, + invalidation_reason: str, + release_running_state: bool = True, + ) -> None: + """Interrupt the current run and clear queued session state consistently.""" + if not session_key: + return + running_agent = self._running_agents.get(session_key) + if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + running_agent.interrupt(interrupt_reason) + self._invalidate_session_run_generation(session_key, reason=invalidation_reason) + adapter = self.adapters.get(source.platform) + if adapter and hasattr(adapter, "interrupt_session_activity"): + await adapter.interrupt_session_activity(session_key, source.chat_id) + if adapter and hasattr(adapter, "get_pending_message"): + adapter.get_pending_message(session_key) # consume and discard + self._pending_messages.pop(session_key, None) + if release_running_state: + self._release_running_agent_state(session_key) + def _evict_cached_agent(self, session_key: str) -> None: """Remove a cached agent for a session (called on /new, /model, etc).""" _lock = getattr(self, "_agent_cache_lock", None) @@ -8514,6 +8887,7 @@ class GatewayRunner: source: "SessionSource", session_id: str, session_key: str = None, + run_generation: Optional[int] = None, event_message_id: Optional[str] = None, ) -> Dict[str, Any]: """Forward the message to a remote Hermes API server instead of @@ -8549,6 +8923,11 @@ class GatewayRunner: proxy_key = os.getenv("GATEWAY_PROXY_KEY", "").strip() + def _run_still_current() -> bool: + if run_generation is None or not session_key: + return True + return self._is_session_run_current(session_key, run_generation) + # Build messages in OpenAI chat format -------------------------- # # The remote api_server can maintain session continuity via @@ -8613,7 +8992,6 @@ class GatewayRunner: if _streaming_enabled: try: from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig - from gateway.config import Platform _adapter = 
self.adapters.get(source.platform) if _adapter: _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True) @@ -8678,6 +9056,21 @@ class GatewayRunner: # Parse SSE stream buffer = "" async for chunk in resp.content.iter_any(): + if not _run_still_current(): + logger.info( + "Discarding stale proxy stream for %s — generation %d is no longer current", + session_key[:20] if session_key else "?", + run_generation or 0, + ) + return { + "final_response": "", + "messages": [], + "api_calls": 0, + "tools": [], + "history_offset": len(history), + "session_id": session_id, + "response_previewed": False, + } text = chunk.decode("utf-8", errors="replace") buffer += text @@ -8727,6 +9120,21 @@ class GatewayRunner: stream_task.cancel() _elapsed = time.time() - _start + if not _run_still_current(): + logger.info( + "Discarding stale proxy result for %s — generation %d is no longer current", + session_key[:20] if session_key else "?", + run_generation or 0, + ) + return { + "final_response": "", + "messages": [], + "api_calls": 0, + "tools": [], + "history_offset": len(history), + "session_id": session_id, + "response_previewed": False, + } logger.info( "proxy response: url=%s session=%s time=%.1fs response=%d chars", proxy_url, (session_id or "")[:20], _elapsed, len(full_response), @@ -8755,6 +9163,7 @@ class GatewayRunner: source: SessionSource, session_id: str, session_key: str = None, + run_generation: Optional[int] = None, _interrupt_depth: int = 0, event_message_id: Optional[str] = None, channel_prompt: Optional[str] = None, @@ -8780,11 +9189,17 @@ class GatewayRunner: source=source, session_id=session_id, session_key=session_key, + run_generation=run_generation, event_message_id=event_message_id, ) from run_agent import AIAgent import queue + + def _run_still_current() -> bool: + if run_generation is None or not session_key: + return True + return self._is_session_run_current(session_key, run_generation) user_config = _load_gateway_config() platform_key = _platform_config_key(source.platform) @@ -8839,7 +9254,7 @@ class GatewayRunner: def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs): """Callback invoked by agent on tool lifecycle events.""" - if not progress_queue: + if not progress_queue or not _run_still_current(): return # Only act on tool.started events (ignore tool.completed, reasoning.available, etc.) @@ -8860,8 +9275,7 @@ class GatewayRunner: if args: from agent.display import get_tool_preview_max_len _pl = get_tool_preview_max_len() - import json as _json - args_str = _json.dumps(args, ensure_ascii=False, default=str) + args_str = json.dumps(args, ensure_ascii=False, default=str) # When tool_preview_length is 0 (default), don't truncate # in verbose mode — the user explicitly asked for full # detail. Platform message-length limits handle the rest. @@ -8927,8 +9341,7 @@ class GatewayRunner: # Skip tool progress for platforms that don't support message # editing (e.g. iMessage/BlueBubbles) — each progress update # would become a separate message bubble, which is noisy. 
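A note on the override check the next hunk switches to the module-level import for: comparing the function object on the class, `type(adapter).edit_message is BasePlatformAdapter.edit_message`, asks "did this subclass override `edit_message`?" without ever calling it. A minimal self-contained sketch of the idiom (the classes here are illustrative stand-ins, not the gateway's real adapters):

```python
class BaseAdapter:
    def edit_message(self, chat_id, message_id, content):
        raise NotImplementedError("editing not supported")

class TelegramLike(BaseAdapter):
    def edit_message(self, chat_id, message_id, content):
        return f"edited {message_id}"

class IMessageLike(BaseAdapter):
    pass  # inherits the base stub -> cannot edit in place

def supports_editing(adapter) -> bool:
    # Compare the attribute on the class, not the bound method: bound
    # methods are created fresh on every attribute access, so
    # `adapter.edit_message is BaseAdapter.edit_message` would always
    # be False even when nothing was overridden.
    return type(adapter).edit_message is not BaseAdapter.edit_message

assert supports_editing(TelegramLike())
assert not supports_editing(IMessageLike())
```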
- from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter - if type(adapter).edit_message is _BaseAdapter.edit_message: + if type(adapter).edit_message is BasePlatformAdapter.edit_message: while not progress_queue.empty(): try: progress_queue.get_nowait() @@ -8944,6 +9357,14 @@ class GatewayRunner: while True: try: + if not _run_still_current(): + while not progress_queue.empty(): + try: + progress_queue.get_nowait() + except Exception: + break + return + raw = progress_queue.get_nowait() # Handle dedup messages: update last line with repeat counter @@ -8969,6 +9390,9 @@ class GatewayRunner: await asyncio.sleep(_remaining) continue + if not _run_still_current(): + return + if can_edit and progress_msg_id is not None: # Try to edit the existing progress message full_text = "\n".join(progress_lines) @@ -9004,7 +9428,8 @@ class GatewayRunner: # Restore typing indicator await asyncio.sleep(0.3) - await adapter.send_typing(source.chat_id, metadata=_progress_metadata) + if _run_still_current(): + await adapter.send_typing(source.chat_id, metadata=_progress_metadata) except queue.Empty: await asyncio.sleep(0.3) @@ -9048,6 +9473,8 @@ class GatewayRunner: _hooks_ref = self.hooks def _step_callback_sync(iteration: int, prev_tools: list) -> None: + if not _run_still_current(): + return try: # prev_tools may be list[str] or list[dict] with "name"/"result" # keys. Normalise to keep "tool_names" backward-compatible for @@ -9078,7 +9505,7 @@ class GatewayRunner: _status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None def _status_callback_sync(event_type: str, message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return try: asyncio.run_coroutine_threadsafe( @@ -9209,12 +9636,16 @@ class GatewayRunner: metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None, ) if _want_stream_deltas: - _stream_delta_cb = _stream_consumer.on_delta + def _stream_delta_cb(text: str) -> None: + if _run_still_current(): + _stream_consumer.on_delta(text) stream_consumer_holder[0] = _stream_consumer except Exception as _sc_err: logger.debug("Could not set up stream consumer: %s", _sc_err) def _interim_assistant_cb(text: str, *, already_streamed: bool = False) -> None: + if not _run_still_current(): + return if _stream_consumer is not None: if already_streamed: _stream_consumer.on_segment_break() @@ -9292,6 +9723,11 @@ class GatewayRunner: session_id=session_id, platform=platform_key, user_id=source.user_id, + user_name=source.user_name, + chat_id=source.chat_id, + chat_name=source.chat_name, + chat_type=source.chat_type, + thread_id=source.thread_id, gateway_session_key=session_key, session_db=self._session_db, fallback_model=self._fallback_model, @@ -9318,7 +9754,7 @@ class GatewayRunner: _bg_review_pending_lock = threading.Lock() def _deliver_bg_review_message(message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return try: asyncio.run_coroutine_threadsafe( @@ -9342,7 +9778,7 @@ class GatewayRunner: # Background review delivery — send "💾 Memory updated" etc. to user def _bg_review_send(message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return if not _bg_review_release.is_set(): with _bg_review_pending_lock: @@ -9355,9 +9791,16 @@ class GatewayRunner: # Register the release hook on the adapter so base.py's finally # block can fire it after delivering the main response. 
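The hunk below moves from poking the adapter's private `_post_delivery_callbacks` dict to a public `register_post_delivery_callback(..., generation=...)` API when the adapter exposes one. A hedged sketch of what a generation-aware registry could look like; the class itself is hypothetical, only the method names and the stale-generation semantics come from the diff:

```python
from typing import Callable, Optional

class PostDeliveryRegistry:
    """Sketch: callbacks keyed by session, tagged with a run generation."""

    def __init__(self) -> None:
        self._callbacks: dict[str, tuple[Optional[int], Callable[[], None]]] = {}

    def register(self, session_key: str, cb: Callable[[], None],
                 *, generation: Optional[int] = None) -> None:
        self._callbacks[session_key] = (generation, cb)

    def pop(self, session_key: str,
            *, generation: Optional[int] = None) -> Optional[Callable[[], None]]:
        entry = self._callbacks.get(session_key)
        if entry is None:
            return None
        stored_gen, cb = entry
        # A generation mismatch means the callback was registered by a
        # run that has since been superseded (e.g. /stop or /new landed
        # in between) — hand back nothing rather than firing it late.
        if generation is not None and stored_gen is not None and stored_gen != generation:
            return None
        del self._callbacks[session_key]
        return cb
```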
if _status_adapter and session_key: - _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None) - if _pdc is not None: - _pdc[session_key] = _release_bg_review_messages + if getattr(type(_status_adapter), "register_post_delivery_callback", None) is not None: + _status_adapter.register_post_delivery_callback( + session_key, + _release_bg_review_messages, + generation=run_generation, + ) + else: + _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None) + if _pdc is not None: + _pdc[session_key] = _release_bg_review_messages # Store agent reference for interrupt support agent_holder[0] = agent @@ -9959,7 +10402,7 @@ class GatewayRunner: # Interrupt the agent if it's still running so the thread # pool worker is freed. if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"): - _timed_out_agent.interrupt("Execution timed out (inactivity)") + _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT) _timeout_mins = int(_agent_timeout // 60) or 1 @@ -10024,11 +10467,29 @@ class GatewayRunner: if result and adapter and session_key: pending_event = _dequeue_pending_event(adapter, session_key) if result.get("interrupted") and not pending_event and result.get("interrupt_message"): - pending = result.get("interrupt_message") + interrupt_message = result.get("interrupt_message") + if _is_control_interrupt_message(interrupt_message): + logger.info( + "Ignoring control interrupt message for session %s: %s", + session_key[:20] if session_key else "?", + interrupt_message, + ) + else: + pending = interrupt_message elif pending_event: pending = pending_event.text or _build_media_placeholder(pending_event) logger.debug("Processing queued message after agent completion: '%s...'", pending[:40]) + # Leftover /steer: if a steer arrived after the last tool batch + # (e.g. during the final API call), the agent couldn't inject it + # and returned it in result["pending_steer"]. Deliver it as the + # next user turn so it isn't silently dropped. + if result and not pending and not pending_event: + _leftover_steer = result.get("pending_steer") + if _leftover_steer: + pending = _leftover_steer + logger.debug("Delivering leftover /steer as next turn: '%s...'", pending[:40]) + # Safety net: if the pending text is a slash command (e.g. "/stop", # "/new"), discard it — commands should never be passed to the agent # as user input. The primary fix is in base.py (commands bypass the @@ -10129,7 +10590,17 @@ class GatewayRunner: # first response has been delivered. Pop from the # adapter's callback dict (prevents double-fire in # base.py's finally block) and call it. 
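The `_INTERRUPT_REASON_TIMEOUT` constant and the `_is_control_interrupt_message` helper used below suggest a sentinel pattern: gateway-internal interrupts (timeouts, stop commands) carry a known constant string, so they can be told apart from a genuine user interjection that should become the next turn. A sketch under that assumption; only `_INTERRUPT_REASON_TIMEOUT` and the helper's name appear in the diff, the rest is illustrative:

```python
# Control-interrupt sentinels. Only _INTERRUPT_REASON_TIMEOUT is shown
# in the diff; _INTERRUPT_REASON_STOP is a hypothetical second member.
_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)"
_INTERRUPT_REASON_STOP = "Stopped by user command"

_CONTROL_INTERRUPT_MESSAGES = frozenset({
    _INTERRUPT_REASON_TIMEOUT,
    _INTERRUPT_REASON_STOP,
})

def _is_control_interrupt_message(message: "str | None") -> bool:
    # Control interrupts describe gateway-internal events; they must
    # never be replayed to the agent as if a user had typed them.
    return bool(message) and message in _CONTROL_INTERRUPT_MESSAGES
```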
- if adapter and hasattr(adapter, "_post_delivery_callbacks"): + if getattr(type(adapter), "pop_post_delivery_callback", None) is not None: + _bg_cb = adapter.pop_post_delivery_callback( + session_key, + generation=run_generation, + ) + if callable(_bg_cb): + try: + _bg_cb() + except Exception: + pass + elif adapter and hasattr(adapter, "_post_delivery_callbacks"): _bg_cb = adapter._post_delivery_callbacks.pop(session_key, None) if callable(_bg_cb): try: @@ -10177,6 +10648,7 @@ class GatewayRunner: source=next_source, session_id=session_id, session_key=session_key, + run_generation=run_generation, _interrupt_depth=_interrupt_depth + 1, event_message_id=next_message_id, channel_prompt=next_channel_prompt, @@ -10322,7 +10794,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = # The PID file is scoped to HERMES_HOME, so future multi-profile # setups (each profile using a distinct HERMES_HOME) will naturally # allow concurrent instances without tripping this guard. - import time as _time from gateway.status import get_running_pid, remove_pid_file, terminate_pid existing_pid = get_running_pid() if existing_pid is not None and existing_pid != os.getpid(): @@ -10362,7 +10833,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = for _ in range(20): try: os.kill(existing_pid, 0) - _time.sleep(0.5) + time.sleep(0.5) except (ProcessLookupError, PermissionError): break # Process is gone else: @@ -10373,10 +10844,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = ) try: terminate_pid(existing_pid, force=True) - _time.sleep(0.5) + time.sleep(0.5) except (ProcessLookupError, PermissionError, OSError): pass remove_pid_file() + # remove_pid_file() is a no-op when the PID doesn't match. + # Force-unlink to cover the old-process-crashed case. + try: + (get_hermes_home() / "gateway.pid").unlink(missing_ok=True) + except Exception: + pass # Clean up any takeover marker the old process didn't consume # (e.g. SIGKILL'd before its shutdown handler could read it). try: @@ -10515,6 +10992,30 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = else: logger.info("Skipping signal handlers (not running in main thread).") + # Claim the PID file BEFORE bringing up any platform adapters. + # This closes the --replace race window: two concurrent `gateway run + # --replace` invocations both pass the termination-wait above, but + # only the winner of the O_CREAT|O_EXCL race below will ever open + # Telegram polling, Discord gateway sockets, etc. The loser exits + # cleanly before touching any external service. + import atexit + from gateway.status import write_pid_file, remove_pid_file, get_running_pid + _current_pid = get_running_pid() + if _current_pid is not None and _current_pid != os.getpid(): + logger.error( + "Another gateway instance (PID %d) started during our startup. " + "Exiting to avoid double-running.", _current_pid + ) + return False + try: + write_pid_file() + except FileExistsError: + logger.error( + "PID file race lost to another gateway instance. Exiting." 
+ ) + return False + atexit.register(remove_pid_file) + # Start the gateway success = await runner.start() if not success: @@ -10524,12 +11025,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = logger.error("Gateway exiting cleanly: %s", runner.exit_reason) return True - # Write PID file so CLI can detect gateway is running - import atexit - from gateway.status import write_pid_file, remove_pid_file - write_pid_file() - atexit.register(remove_pid_file) - # Start background cron ticker so scheduled jobs fire automatically. # Pass the event loop so cron delivery can use live adapters (E2EE support). cron_stop = threading.Event() diff --git a/gateway/session.py b/gateway/session.py index 8b31c2b0aa..ea3f174909 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -152,6 +152,7 @@ class SessionContext: source: SessionSource connected_platforms: List[Platform] home_channels: Dict[Platform, HomeChannel] + shared_multi_user_session: bool = False # Session metadata session_key: str = "" @@ -166,6 +167,7 @@ class SessionContext: "home_channels": { p.value: hc.to_dict() for p, hc in self.home_channels.items() }, + "shared_multi_user_session": self.shared_multi_user_session, "session_key": self.session_key, "session_id": self.session_id, "created_at": self.created_at.isoformat() if self.created_at else None, @@ -240,18 +242,16 @@ def build_session_context_prompt( lines.append(f"**Channel Topic:** {context.source.chat_topic}") # User identity. - # In shared thread sessions (non-DM with thread_id), multiple users - # contribute to the same conversation. Don't pin a single user name - # in the system prompt — it changes per-turn and would bust the prompt - # cache. Instead, note that this is a multi-user thread; individual - # sender names are prefixed on each user message by the gateway. - _is_shared_thread = ( - context.source.chat_type != "dm" - and context.source.thread_id - ) - if _is_shared_thread: + # In shared multi-user sessions (shared threads OR shared non-thread groups + # when group_sessions_per_user=False), multiple users contribute to the same + # conversation. Don't pin a single user name in the system prompt — it + # changes per-turn and would bust the prompt cache. Instead, note that + # this is a multi-user session; individual sender names are prefixed on + # each user message by the gateway. + if context.shared_multi_user_session: + session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session" lines.append( - "**Session type:** Multi-user thread — messages are prefixed " + f"**Session type:** {session_label} — messages are prefixed " "with [sender name]. Multiple users may participate." ) elif context.source.user_name: @@ -467,6 +467,27 @@ class SessionEntry: ) +def is_shared_multi_user_session( + source: SessionSource, + *, + group_sessions_per_user: bool = True, + thread_sessions_per_user: bool = False, +) -> bool: + """Return True when a non-DM session is shared across participants. + + Mirrors the isolation rules in :func:`build_session_key`: + - DMs are never shared. + - Threads are shared unless ``thread_sessions_per_user`` is True. + - Non-thread group/channel sessions are shared unless + ``group_sessions_per_user`` is True (default: True = isolated). 
+ """ + if source.chat_type == "dm": + return False + if source.thread_id: + return not thread_sessions_per_user + return not group_sessions_per_user + + def build_session_key( source: SessionSource, group_sessions_per_user: bool = True, @@ -926,12 +947,18 @@ class SessionStore: continue # Never prune sessions with an active background process # attached — the user may still be waiting on output. + # The callback is keyed by session_key (see process_registry. + # has_active_for_session); passing session_id here used to + # never match, so active sessions got pruned anyway. if self._has_active_processes_fn is not None: try: - if self._has_active_processes_fn(entry.session_id): + if self._has_active_processes_fn(entry.session_key): continue - except Exception: - pass + except Exception as exc: + logger.debug( + "has_active_processes_fn raised during prune for %s: %s", + entry.session_key, exc, + ) if entry.updated_at < cutoff: removed_keys.append(key) for key in removed_keys: @@ -1120,6 +1147,10 @@ class SessionStore: tool_name=message.get("tool_name"), tool_calls=message.get("tool_calls"), tool_call_id=message.get("tool_call_id"), + reasoning=message.get("reasoning") if message.get("role") == "assistant" else None, + reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None, + reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None, + codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None, ) except Exception as e: logger.debug("Session DB operation failed: %s", e) @@ -1149,6 +1180,7 @@ class SessionStore: tool_calls=msg.get("tool_calls"), tool_call_id=msg.get("tool_call_id"), reasoning=msg.get("reasoning") if role == "assistant" else None, + reasoning_content=msg.get("reasoning_content") if role == "assistant" else None, reasoning_details=msg.get("reasoning_details") if role == "assistant" else None, codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None, ) @@ -1232,6 +1264,11 @@ def build_session_context( source=source, connected_platforms=connected, home_channels=home_channels, + shared_multi_user_session=is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False), + ), ) if session_entry: diff --git a/gateway/session_context.py b/gateway/session_context.py index 7f8aca3eb9..9dc051e3a2 100644 --- a/gateway/session_context.py +++ b/gateway/session_context.py @@ -56,6 +56,12 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS _SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET) _SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET) +# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs +# don't clobber each other's delivery targets. 
+_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET) +_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET) +_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET) + _VAR_MAP = { "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM, "HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID, @@ -64,6 +70,9 @@ _VAR_MAP = { "HERMES_SESSION_USER_ID": _SESSION_USER_ID, "HERMES_SESSION_USER_NAME": _SESSION_USER_NAME, "HERMES_SESSION_KEY": _SESSION_KEY, + "HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM, + "HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID, + "HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID, } diff --git a/gateway/status.py b/gateway/status.py index e1598e1797..74763332c8 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -225,8 +225,28 @@ def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None: def write_pid_file() -> None: - """Write the current process PID and metadata to the gateway PID file.""" - _write_json_file(_get_pid_path(), _build_pid_record()) + """Write the current process PID and metadata to the gateway PID file. + + Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace + invocations race: exactly one process wins and the rest get + FileExistsError. + """ + path = _get_pid_path() + path.parent.mkdir(parents=True, exist_ok=True) + record = json.dumps(_build_pid_record()) + try: + fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY) + except FileExistsError: + raise # Let caller decide: another gateway is racing us + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(record) + except Exception: + try: + path.unlink(missing_ok=True) + except OSError: + pass + raise def write_runtime_status( diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index ae00aee392..78e365712d 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -430,6 +430,21 @@ class GatewayStreamConsumer: # a real string like "msg_1", not "__no_edit__", so that case # still resets and creates a fresh segment as intended.) if got_segment_break: + # If the segment-break edit failed to deliver the + # accumulated content (flood control that has not yet + # promoted to fallback mode, or fallback mode itself), + # _accumulated still holds pre-boundary text the user + # never saw. Flush that tail as a continuation message + # before the reset below wipes _accumulated — otherwise + # text generated before the tool boundary is silently + # dropped (issue #8124). + if ( + self._accumulated + and not current_update_visible + and self._message_id + and self._message_id != "__no_edit__" + ): + await self._flush_segment_tail_on_edit_failure() self._reset_segment_state(preserve_no_edit=True) await asyncio.sleep(0.05) # Small yield to not busy-loop @@ -556,6 +571,30 @@ class GatewayStreamConsumer: if final_text.strip() and final_text != self._visible_prefix(): continuation = final_text else: + # Defence-in-depth for #7183: the last edit may still show the + # cursor character because fallback mode was entered after an + # edit failure left it stuck. Try one final edit to strip it + # so the message doesn't freeze with a visible ▉. Best-effort + # — if this edit also fails (flood control still active), + # _try_strip_cursor has already been called on fallback entry + # and the adaptive-backoff retries will have had their shot. 
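Both stream-consumer fixes in this area (#8124's segment-tail flush and #7183's cursor strip, whose implementation continues just below) reduce to the same bookkeeping question: what did the model produce versus what did the user actually see? A reduced model of the tail computation; the real `_flush_segment_tail_on_edit_failure` also handles fallback prefixes and display cleanup:

```python
def undelivered_tail(accumulated: str, visible: str) -> str:
    """Sketch: the portion of `accumulated` the user never saw.

    `visible` is the prefix last successfully delivered by an edit;
    whatever follows it must be flushed as a new message before the
    segment reset wipes `accumulated`.
    """
    tail = accumulated
    if visible and tail.startswith(visible):
        tail = tail[len(visible):].lstrip()
    return tail

# An edit failed after "Step 1 done." was shown, but generation had
# already moved past it — this is the text a naive reset would drop.
assert undelivered_tail("Step 1 done. Running tests.", "Step 1 done.") == "Running tests."
```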
+ if ( + self._message_id + and self._last_sent_text + and self.cfg.cursor + and self._last_sent_text.endswith(self.cfg.cursor) + ): + clean_text = self._last_sent_text[:-len(self.cfg.cursor)] + try: + result = await self.adapter.edit_message( + chat_id=self.chat_id, + message_id=self._message_id, + content=clean_text, + ) + if result.success: + self._last_sent_text = clean_text + except Exception: + pass self._already_sent = True self._final_response_sent = True return @@ -620,6 +659,39 @@ class GatewayStreamConsumer: err_lower = err.lower() return "flood" in err_lower or "retry after" in err_lower or "rate" in err_lower + async def _flush_segment_tail_on_edit_failure(self) -> None: + """Deliver un-sent tail content before a segment-break reset. + + When an edit fails (flood control, transport error) and a tool + boundary arrives before the next retry, ``_accumulated`` holds text + that was generated but never shown to the user. Without this flush, + the segment reset would discard that tail and leave a frozen cursor + in the partial message. + + Sends the tail that sits after the last successfully-delivered + prefix as a new message, and best-effort strips the stuck cursor + from the previous partial message. + """ + if not self._fallback_final_send: + await self._try_strip_cursor() + visible = self._fallback_prefix or self._visible_prefix() + tail = self._accumulated + if visible and tail.startswith(visible): + tail = tail[len(visible):].lstrip() + tail = self._clean_for_display(tail) + if not tail.strip(): + return + try: + result = await self.adapter.send( + chat_id=self.chat_id, + content=tail, + metadata=self.metadata, + ) + if result.success: + self._already_sent = True + except Exception as e: + logger.error("Segment-break tail flush error: %s", e) + async def _try_strip_cursor(self) -> None: """Best-effort edit to remove the cursor from the last visible message. diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 4623147a5a..3fab36a2c3 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -20,6 +20,7 @@ import logging import os import shutil import shlex +import ssl import stat import base64 import hashlib @@ -71,6 +72,8 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1" DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com" DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot" DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1" +STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1" +STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1" CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 @@ -151,7 +154,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { id="gemini", name="Google AI Studio", auth_type="api_key", - inference_base_url="https://generativelanguage.googleapis.com/v1beta/openai", + inference_base_url="https://generativelanguage.googleapis.com/v1beta", api_key_env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"), base_url_env_var="GEMINI_BASE_URL", ), @@ -167,8 +170,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { id="kimi-coding", name="Kimi / Moonshot", auth_type="api_key", + # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat). + # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding + # by _resolve_kimi_base_url() below. 
inference_base_url="https://api.moonshot.ai/v1", - api_key_env_vars=("KIMI_API_KEY",), + api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"), base_url_env_var="KIMI_BASE_URL", ), "kimi-coding-cn": ProviderConfig( @@ -178,6 +184,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { inference_base_url="https://api.moonshot.cn/v1", api_key_env_vars=("KIMI_CN_API_KEY",), ), + "stepfun": ProviderConfig( + id="stepfun", + name="StepFun Step Plan", + auth_type="api_key", + inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL, + api_key_env_vars=("STEPFUN_API_KEY",), + base_url_env_var="STEPFUN_BASE_URL", + ), "arcee": ProviderConfig( id="arcee", name="Arcee AI", @@ -339,10 +353,16 @@ def get_anthropic_key() -> str: # ============================================================================= # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work -# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on -# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set +# on api.kimi.com/coding. Legacy keys from platform.moonshot.ai work on +# api.moonshot.ai/v1 (the old default). Auto-detect when user hasn't set # KIMI_BASE_URL explicitly. -KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1" +# +# Note: the base URL intentionally has NO /v1 suffix. The /coding endpoint +# speaks the Anthropic Messages protocol, and the anthropic SDK appends +# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages" +# (the correct target). Using "/coding/v1" here would produce +# "/coding/v1/v1/messages" (a 404). +KIMI_CODE_BASE_URL = "https://api.kimi.com/coding" def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str: @@ -353,6 +373,9 @@ def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> """ if env_override: return env_override + # No key → nothing to infer from. Return default without inspecting. + if not api_key: + return default_url if api_key.startswith("sk-kimi-"): return KIMI_CODE_BASE_URL return default_url @@ -480,6 +503,14 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> if env_override: return env_override + # No API key set → don't probe (would fire N×M HTTPS requests with an + # empty Bearer token, all returning 401). This path is hit during + # auxiliary-client auto-detection when the user has no Z.AI credentials + # at all — the caller discards the result immediately, so the probe is + # pure latency for every AIAgent construction. + if not api_key: + return default_url + # Check provider-state cache for a previously-detected endpoint. 
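Pulling the Z.AI resolution logic above together, the order is: explicit env override, then the empty-key short-circuit, then the cached detection, then the live probe. A hedged sketch of that flow; the `cache` dict and `probe` callable are stand-ins for the provider-state store and the real endpoint probe:

```python
from typing import Callable, Optional

def resolve_base_url(
    api_key: str,
    default_url: str,
    env_override: str,
    cache: dict[str, str],
    probe: Callable[[str], Optional[str]],
) -> str:
    if env_override:
        return env_override
    if not api_key:
        # Probing with an empty Bearer token can only produce 401s —
        # skip the network round-trips entirely.
        return default_url
    if cached := cache.get("endpoint"):
        return cached
    detected = probe(api_key)
    if detected:
        cache["endpoint"] = detected  # remember for the next construction
        return detected
    return default_url
```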
auth_store = _load_auth_store() state = _load_provider_state(auth_store, "zai") or {} @@ -971,6 +1002,7 @@ def resolve_provider( "x-ai": "xai", "x.ai": "xai", "grok": "xai", "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", + "step": "stepfun", "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", "claude": "anthropic", "claude-code": "anthropic", @@ -1652,7 +1684,7 @@ def _resolve_verify( insecure: Optional[bool] = None, ca_bundle: Optional[str] = None, auth_state: Optional[Dict[str, Any]] = None, -) -> bool | str: +) -> bool | ssl.SSLContext: tls_state = auth_state.get("tls") if isinstance(auth_state, dict) else {} tls_state = tls_state if isinstance(tls_state, dict) else {} @@ -1672,13 +1704,12 @@ def _resolve_verify( if effective_ca: ca_path = str(effective_ca) if not os.path.isfile(ca_path): - import logging - logging.getLogger("hermes.auth").warning( + logger.warning( "CA bundle path does not exist: %s — falling back to default certificates", ca_path, ) return True - return ca_path + return ssl.create_default_context(cafile=ca_path) return True @@ -2721,6 +2752,17 @@ def _update_config_for_provider( # Clear stale base_url to prevent contamination when switching providers model_cfg.pop("base_url", None) + # Clear stale api_key/api_mode left over from a previous custom provider. + # When the user switches from e.g. a MiniMax custom endpoint + # (api_mode=anthropic_messages, api_key=mxp-...) to a built-in provider + # (e.g. OpenRouter), the stale api_key/api_mode would override the new + # provider's credentials and transport choice. Built-in providers that + # need a specific api_mode (copilot, xai) set it at request-resolution + # time via `_copilot_runtime_api_mode` / `_detect_api_mode_for_url`, so + # removing the persisted value here is safe. + model_cfg.pop("api_key", None) + model_cfg.pop("api_mode", None) + # When switching to a non-OpenRouter provider, ensure model.default is # valid for the new provider. An OpenRouter-formatted name like # "anthropic/claude-opus-4.6" will fail on direct-API providers. @@ -3353,7 +3395,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: ) from hermes_cli.models import ( - _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models, + _PROVIDER_MODELS, get_pricing_for_provider, check_nous_free_tier, partition_nous_models_by_tier, ) model_ids = _PROVIDER_MODELS.get("nous", []) @@ -3362,7 +3404,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: unavailable_models: list = [] if model_ids: pricing = get_pricing_for_provider("nous") - model_ids = filter_nous_free_models(model_ids, pricing) free_tier = check_nous_free_tier() if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 30e5182949..9c33200107 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -152,6 +152,23 @@ def auth_add_command(args) -> None: pool = load_pool(provider) + # Clear ALL suppressions for this provider — re-adding a credential is + # a strong signal the user wants auth re-enabled. This covers env:* + # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli, + # device_code (codex), etc. One consistent re-engagement pattern. + # Matches the Codex device_code re-link pattern that predates this. 
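Stepping back to the `_resolve_verify` change a few hunks up: returning `ssl.create_default_context(cafile=...)` instead of a bare path string means the CA bundle is parsed once and the result can be handed to HTTP clients that accept an `SSLContext` directly, rather than each client re-reading the PEM file per request. A sketch of the resulting contract, simplified; the real function also honours `insecure` flags and persisted TLS state:

```python
import os
import ssl

def resolve_verify(ca_bundle: "str | None") -> "bool | ssl.SSLContext":
    # True        -> use the default system trust store
    # SSLContext  -> custom CA bundle, parsed once and reusable
    if not ca_bundle:
        return True
    if not os.path.isfile(ca_bundle):
        # Missing bundle: fall back to default certificates rather
        # than failing every request with a confusing TLS error.
        return True
    return ssl.create_default_context(cafile=ca_bundle)
```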
+ if not provider.startswith(CUSTOM_POOL_PREFIX): + try: + from hermes_cli.auth import ( + _load_auth_store, + unsuppress_credential_source, + ) + suppressed = _load_auth_store().get("suppressed_sources", {}) + for src in list(suppressed.get(provider, []) or []): + unsuppress_credential_source(provider, src) + except Exception: + pass + if requested_type == AUTH_TYPE_API_KEY: token = (getattr(args, "api_key", None) or "").strip() if not token: @@ -338,71 +355,28 @@ def auth_remove_command(args) -> None: raise SystemExit(f'No credential matching "{target}" for provider {provider}.') print(f"Removed {provider} credential #{index} ({removed.label})") - # If this was an env-seeded credential, also clear the env var from .env - # so it doesn't get re-seeded on the next load_pool() call. - if removed.source.startswith("env:"): - env_var = removed.source[len("env:"):] - if env_var: - from hermes_cli.config import remove_env_value - cleared = remove_env_value(env_var) - if cleared: - print(f"Cleared {env_var} from .env") + # Unified removal dispatch. Every credential source Hermes reads from + # (env vars, external OAuth files, auth.json blocks, custom config) + # has a RemovalStep registered in agent.credential_sources. The step + # handles its source-specific cleanup and we centralise suppression + + # user-facing output here so every source behaves identically from + # the user's perspective. + from agent.credential_sources import find_removal_step + from hermes_cli.auth import suppress_credential_source - # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce), - # clear the underlying auth store / credential file so it doesn't get - # re-seeded on the next load_pool() call. - elif provider == "openai-codex" and ( - removed.source == "device_code" or removed.source.endswith(":device_code") - ): - # Codex tokens live in TWO places: the Hermes auth store and - # ~/.codex/auth.json (the Codex CLI shared file). On every refresh, - # refresh_codex_oauth_pure() writes to both. So clearing only the - # Hermes auth store is not enough — _seed_from_singletons() will - # auto-import from ~/.codex/auth.json on the next load_pool() and - # the removal is instantly undone. Mark the source as suppressed - # so auto-import is skipped; leave ~/.codex/auth.json untouched so - # the Codex CLI itself keeps working. - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - suppress_credential_source, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - suppress_credential_source(provider, "device_code") - print("Suppressed openai-codex device_code source — it will not be re-seeded.") - print("Note: Codex CLI credentials still live in ~/.codex/auth.json") - print("Run `hermes auth add openai-codex` to re-enable if needed.") + step = find_removal_step(provider, removed.source) + if step is None: + # Unregistered source — e.g. "manual", which has nothing external + # to clean up. The pool entry is already gone; we're done. 
+ return - elif removed.source == "device_code" and provider == "nous": - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - - elif removed.source == "hermes_pkce" and provider == "anthropic": - from hermes_constants import get_hermes_home - oauth_file = get_hermes_home() / ".anthropic_oauth.json" - if oauth_file.exists(): - oauth_file.unlink() - print("Cleared Hermes Anthropic OAuth credentials") - - elif removed.source == "claude_code" and provider == "anthropic": - from hermes_cli.auth import suppress_credential_source - suppress_credential_source(provider, "claude_code") - print("Suppressed claude_code credential — it will not be re-seeded.") - print("Note: Claude Code credentials still live in ~/.claude/.credentials.json") - print("Run `hermes auth add anthropic` to re-enable if needed.") + result = step.remove_fn(provider, removed) + for line in result.cleaned: + print(line) + if result.suppress: + suppress_credential_source(provider, removed.source) + for line in result.hints: + print(line) def auth_reset_command(args) -> None: diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py index 667b8915af..8b5b90ef1f 100644 --- a/hermes_cli/backup.py +++ b/hermes_cli/backup.py @@ -201,7 +201,7 @@ def run_backup(args) -> None: else: zf.write(abs_path, arcname=str(rel_path)) total_bytes += abs_path.stat().st_size - except (PermissionError, OSError) as exc: + except (PermissionError, OSError, ValueError) as exc: errors.append(f" {rel_path}: {exc}") continue diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py index f5616b68d6..9e2181b501 100644 --- a/hermes_cli/codex_models.py +++ b/hermes_cli/codex_models.py @@ -24,7 +24,6 @@ _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [ ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.3-codex", ("gpt-5.2-codex",)), - ("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")), ] diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index f753d6f3a7..8b43a351fb 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -497,9 +497,8 @@ def _collect_gateway_skill_entries( # --- Tier 1: Plugin slash commands (never trimmed) --------------------- plugin_pairs: list[tuple[str, str]] = [] try: - from hermes_cli.plugins import get_plugin_manager - pm = get_plugin_manager() - plugin_cmds = getattr(pm, "_plugin_commands", {}) + from hermes_cli.plugins import get_plugin_commands + plugin_cmds = get_plugin_commands() for cmd_name in sorted(plugin_cmds): name = sanitize_name(cmd_name) if sanitize_name else cmd_name if not name: @@ -925,12 +924,22 @@ class SlashCommandCompleter(Completer): display_meta=meta, ) - # If the user typed @file: or @folder:, delegate to path completions + # If the user typed @file: / @folder: (or just @file / @folder with + # no colon yet), delegate to path completions. Accepting the bare + # form lets the picker surface directories as soon as the user has + # typed `@folder`, without requiring them to first accept the static + # `@folder:` hint and re-trigger completion. for prefix in ("@file:", "@folder:"): - if word.startswith(prefix): - path_part = word[len(prefix):] or "." 
+ bare = prefix[:-1] + + if word == bare or word.startswith(prefix): + want_dir = prefix == "@folder:" + path_part = '' if word == bare else word[len(prefix):] expanded = os.path.expanduser(path_part) - if expanded.endswith("/"): + + if not expanded or expanded == ".": + search_dir, match_prefix = ".", "" + elif expanded.endswith("/"): search_dir, match_prefix = expanded, "" else: search_dir = os.path.dirname(expanded) or "." @@ -946,15 +955,21 @@ class SlashCommandCompleter(Completer): for entry in sorted(entries): if match_prefix and not entry.lower().startswith(prefix_lower): continue - if count >= limit: - break full_path = os.path.join(search_dir, entry) is_dir = os.path.isdir(full_path) + # `@folder:` must only surface directories; `@file:` only + # regular files. Without this filter `@folder:` listed + # every .env / .gitignore in the cwd, defeating the + # explicit prefix and confusing users expecting a + # directory picker. + if want_dir != is_dir: + continue + if count >= limit: + break display_path = os.path.relpath(full_path) suffix = "/" if is_dir else "" - kind = "folder" if is_dir else "file" meta = "dir" if is_dir else _file_size_label(full_path) - completion = f"@{kind}:{display_path}{suffix}" + completion = f"{prefix}{display_path}{suffix}" yield Completion( completion, start_position=-len(word), diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d53899b135..81275a7f9a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -13,6 +13,7 @@ This module provides: """ import copy +import logging import os import platform import re @@ -24,6 +25,7 @@ from dataclasses import dataclass from pathlib import Path from typing import Dict, Any, Optional, List, Tuple +logger = logging.getLogger(__name__) _IS_WINDOWS = platform.system() == "Windows" _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") @@ -385,6 +387,26 @@ DEFAULT_CONFIG = { # (terminal and execute_code). Skill-declared required_environment_variables # are passed through automatically; this list is for non-skill use cases. "env_passthrough": [], + # Extra files to source in the login shell when building the + # per-session environment snapshot. Use this when tools like nvm, + # pyenv, asdf, or custom PATH entries are registered by files that + # a bash login shell would skip — most commonly ``~/.bashrc`` + # (bash doesn't source bashrc in non-interactive login mode) or + # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``. + # Paths support ``~`` / ``${VAR}``. Missing files are silently + # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the + # snapshot shell is bash (this is the ``auto_source_bashrc`` + # behaviour — disable with that key if you want strict login-only + # semantics). + "shell_init_files": [], + # When true (default), Hermes sources ``~/.bashrc`` in the login + # shell used to build the environment snapshot. This captures + # PATH additions, shell functions, and aliases defined in the + # user's bashrc — which a plain ``bash -l -c`` would otherwise + # miss because bash skips bashrc in non-interactive login mode. + # Turn this off if you have a bashrc that misbehaves when sourced + # non-interactively (e.g. one that hard-exits on TTY checks). + "auto_source_bashrc": True, "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20", "docker_forward_env": [], # Explicit environment variables to set inside Docker containers. 
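The completer change above has two parts: accept the bare `@file`/`@folder` forms before the colon is typed, and make each prefix surface only its own entry type. A reduced, runnable model of that filter (flat directory only; the real code also splits nested fragments with `os.path.dirname` and caps the result count):

```python
import os

def at_completions(word: str, search_dir: str = ".") -> list[str]:
    for prefix in ("@file:", "@folder:"):
        bare = prefix[:-1]  # "@file" / "@folder"
        if word != bare and not word.startswith(prefix):
            continue
        want_dir = prefix == "@folder:"
        frag = "" if word == bare else word[len(prefix):].lower()
        out = []
        for entry in sorted(os.listdir(search_dir)):
            if frag and not entry.lower().startswith(frag):
                continue
            full = os.path.join(search_dir, entry)
            # The key fix: the entry type must match the prefix, so
            # @folder: never lists files and @file: never lists dirs.
            if os.path.isdir(full) != want_dir:
                continue
            out.append(f"{prefix}{entry}" + ("/" if want_dir else ""))
        return out
    return []

# Bare @folder (no colon yet) already surfaces directories:
print(at_completions("@folder"))
```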
@@ -403,7 +425,11 @@ DEFAULT_CONFIG = { "container_persistent": True, # Persist filesystem across sessions # Docker volume mounts — share host directories with the container. # Each entry is "host_path:container_path" (standard Docker -v syntax). - # Example: ["/home/user/projects:/workspace/projects", "/data:/data"] + # Example: + # ["/home/user/projects:/workspace/projects", + # "/home/user/.hermes/cache/documents:/output"] + # For gateway MEDIA delivery, write inside Docker to /output/... and emit + # the host-visible path in MEDIA:, not the container path. "docker_volumes": [], # Explicit opt-in: mount the host cwd into /workspace for Docker sessions. # Default off because passing host directories into a sandbox weakens isolation. @@ -470,13 +496,6 @@ DEFAULT_CONFIG = { }, }, - "smart_model_routing": { - "enabled": False, - "max_simple_chars": 160, - "max_simple_words": 28, - "cheap_model": {}, - }, - # Auxiliary model config — provider:model for each side task. # Format: provider is the provider name, model is the model slug. # "auto" for provider = auto-detect best available provider. @@ -490,6 +509,7 @@ DEFAULT_CONFIG = { "base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider) "api_key": "", # API key for base_url (falls back to OPENAI_API_KEY) "timeout": 120, # seconds — LLM API call timeout; vision payloads need generous timeout + "extra_body": {}, # OpenAI-compatible provider-specific request fields "download_timeout": 30, # seconds — image HTTP download timeout; increase for slow connections }, "web_extract": { @@ -498,6 +518,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 360, # seconds (6min) — per-attempt LLM summarization timeout; increase for slow local models + "extra_body": {}, }, "compression": { "provider": "auto", @@ -505,6 +526,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 120, # seconds — compression summarises large contexts; increase for local models + "extra_body": {}, }, "session_search": { "provider": "auto", @@ -512,6 +534,8 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, + "max_concurrency": 3, # Clamp parallel summaries to avoid request-burst 429s on small providers }, "skills_hub": { "provider": "auto", @@ -519,6 +543,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "approval": { "provider": "auto", @@ -526,6 +551,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "mcp": { "provider": "auto", @@ -533,6 +559,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "flush_memories": { "provider": "auto", @@ -540,6 +567,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, "title_generation": { "provider": "auto", @@ -547,6 +575,7 @@ DEFAULT_CONFIG = { "base_url": "", "api_key": "", "timeout": 30, + "extra_body": {}, }, }, @@ -558,9 +587,14 @@ DEFAULT_CONFIG = { "bell_on_complete": False, "show_reasoning": False, "streaming": False, + "final_response_markdown": "strip", # render | strip | raw "inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage) "show_cost": False, # Show $ cost in the status bar (off by default) "skin": "default", + "user_message_preview": { # CLI: how many submitted user-message lines to echo back in scrollback + "first_lines": 2, + "last_lines": 2, + }, "interim_assistant_messages": True, # Gateway: show natural mid-turn assistant 
status messages "tool_progress_command": False, # Enable /verbose command in messaging gateway "tool_progress_overrides": {}, # DEPRECATED — use display.platforms instead @@ -579,6 +613,10 @@ DEFAULT_CONFIG = { }, # Text-to-speech configuration + # Each provider supports an optional `max_text_length:` override for the + # per-request input-character cap. Omit it to use the provider's documented + # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware, + # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000). "tts": { "provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local) "edge": { @@ -631,6 +669,7 @@ DEFAULT_CONFIG = { "record_key": "ctrl+b", "max_recording_seconds": 120, "auto_tts": False, + "beep_enabled": True, # Play record start/stop beeps in CLI voice mode "silence_threshold": 200, # RMS below this = silence (0-32767) "silence_duration": 3.0, # Seconds of silence before auto-stop }, @@ -677,6 +716,12 @@ DEFAULT_CONFIG = { # independent of the parent's max_iterations) "reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium", # "low", "minimal", "none" (empty = inherit parent's level) + "max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling + # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth + # and _get_orchestrator_enabled). Values are clamped to [1, 3] with a + # warning log if out of range. + "max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level) + "orchestrator_enabled": True, # kill switch for role="orchestrator" }, # Ephemeral prefill messages file — JSON list of {role, content} dicts @@ -689,6 +734,20 @@ DEFAULT_CONFIG = { # always goes to ~/.hermes/skills/. "skills": { "external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"] + # Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md + # content with the absolute skill directory and the active session id + # before the agent sees it. Lets skill authors reference bundled + # scripts without the agent having to join paths. + "template_vars": True, + # Pre-execute inline shell snippets written as !`cmd` in SKILL.md + # body. Their stdout is inlined into the skill message before the + # agent reads it, so skills can inject dynamic context (dates, git + # state, detected tool versions, …). Off by default because any + # content from the skill author runs on the host without approval; + # only enable for skill sources you trust. + "inline_shell": False, + # Timeout (seconds) for each !`cmd` snippet when inline_shell is on. + "inline_shell_timeout": 10, }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth. @@ -708,6 +767,14 @@ DEFAULT_CONFIG = { "auto_thread": True, # Auto-create threads on @mention in channels (like Slack) "reactions": True, # Add 👀/✅/❌ reactions to messages during processing "channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads) + # discord_server tool: restrict which actions the agent may call. + # Default (empty) = all actions allowed (subject to bot privileged intents). + # Accepts comma-separated string ("list_guilds,list_channels,fetch_messages") + # or YAML list. Unknown names are dropped with a warning at load time. 
+ # Actions: list_guilds, server_info, list_channels, channel_info, + # list_roles, member_info, search_members, fetch_messages, list_pins, + # pin_message, unpin_message, create_thread, add_role, remove_role. + "server_actions": "", }, # WhatsApp platform settings (gateway mode) @@ -751,6 +818,21 @@ DEFAULT_CONFIG = { "command_allowlist": [], # User-defined quick commands that bypass the agent loop (type: exec only) "quick_commands": {}, + + # Shell-script hooks — declarative bridge that invokes shell scripts + # on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call, + # subagent_stop, etc.). Each entry maps an event name to a list of + # {matcher, command, timeout} dicts. First registration of a new + # command prompts the user for consent; subsequent runs reuse the + # stored approval from ~/.hermes/shell-hooks-allowlist.json. + # See `website/docs/user-guide/features/hooks.md` for schema + examples. + "hooks": {}, + + # Auto-accept shell-hook registrations without a TTY prompt. Also + # toggleable per-invocation via --accept-hooks or HERMES_ACCEPT_HOOKS=1. + # Gateway / cron / non-interactive runs need this (or one of the other + # channels) to pick up newly-added hooks. + "hooks_auto_accept": False, # Custom personalities — add your own entries here # Supports string format: {"name": "system prompt"} # Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}} @@ -774,6 +856,11 @@ DEFAULT_CONFIG = { # Wrap delivered cron responses with a header (task name) and footer # ("The agent cannot see this message"). Set to false for clean output. "wrap_response": True, + # Maximum number of due jobs to run in parallel per tick. + # null/0 = unbounded (limited only by thread count). + # 1 = serial (pre-v0.9 behaviour). + # Also overridable via HERMES_CRON_MAX_PARALLEL env var. + "max_parallel_jobs": None, }, # execute_code settings — controls the tool used for programmatic tool calls. @@ -806,8 +893,36 @@ DEFAULT_CONFIG = { "force_ipv4": False, }, + # Session storage — controls automatic cleanup of ~/.hermes/state.db. + # state.db accumulates every session, message, tool call, and FTS5 index + # entry forever. Without auto-pruning, a heavy user (gateway + cron) + # reports 384MB+ databases with 68K+ messages, which slows down FTS5 + # inserts, /resume listing, and insights queries. + "sessions": { + # When true, prune ended sessions older than retention_days once + # per (roughly) min_interval_hours at CLI/gateway/cron startup. + # Only touches ended sessions — active sessions are always preserved. + # Default false: session history is valuable for search recall, and + # silently deleting it could surprise users. Opt in explicitly. + "auto_prune": False, + # How many days of ended-session history to keep. Matches the + # default of ``hermes sessions prune``. + "retention_days": 90, + # VACUUM after a prune that actually deleted rows. SQLite does not + # reclaim disk space on DELETE — freed pages are just reused on + # subsequent INSERTs — so without VACUUM the file stays bloated + # even after pruning. VACUUM blocks writes for a few seconds per + # 100MB, so it only runs at startup, and only when prune deleted + # ≥1 session. + "vacuum_after_prune": True, + # Minimum hours between auto-maintenance runs (avoids repeating + # the sweep on every CLI invocation). Tracked via state_meta in + # state.db itself, so it's shared across all processes. 
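An illustrative sketch of the maintenance pass this `sessions` block configures, assuming a simplified schema with a `sessions(ended_at)` table and a `state_meta` key/value table. The real logic lives in the hermes_state layer; every name below is hypothetical:

```python
import sqlite3
import time

def maybe_prune(db: sqlite3.Connection, retention_days: int = 90,
                min_interval_hours: int = 24, vacuum: bool = True) -> None:
    now = time.time()
    row = db.execute(
        "SELECT value FROM state_meta WHERE key = 'last_auto_prune'"
    ).fetchone()
    if row and now - float(row[0]) < min_interval_hours * 3600:
        return  # swept recently; the timestamp lives in the DB, so it
                # is shared across CLI, gateway, and cron processes
    cutoff = now - retention_days * 86400
    deleted = db.execute(
        "DELETE FROM sessions WHERE ended_at IS NOT NULL AND ended_at < ?",
        (cutoff,),
    ).rowcount  # active sessions (ended_at IS NULL) are never touched
    db.execute(
        "INSERT OR REPLACE INTO state_meta(key, value) VALUES ('last_auto_prune', ?)",
        (str(now),),
    )
    db.commit()
    if vacuum and deleted > 0:
        # DELETE alone never shrinks the file — freed pages are merely
        # reused — so rewrite the DB to actually reclaim disk space.
        db.execute("VACUUM")

if __name__ == "__main__":
    db = sqlite3.connect(":memory:")
    db.execute("CREATE TABLE sessions (ended_at REAL)")
    db.execute("CREATE TABLE state_meta (key TEXT PRIMARY KEY, value TEXT)")
    db.execute("INSERT INTO sessions VALUES (0)")  # ancient ended session
    maybe_prune(db)
```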
+ "min_interval_hours": 24, + }, + # Config schema version - bump this when adding new required fields - "_config_version": 19, + "_config_version": 22, } # ============================================================================= @@ -963,6 +1078,22 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "STEPFUN_API_KEY": { + "description": "StepFun Step Plan API key", + "prompt": "StepFun Step Plan API key", + "url": "https://platform.stepfun.com/", + "password": True, + "category": "provider", + "advanced": True, + }, + "STEPFUN_BASE_URL": { + "description": "StepFun Step Plan base URL override", + "prompt": "StepFun Step Plan base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "ARCEEAI_API_KEY": { "description": "Arcee AI API key", "prompt": "Arcee AI API key", @@ -1830,12 +1961,53 @@ def _normalize_custom_provider_entry( if not isinstance(entry, dict): return None + # Accept camelCase aliases commonly used in hand-written configs. + _CAMEL_ALIASES: Dict[str, str] = { + "apiKey": "api_key", + "baseUrl": "base_url", + "apiMode": "api_mode", + "keyEnv": "key_env", + "defaultModel": "default_model", + "contextLength": "context_length", + "rateLimitDelay": "rate_limit_delay", + } + _KNOWN_KEYS = { + "name", "api", "url", "base_url", "api_key", "key_env", + "api_mode", "transport", "model", "default_model", "models", + "context_length", "rate_limit_delay", + } + for camel, snake in _CAMEL_ALIASES.items(): + if camel in entry and snake not in entry: + logger.warning( + "providers.%s: camelCase key '%s' auto-mapped to '%s' " + "(use snake_case to avoid this warning)", + provider_key or "?", camel, snake, + ) + entry[snake] = entry[camel] + unknown = set(entry.keys()) - _KNOWN_KEYS - set(_CAMEL_ALIASES.keys()) + if unknown: + logger.warning( + "providers.%s: unknown config keys ignored: %s", + provider_key or "?", ", ".join(sorted(unknown)), + ) + + from urllib.parse import urlparse + base_url = "" - for url_key in ("api", "url", "base_url"): + for url_key in ("base_url", "url", "api"): raw_url = entry.get(url_key) if isinstance(raw_url, str) and raw_url.strip(): - base_url = raw_url.strip() - break + candidate = raw_url.strip() + parsed = urlparse(candidate) + if parsed.scheme and parsed.netloc: + base_url = candidate + break + else: + logger.warning( + "providers.%s: '%s' value '%s' is not a valid URL " + "(no scheme or host) — skipped", + provider_key or "?", url_key, candidate, + ) if not base_url: return None @@ -1974,6 +2146,7 @@ _KNOWN_ROOT_KEYS = { "fallback_providers", "credential_pool_strategies", "toolsets", "agent", "terminal", "display", "compression", "delegation", "auxiliary", "custom_providers", "context", "memory", "gateway", + "sessions", } # Valid fields inside a custom_providers list entry @@ -2131,7 +2304,6 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None: if not issues: return - import sys lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"] for ci in issues: marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m" @@ -2146,7 +2318,6 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non These env vars are deprecated — the canonical setting is terminal.cwd in config.yaml. Prints a migration hint to stderr. 
""" - import os, sys messaging_cwd = os.environ.get("MESSAGING_CWD") terminal_cwd_env = os.environ.get("TERMINAL_CWD") @@ -2464,6 +2635,71 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A else: print(" ✓ Removed unused compression.summary_* keys") + # ── Version 20 → 21: plugins are now opt-in; grandfather existing user plugins ── + # The loader now requires plugins to appear in ``plugins.enabled`` before + # loading. Existing installs had all discovered plugins loading by default + # (minus anything in ``plugins.disabled``). To avoid silently breaking + # those setups on upgrade, populate ``plugins.enabled`` with the set of + # currently-installed user plugins that aren't already disabled. + # + # Bundled plugins (shipped in the repo itself) are NOT grandfathered — + # they ship off for everyone, including existing users, so any user who + # wants one has to opt in explicitly. + if current_ver < 21: + config = read_raw_config() + plugins_cfg = config.get("plugins") + if not isinstance(plugins_cfg, dict): + plugins_cfg = {} + # Only migrate if the enabled allow-list hasn't been set yet. + if "enabled" not in plugins_cfg: + disabled = plugins_cfg.get("disabled", []) or [] + if not isinstance(disabled, list): + disabled = [] + disabled_set = set(disabled) + + # Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins. + grandfathered: List[str] = [] + try: + user_plugins_dir = get_hermes_home() / "plugins" + if user_plugins_dir.is_dir(): + for child in sorted(user_plugins_dir.iterdir()): + if not child.is_dir(): + continue + manifest_file = child / "plugin.yaml" + if not manifest_file.exists(): + manifest_file = child / "plugin.yml" + if not manifest_file.exists(): + continue + try: + with open(manifest_file) as _mf: + manifest = yaml.safe_load(_mf) or {} + except Exception: + manifest = {} + name = manifest.get("name") or child.name + if name in disabled_set: + continue + grandfathered.append(name) + except Exception: + grandfathered = [] + + plugins_cfg["enabled"] = grandfathered + config["plugins"] = plugins_cfg + save_config(config) + results["config_added"].append( + f"plugins.enabled (opt-in allow-list, {len(grandfathered)} grandfathered)" + ) + if not quiet: + if grandfathered: + print( + f" ✓ Plugins now opt-in: grandfathered " + f"{len(grandfathered)} existing plugin(s) into plugins.enabled" + ) + else: + print( + " ✓ Plugins now opt-in: no existing plugins to grandfather. " + "Use `hermes plugins enable ` to activate." + ) + if current_ver < latest_ver and not quiet: print(f"Config version: {current_ver} → {latest_ver}") @@ -2861,24 +3097,11 @@ _FALLBACK_COMMENT = """ # minimax (MINIMAX_API_KEY) — MiniMax # minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) # -# For custom OpenAI-compatible endpoints, add base_url and api_key_env. +# For custom OpenAI-compatible endpoints, add base_url and key_env. # # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 -# -# ── Smart Model Routing ──────────────────────────────────────────────── -# Optional cheap-vs-strong routing for simple turns. -# Keeps the primary model for complex work, but can route short/simple -# messages to a cheaper model across providers. 
-# -# smart_model_routing: -# enabled: true -# max_simple_chars: 160 -# max_simple_words: 28 -# cheap_model: -# provider: openrouter -# model: google/gemini-2.5-flash """ @@ -2905,24 +3128,11 @@ _COMMENTED_SECTIONS = """ # minimax (MINIMAX_API_KEY) — MiniMax # minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) # -# For custom OpenAI-compatible endpoints, add base_url and api_key_env. +# For custom OpenAI-compatible endpoints, add base_url and key_env. # # fallback_model: # provider: openrouter # model: anthropic/claude-sonnet-4 -# -# ── Smart Model Routing ──────────────────────────────────────────────── -# Optional cheap-vs-strong routing for simple turns. -# Keeps the primary model for complex work, but can route short/simple -# messages to a cheaper model across providers. -# -# smart_model_routing: -# enabled: true -# max_simple_chars: 160 -# max_simple_words: 28 -# cheap_model: -# provider: openrouter -# model: google/gemini-2.5-flash """ @@ -3115,7 +3325,6 @@ def _check_non_ascii_credential(key: str, value: str) -> str: bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})") sanitized = value.encode("ascii", errors="ignore").decode("ascii") - import sys print( f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n" f" This usually happens when copy-pasting from a PDF, rich-text editor,\n" @@ -3385,6 +3594,10 @@ def show_config(): print(f" Personality: {display.get('personality', 'kawaii')}") print(f" Reasoning: {'on' if display.get('show_reasoning', False) else 'off'}") print(f" Bell: {'on' if display.get('bell_on_complete', False) else 'off'}") + ump = display.get('user_message_preview', {}) if isinstance(display.get('user_message_preview', {}), dict) else {} + ump_first = ump.get('first_lines', 2) + ump_last = ump.get('last_lines', 2) + print(f" User preview: first {ump_first} line(s), last {ump_last} line(s)") # Terminal print() diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 4138aeaa27..064b1d68d1 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -30,6 +30,7 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8") from hermes_cli.colors import Colors, color from hermes_constants import OPENROUTER_MODELS_URL +from utils import base_url_host_matches _PROVIDER_ENV_HINTS = ( @@ -277,6 +278,86 @@ def run_doctor(args): config_path = HERMES_HOME / 'config.yaml' if config_path.exists(): check_ok(f"{_DHH}/config.yaml exists") + + # Validate model.provider and model.default values + try: + import yaml as _yaml + cfg = _yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + model_section = cfg.get("model") or {} + provider_raw = (model_section.get("provider") or "").strip() + provider = provider_raw.lower() + default_model = (model_section.get("default") or model_section.get("model") or "").strip() + + known_providers: set = set() + try: + from hermes_cli.auth import PROVIDER_REGISTRY + known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"} + except Exception: + pass + try: + from hermes_cli.auth import resolve_provider as _resolve_provider + except Exception: + _resolve_provider = None + + canonical_provider = provider + if provider and _resolve_provider is not None and provider != "auto": + try: + canonical_provider = _resolve_provider(provider) + except Exception: + canonical_provider = None + + if provider and provider != "auto": + if canonical_provider is None or (known_providers and canonical_provider not in known_providers): + known_list = ", ".join(sorted(known_providers)) 
if known_providers else "(unavailable)" + check_fail( + f"model.provider '{provider_raw}' is not a recognised provider", + f"(known: {known_list})", + ) + issues.append( + f"model.provider '{provider_raw}' is unknown. " + f"Valid providers: {known_list}. " + f"Fix: run 'hermes config set model.provider '" + ) + + # Warn if model is set to a provider-prefixed name on a provider that doesn't use them + if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"): + check_warn( + f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'", + "(vendor-prefixed slugs belong to aggregators like openrouter)", + ) + issues.append( + f"model.default '{default_model}' is vendor-prefixed but model.provider is '{provider_raw}'. " + "Either set model.provider to 'openrouter', or drop the vendor prefix." + ) + + # Check credentials for the configured provider. + # Limit to API-key providers in PROVIDER_REGISTRY — other provider + # types (OAuth, SDK, openrouter/anthropic/custom/auto) have their + # own env-var checks elsewhere in doctor, and get_auth_status() + # returns a bare {logged_in: False} for anything it doesn't + # explicitly dispatch, which would produce false positives. + if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"): + try: + from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status + pconfig = PROVIDER_REGISTRY.get(canonical_provider) + if pconfig and getattr(pconfig, "auth_type", "") == "api_key": + status = get_auth_status(canonical_provider) or {} + configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key")) + if not configured: + check_fail( + f"model.provider '{canonical_provider}' is set but no API key is configured", + "(check ~/.hermes/.env or run 'hermes setup')", + ) + issues.append( + f"No credentials found for provider '{canonical_provider}'. " + f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, " + f"or switch providers with 'hermes config set model.provider '" + ) + except Exception: + pass + + except Exception as e: + check_warn("Could not validate model/provider config", f"({e})") else: fallback_config = PROJECT_ROOT / 'cli-config.yaml' if fallback_config.exists(): @@ -778,6 +859,16 @@ def run_doctor(args): elif response.status_code == 401: print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ") issues.append("Check OPENROUTER_API_KEY in .env") + elif response.status_code == 402: + print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(out of credits — payment required)', Colors.DIM)}") + issues.append( + "OpenRouter account has insufficient credits. 
" + "Fix: run 'hermes config set model.provider ' to switch providers, " + "or fund your OpenRouter account at https://openrouter.ai/settings/credits" + ) + elif response.status_code == 429: + print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(rate limited)', Colors.DIM)} ") + issues.append("OpenRouter rate limit hit — consider switching to a different provider or waiting") else: print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ") except Exception as e: @@ -821,6 +912,7 @@ def run_doctor(args): _apikey_providers = [ ("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True), ("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True), + ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True), ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True), ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True), ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), @@ -852,18 +944,22 @@ def run_doctor(args): try: import httpx _base = os.getenv(_base_env, "") if _base_env else "" - # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com + # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1 + # (OpenAI-compat surface, which exposes /models for health check). if not _base and _key.startswith("sk-kimi-"): _base = "https://api.kimi.com/coding/v1" - # Anthropic-compat endpoints (/anthropic) don't support /models. - # Rewrite to the OpenAI-compat /v1 surface for health checks. + # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding + # with no /v1) don't support /models. Rewrite to the OpenAI-compat + # /v1 surface for health checks. if _base and _base.rstrip("/").endswith("/anthropic"): from agent.auxiliary_client import _to_openai_base_url _base = _to_openai_base_url(_base) + if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"): + _base = _base.rstrip("/") + "/v1" _url = (_base.rstrip("/") + "/models") if _base else _default_url _headers = {"Authorization": f"Bearer {_key}"} - if "api.kimi.com" in _url.lower(): - _headers["User-Agent"] = "KimiCLI/1.30.0" + if base_url_host_matches(_base, "api.kimi.com"): + _headers["User-Agent"] = "claude-code/0.1.0" _resp = httpx.get( _url, headers=_headers, diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py index f3a174e71b..90364a261a 100644 --- a/hermes_cli/dump.py +++ b/hermes_cli/dump.py @@ -160,7 +160,6 @@ def _config_overrides(config: dict) -> dict[str, str]: ("display", "streaming"), ("display", "skin"), ("display", "show_reasoning"), - ("smart_model_routing", "enabled"), ("privacy", "redact_pii"), ("tts", "provider"), ] diff --git a/hermes_cli/env_loader.py b/hermes_cli/env_loader.py index 853f0d2626..009f3de273 100644 --- a/hermes_cli/env_loader.py +++ b/hermes_cli/env_loader.py @@ -3,6 +3,7 @@ from __future__ import annotations import os +import sys from pathlib import Path from dotenv import load_dotenv @@ -14,6 +15,26 @@ from dotenv import load_dotenv # pure ASCII (they become HTTP header values). 
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY") +# Names we've already warned about during this process, so repeated +# load_hermes_dotenv() calls (user env + project env, gateway hot-reload, +# tests) don't spam the same warning multiple times. +_WARNED_KEYS: set[str] = set() + + +def _format_offending_chars(value: str, limit: int = 3) -> str: + """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints.""" + seen: list[str] = [] + for ch in value: + if ord(ch) > 127: + label = f"U+{ord(ch):04X}" + if ch.isprintable(): + label += f" ({ch!r})" + if label not in seen: + seen.append(label) + if len(seen) >= limit: + break + return ", ".join(seen) + def _sanitize_loaded_credentials() -> None: """Strip non-ASCII characters from credential env vars in os.environ. @@ -21,14 +42,42 @@ def _sanitize_loaded_credentials() -> None: Called after dotenv loads so the rest of the codebase never sees non-ASCII API keys. Only touches env vars whose names end with known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.). + + Emits a one-line warning to stderr when characters are stripped. + Silent stripping would mask copy-paste corruption (Unicode lookalike + glyphs from PDFs / rich-text editors, ZWSP from web pages) as opaque + provider-side "invalid API key" errors (see #6843). """ for key, value in list(os.environ.items()): if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES): continue try: value.encode("ascii") + continue except UnicodeEncodeError: - os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii") + pass + cleaned = value.encode("ascii", errors="ignore").decode("ascii") + os.environ[key] = cleaned + if key in _WARNED_KEYS: + continue + _WARNED_KEYS.add(key) + stripped = len(value) - len(cleaned) + detail = _format_offending_chars(value) or "non-printable" + print( + f" Warning: {key} contained {stripped} non-ASCII character" + f"{'s' if stripped != 1 else ''} ({detail}) — stripped so the " + f"key can be sent as an HTTP header.", + file=sys.stderr, + ) + print( + " This usually means the key was copy-pasted from a PDF, " + "rich-text editor, or web page that substituted lookalike\n" + " Unicode glyphs for ASCII letters. If authentication fails " + "(e.g. \"API key not valid\"), re-copy the key from the\n" + " provider's dashboard and run `hermes setup` (or edit the " + ".env file in a plain-text editor).", + file=sys.stderr, + ) def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None: @@ -111,6 +160,8 @@ def load_hermes_dotenv( # Fix corrupted .env files before python-dotenv parses them (#8908). 
if user_env.exists(): _sanitize_env_file_if_needed(user_env) + if project_env_path and project_env_path.exists(): + _sanitize_env_file_if_needed(project_env_path) if user_env.exists(): _load_dotenv_with_fallback(user_env, override=True) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index bc809cadf9..59bd37d113 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -994,8 +994,6 @@ def get_systemd_linger_status() -> tuple[bool | None, str]: if not is_linux(): return None, "not supported on this platform" - import shutil - if not shutil.which("loginctl"): return None, "loginctl not found" @@ -1347,7 +1345,6 @@ def _ensure_linger_enabled() -> None: return import getpass - import shutil username = getpass.getuser() linger_file = Path(f"/var/lib/systemd/linger/{username}") @@ -1656,7 +1653,6 @@ def get_launchd_label() -> str: def _launchd_domain() -> str: - import os return f"gui/{os.getuid()}" @@ -2643,9 +2639,120 @@ def _setup_dingtalk(): def _setup_wecom(): - """Configure WeCom (Enterprise WeChat) via the standard platform setup.""" - wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom") - _setup_standard_platform(wecom_platform) + """Interactive setup for WeCom — scan QR code or manual credential input.""" + print() + print(color(" ─── 💬 WeCom (Enterprise WeChat) Setup ───", Colors.CYAN)) + + existing_bot_id = get_env_value("WECOM_BOT_ID") + existing_secret = get_env_value("WECOM_SECRET") + if existing_bot_id and existing_secret: + print() + print_success("WeCom is already configured.") + if not prompt_yes_no(" Reconfigure WeCom?", False): + return + + # ── Choose setup method ── + print() + method_choices = [ + "Scan QR code to obtain Bot ID and Secret automatically (recommended)", + "Enter existing Bot ID and Secret manually", + ] + method_idx = prompt_choice(" How would you like to set up WeCom?", method_choices, 0) + + bot_id = None + secret = None + + if method_idx == 0: + # ── QR scan flow ── + try: + from gateway.platforms.wecom import qr_scan_for_bot_info + except Exception as exc: + print_error(f" WeCom QR scan import failed: {exc}") + qr_scan_for_bot_info = None + + if qr_scan_for_bot_info is not None: + try: + credentials = qr_scan_for_bot_info() + except KeyboardInterrupt: + print() + print_warning(" WeCom setup cancelled.") + return + except Exception as exc: + print_warning(f" QR scan failed: {exc}") + credentials = None + if credentials: + bot_id = credentials.get("bot_id", "") + secret = credentials.get("secret", "") + print_success(" ✔ QR scan successful! Bot ID and Secret obtained.") + + if not bot_id or not secret: + print_info(" QR scan did not complete. Continuing with manual input.") + bot_id = None + secret = None + + # ── Manual credential input ── + if not bot_id or not secret: + print() + print_info(" 1. Go to WeCom Application → Workspace → Smart Robot -> Create smart robots") + print_info(" 2. Select API Mode") + print_info(" 3. Copy the Bot ID and Secret from the bot's credentials info") + print_info(" 4. 
The bot connects via WebSocket — no public endpoint needed") + print() + bot_id = prompt(" Bot ID", password=False) + if not bot_id: + print_warning(" Skipped — WeCom won't work without a Bot ID.") + return + secret = prompt(" Secret", password=True) + if not secret: + print_warning(" Skipped — WeCom won't work without a Secret.") + return + + # ── Save core credentials ── + save_env_value("WECOM_BOT_ID", bot_id) + save_env_value("WECOM_SECRET", secret) + + # ── Allowed users (deny-by-default security) ── + print() + print_info(" The gateway DENIES all users by default for security.") + print_info(" Enter user IDs to create an allowlist, or leave empty.") + allowed = prompt(" Allowed user IDs (comma-separated, or empty)", password=False) + if allowed: + cleaned = allowed.replace(" ", "") + save_env_value("WECOM_ALLOWED_USERS", cleaned) + print_success(" Saved — only these users can interact with the bot.") + else: + print() + access_choices = [ + "Enable open access (anyone can message the bot)", + "Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')", + "Disable direct messages", + "Skip for now (bot will deny all users until configured)", + ] + access_idx = prompt_choice(" How should unauthorized users be handled?", access_choices, 1) + if access_idx == 0: + save_env_value("WECOM_DM_POLICY", "open") + save_env_value("GATEWAY_ALLOW_ALL_USERS", "true") + print_warning(" Open access enabled — anyone can use your bot!") + elif access_idx == 1: + save_env_value("WECOM_DM_POLICY", "pairing") + print_success(" DM pairing mode — users will receive a code to request access.") + print_info(" Approve with: hermes pairing approve ") + elif access_idx == 2: + save_env_value("WECOM_DM_POLICY", "disabled") + print_warning(" Direct messages disabled.") + else: + print_info(" Skipped — configure later with 'hermes gateway setup'") + + # ── Home channel (optional) ── + print() + print_info(" Chat ID for scheduled results and notifications.") + home = prompt(" Home chat ID (optional, for cron/notifications)", password=False) + if home: + save_env_value("WECOM_HOME_CHANNEL", home) + print_success(f" Home channel set to {home}") + + print() + print_success("💬 WeCom configured!") def _is_service_installed() -> bool: @@ -3025,7 +3132,8 @@ def _setup_qqbot(): if method_idx == 0: # ── QR scan-to-configure ── try: - credentials = _qqbot_qr_flow() + from gateway.platforms.qqbot import qr_register + credentials = qr_register() except KeyboardInterrupt: print() print_warning(" QQ Bot setup cancelled.") @@ -3107,106 +3215,6 @@ def _setup_qqbot(): print_info(f" App ID: {credentials['app_id']}") -def _qqbot_render_qr(url: str) -> bool: - """Try to render a QR code in the terminal. Returns True if successful.""" - try: - import qrcode as _qr - qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L) - qr.add_data(url) - qr.make(fit=True) - qr.print_ascii(invert=True) - return True - except Exception: - return False - - -def _qqbot_qr_flow(): - """Run the QR-code scan-to-configure flow. - - Returns a dict with app_id, client_secret, user_openid on success, - or None on failure/cancel. 
- """ - try: - from gateway.platforms.qqbot import ( - create_bind_task, poll_bind_result, build_connect_url, - decrypt_secret, BindStatus, - ) - from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL - except Exception as exc: - print_error(f" QQBot onboard import failed: {exc}") - return None - - import asyncio - import time - - MAX_REFRESHES = 3 - refresh_count = 0 - - while refresh_count <= MAX_REFRESHES: - loop = asyncio.new_event_loop() - - # ── Create bind task ── - try: - task_id, aes_key = loop.run_until_complete(create_bind_task()) - except Exception as e: - print_warning(f" Failed to create bind task: {e}") - loop.close() - return None - - url = build_connect_url(task_id) - - # ── Display QR code + URL ── - print() - if _qqbot_render_qr(url): - print(f" Scan the QR code above, or open this URL directly:\n {url}") - else: - print(f" Open this URL in QQ on your phone:\n {url}") - print_info(" Tip: pip install qrcode to show a scannable QR code here") - - # ── Poll loop (silent — keep QR visible at bottom) ── - try: - while True: - try: - status, app_id, encrypted_secret, user_openid = loop.run_until_complete( - poll_bind_result(task_id) - ) - except Exception: - time.sleep(ONBOARD_POLL_INTERVAL) - continue - - if status == BindStatus.COMPLETED: - client_secret = decrypt_secret(encrypted_secret, aes_key) - print() - print_success(f" QR scan complete! (App ID: {app_id})") - if user_openid: - print_info(f" Scanner's OpenID: {user_openid}") - return { - "app_id": app_id, - "client_secret": client_secret, - "user_openid": user_openid, - } - - if status == BindStatus.EXPIRED: - refresh_count += 1 - if refresh_count > MAX_REFRESHES: - print() - print_warning(f" QR code expired {MAX_REFRESHES} times — giving up.") - return None - print() - print_warning(f" QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})") - loop.close() - break # outer while creates a new task - - time.sleep(ONBOARD_POLL_INTERVAL) - except KeyboardInterrupt: - loop.close() - raise - finally: - loop.close() - - return None - - def _setup_signal(): """Interactive setup for Signal messenger.""" import shutil @@ -3394,6 +3402,8 @@ def gateway_setup(): _setup_feishu() elif platform["key"] == "qqbot": _setup_qqbot() + elif platform["key"] == "wecom": + _setup_wecom() else: _setup_standard_platform(platform) diff --git a/hermes_cli/hooks.py b/hermes_cli/hooks.py new file mode 100644 index 0000000000..97d9e36b30 --- /dev/null +++ b/hermes_cli/hooks.py @@ -0,0 +1,385 @@ +"""hermes hooks — inspect and manage shell-script hooks. + +Usage:: + + hermes hooks list + hermes hooks test [--for-tool X] [--payload-file F] + hermes hooks revoke + hermes hooks doctor + +Consent records live under ``~/.hermes/shell-hooks-allowlist.json`` and +hook definitions come from the ``hooks:`` block in ``~/.hermes/config.yaml`` +(the same config read by the CLI / gateway at startup). + +This module is a thin CLI shell over :mod:`agent.shell_hooks`; every +shared concern (payload serialisation, response parsing, allowlist +format) lives there. 
+""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any, Dict, List, Optional + + +def hooks_command(args) -> None: + """Entry point for ``hermes hooks`` — dispatches to the requested action.""" + sub = getattr(args, "hooks_action", None) + + if not sub: + print("Usage: hermes hooks {list|test|revoke|doctor}") + print("Run 'hermes hooks --help' for details.") + return + + if sub in ("list", "ls"): + _cmd_list(args) + elif sub == "test": + _cmd_test(args) + elif sub in ("revoke", "remove", "rm"): + _cmd_revoke(args) + elif sub == "doctor": + _cmd_doctor(args) + else: + print(f"Unknown hooks subcommand: {sub}") + + +# --------------------------------------------------------------------------- +# list +# --------------------------------------------------------------------------- + +def _cmd_list(_args) -> None: + from hermes_cli.config import load_config + from agent import shell_hooks + + specs = shell_hooks.iter_configured_hooks(load_config()) + + if not specs: + print("No shell hooks configured in ~/.hermes/config.yaml.") + print("See `hermes hooks --help` or") + print(" website/docs/user-guide/features/hooks.md") + print("for the config schema and worked examples.") + return + + by_event: Dict[str, List] = {} + for spec in specs: + by_event.setdefault(spec.event, []).append(spec) + + allowlist = shell_hooks.load_allowlist() + approved = { + (e.get("event"), e.get("command")) + for e in allowlist.get("approvals", []) + if isinstance(e, dict) + } + + print(f"Configured shell hooks ({len(specs)} total):\n") + + for event in sorted(by_event.keys()): + print(f" [{event}]") + for spec in by_event[event]: + is_approved = (spec.event, spec.command) in approved + status = "✓ allowed" if is_approved else "✗ not allowlisted" + matcher_part = f" matcher={spec.matcher!r}" if spec.matcher else "" + print( + f" - {spec.command}{matcher_part} " + f"(timeout={spec.timeout}s, {status})" + ) + + if is_approved: + entry = shell_hooks.allowlist_entry_for(spec.event, spec.command) + if entry and entry.get("approved_at"): + print(f" approved_at: {entry['approved_at']}") + mtime_now = shell_hooks.script_mtime_iso(spec.command) + mtime_at = entry.get("script_mtime_at_approval") + if mtime_now and mtime_at and mtime_now > mtime_at: + print( + f" ⚠ script modified since approval " + f"(was {mtime_at}, now {mtime_now}) — " + f"run `hermes hooks doctor` to re-validate" + ) + print() + + +# --------------------------------------------------------------------------- +# test +# --------------------------------------------------------------------------- + +# Synthetic kwargs matching the real invoke_hook() call sites — these are +# passed verbatim to agent.shell_hooks.run_once(), which routes them through +# the same _serialize_payload() that production firings use. That way the +# stdin a script sees under `hermes hooks test` and `hermes hooks doctor` +# is identical in shape to what it will see at runtime. 
+_DEFAULT_PAYLOADS = { + "pre_tool_call": { + "tool_name": "terminal", + "args": {"command": "echo hello"}, + "session_id": "test-session", + "task_id": "test-task", + "tool_call_id": "test-call", + }, + "post_tool_call": { + "tool_name": "terminal", + "args": {"command": "echo hello"}, + "session_id": "test-session", + "task_id": "test-task", + "tool_call_id": "test-call", + "result": '{"output": "hello"}', + }, + "pre_llm_call": { + "session_id": "test-session", + "user_message": "What is the weather?", + "conversation_history": [], + "is_first_turn": True, + "model": "gpt-4", + "platform": "cli", + }, + "post_llm_call": { + "session_id": "test-session", + "model": "gpt-4", + "platform": "cli", + }, + "on_session_start": {"session_id": "test-session"}, + "on_session_end": {"session_id": "test-session"}, + "on_session_finalize": {"session_id": "test-session"}, + "on_session_reset": {"session_id": "test-session"}, + "pre_api_request": { + "session_id": "test-session", + "task_id": "test-task", + "platform": "cli", + "model": "claude-sonnet-4-6", + "provider": "anthropic", + "base_url": "https://api.anthropic.com", + "api_mode": "anthropic_messages", + "api_call_count": 1, + "message_count": 4, + "tool_count": 12, + "approx_input_tokens": 2048, + "request_char_count": 8192, + "max_tokens": 4096, + }, + "post_api_request": { + "session_id": "test-session", + "task_id": "test-task", + "platform": "cli", + "model": "claude-sonnet-4-6", + "provider": "anthropic", + "base_url": "https://api.anthropic.com", + "api_mode": "anthropic_messages", + "api_call_count": 1, + "api_duration": 1.234, + "finish_reason": "stop", + "message_count": 4, + "response_model": "claude-sonnet-4-6", + "usage": {"input_tokens": 2048, "output_tokens": 512}, + "assistant_content_chars": 1200, + "assistant_tool_call_count": 0, + }, + "subagent_stop": { + "parent_session_id": "parent-sess", + "child_role": None, + "child_summary": "Synthetic summary for hooks test", + "child_status": "completed", + "duration_ms": 1234, + }, +} + + +def _cmd_test(args) -> None: + from hermes_cli.config import load_config + from hermes_cli.plugins import VALID_HOOKS + from agent import shell_hooks + + event = args.event + if event not in VALID_HOOKS: + print(f"Unknown event: {event!r}") + print(f"Valid events: {', '.join(sorted(VALID_HOOKS))}") + return + + # Synthetic kwargs in the same shape invoke_hook() would pass. Merged + # with --for-tool (overrides tool_name) and --payload-file (extra kwargs). 
+ payload = dict(_DEFAULT_PAYLOADS.get(event, {"session_id": "test-session"})) + + if getattr(args, "for_tool", None): + payload["tool_name"] = args.for_tool + + if getattr(args, "payload_file", None): + try: + custom = json.loads(Path(args.payload_file).read_text()) + if isinstance(custom, dict): + payload.update(custom) + else: + print(f"Warning: {args.payload_file} is not a JSON object; ignoring") + except Exception as exc: + print(f"Error reading payload file: {exc}") + return + + specs = shell_hooks.iter_configured_hooks(load_config()) + specs = [s for s in specs if s.event == event] + + if getattr(args, "for_tool", None): + specs = [ + s for s in specs + if s.event not in ("pre_tool_call", "post_tool_call") + or s.matches_tool(args.for_tool) + ] + + if not specs: + print(f"No shell hooks configured for event: {event}") + if getattr(args, "for_tool", None): + print(f"(with matcher filter --for-tool={args.for_tool})") + return + + print(f"Firing {len(specs)} hook(s) for event '{event}':\n") + for spec in specs: + print(f" → {spec.command}") + result = shell_hooks.run_once(spec, payload) + _print_run_result(result) + print() + + +def _print_run_result(result: Dict[str, Any]) -> None: + if result.get("error"): + print(f" ✗ error: {result['error']}") + return + if result.get("timed_out"): + print(f" ✗ timed out after {result['elapsed_seconds']}s") + return + + rc = result.get("returncode") + elapsed = result.get("elapsed_seconds", 0) + print(f" exit={rc} elapsed={elapsed}s") + + stdout = (result.get("stdout") or "").strip() + stderr = (result.get("stderr") or "").strip() + if stdout: + print(f" stdout: {_truncate(stdout, 400)}") + if stderr: + print(f" stderr: {_truncate(stderr, 400)}") + + parsed = result.get("parsed") + if parsed: + print(f" parsed (Hermes wire shape): {json.dumps(parsed)}") + else: + print(" parsed: ") + + +def _truncate(s: str, n: int) -> str: + return s if len(s) <= n else s[: n - 3] + "..." + + +# --------------------------------------------------------------------------- +# revoke +# --------------------------------------------------------------------------- + +def _cmd_revoke(args) -> None: + from agent import shell_hooks + + removed = shell_hooks.revoke(args.command) + if removed == 0: + print(f"No allowlist entry found for command: {args.command}") + return + print(f"Removed {removed} allowlist entry/entries for: {args.command}") + print( + "Note: currently running CLI / gateway processes keep their " + "already-registered callbacks until they restart." + ) + + +# --------------------------------------------------------------------------- +# doctor +# --------------------------------------------------------------------------- + +def _cmd_doctor(_args) -> None: + from hermes_cli.config import load_config + from agent import shell_hooks + + specs = shell_hooks.iter_configured_hooks(load_config()) + + if not specs: + print("No shell hooks configured — nothing to check.") + return + + print(f"Checking {len(specs)} configured shell hook(s)...\n") + + problems = 0 + for spec in specs: + print(f" [{spec.event}] {spec.command}") + problems += _doctor_one(spec, shell_hooks) + print() + + if problems: + print(f"{problems} issue(s) found. Fix before relying on these hooks.") + else: + print("All shell hooks look healthy.") + + +def _doctor_one(spec, shell_hooks) -> int: + problems = 0 + + # 1. 
Script exists and is executable + if shell_hooks.script_is_executable(spec.command): + print(" ✓ script exists and is executable") + else: + problems += 1 + print(" ✗ script missing or not executable " + "(chmod +x the file, or fix the path)") + + # 2. Allowlist status + entry = shell_hooks.allowlist_entry_for(spec.event, spec.command) + if entry: + print(f" ✓ allowlisted (approved {entry.get('approved_at', '?')})") + else: + problems += 1 + print(" ✗ not allowlisted — hook will NOT fire at runtime " + "(run with --accept-hooks once, or confirm at the TTY prompt)") + + # 3. Mtime drift + if entry and entry.get("script_mtime_at_approval"): + mtime_now = shell_hooks.script_mtime_iso(spec.command) + mtime_at = entry["script_mtime_at_approval"] + if mtime_now and mtime_at and mtime_now > mtime_at: + problems += 1 + print(f" ⚠ script modified since approval " + f"(was {mtime_at}, now {mtime_now}) — review changes, " + f"then `hermes hooks revoke` + re-approve to refresh") + elif mtime_now and mtime_at and mtime_now == mtime_at: + print(" ✓ script unchanged since approval") + + # 4. Produces valid JSON for a synthetic payload — only when the entry + # is already allowlisted. Otherwise `hermes hooks doctor` would execute + # every script listed in a freshly-pulled config before the user has + # reviewed them, which directly contradicts the documented workflow + # ("spot newly-added hooks *before they register*"). + if not entry: + print(" ℹ skipped JSON smoke test — not allowlisted yet. " + "Approve the hook first (via TTY prompt or --accept-hooks), " + "then re-run `hermes hooks doctor`.") + elif shell_hooks.script_is_executable(spec.command): + payload = _DEFAULT_PAYLOADS.get(spec.event, {"extra": {}}) + result = shell_hooks.run_once(spec, payload) + if result.get("timed_out"): + problems += 1 + print(f" ✗ timed out after {result['elapsed_seconds']}s " + f"on synthetic payload (timeout={spec.timeout}s)") + elif result.get("error"): + problems += 1 + print(f" ✗ execution error: {result['error']}") + else: + rc = result.get("returncode") + elapsed = result.get("elapsed_seconds", 0) + stdout = (result.get("stdout") or "").strip() + if stdout: + try: + json.loads(stdout) + print(f" ✓ produced valid JSON on synthetic payload " + f"(exit={rc}, {elapsed}s)") + except json.JSONDecodeError: + problems += 1 + print(f" ✗ stdout was not valid JSON (exit={rc}, " + f"{elapsed}s): {_truncate(stdout, 120)}") + else: + print(f" ✓ ran clean with empty stdout " + f"(exit={rc}, {elapsed}s) — hook is observer-only") + + return problems diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 7e0220d918..404e59089a 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -51,6 +51,19 @@ import sys from pathlib import Path from typing import Optional +def _add_accept_hooks_flag(parser) -> None: + """Attach the ``--accept-hooks`` flag. Shared across every agent + subparser so the flag works regardless of CLI position.""" + parser.add_argument( + "--accept-hooks", + action="store_true", + default=argparse.SUPPRESS, + help=( + "Auto-approve unseen shell hooks without a TTY prompt " + "(equivalent to HERMES_ACCEPT_HOOKS=1 / hooks_auto_accept: true)." + ), + ) + def _require_tty(command_name: str) -> None: """Exit with a clear error if stdin is not a terminal. 
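Aside on `_add_accept_hooks_flag` above: it sets `default=argparse.SUPPRESS` so the attribute only exists on the namespace when the flag was actually passed. That way `getattr(args, "accept_hooks", False)` works no matter which parser in the chain defined the flag, and a subparser's default cannot clobber a top-level value. A self-contained sketch of this standard argparse behaviour:

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument("--accept-hooks", action="store_true", default=argparse.SUPPRESS)

    args = p.parse_args([])
    assert not hasattr(args, "accept_hooks")            # attribute absent, not False
    assert getattr(args, "accept_hooks", False) is False

    args = p.parse_args(["--accept-hooks"])
    assert args.accept_hooks is True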
@@ -180,7 +193,7 @@ import time as _time from datetime import datetime from hermes_cli import __version__, __release_date__ -from hermes_constants import OPENROUTER_BASE_URL +from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL logger = logging.getLogger(__name__) @@ -605,7 +618,6 @@ def _exec_in_container(container_info: dict, cli_args: list): container_info: dict with backend, container_name, exec_user, hermes_bin cli_args: the original CLI arguments (everything after 'hermes') """ - import shutil backend = container_info["backend"] container_name = container_info["container_name"] @@ -693,6 +705,10 @@ def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]: - If it looks like a session ID (contains underscore + hex), try direct lookup first. - Otherwise, treat it as a title and use resolve_session_by_title (auto-latest). - Falls back to the other method if the first doesn't match. + - If the resolved session is a compression root, follow the chain forward + to the latest continuation. Users who remember the old root ID (e.g. + from an exit summary printed before the bug fix, or from notes) get + resumed at the live tip instead of a stale parent with no messages. """ try: from hermes_state import SessionDB @@ -701,14 +717,23 @@ def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]: # Try as exact session ID first session = db.get_session(name_or_id) + resolved_id: Optional[str] = None if session: - db.close() - return session["id"] + resolved_id = session["id"] + else: + # Try as title (with auto-latest for lineage) + resolved_id = db.resolve_session_by_title(name_or_id) + + if resolved_id: + # Project forward through compression chain so resumes land on + # the live tip instead of a dead compressed parent. + try: + resolved_id = db.get_compression_tip(resolved_id) or resolved_id + except Exception: + pass - # Try as title (with auto-latest for lineage) - session_id = db.resolve_session_by_title(name_or_id) db.close() - return session_id + return resolved_id except Exception: pass return None @@ -990,6 +1015,17 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False): ) env.setdefault("HERMES_PYTHON", sys.executable) env.setdefault("HERMES_CWD", os.getcwd()) + # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is + # ~1.5–4GB depending on version and can fatal-OOM on long sessions with + # large transcripts / reasoning blobs. Token-level merge: respect any + # user-supplied --max-old-space-size (they may have set it higher) and + # avoid duplicating --expose-gc. 
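A worked example of the token-level merge implemented just below (the pre-set NODE_OPTIONS value is hypothetical):

    env = {"NODE_OPTIONS": "--max-old-space-size=16384"}    # user raised it already
    tokens = env.get("NODE_OPTIONS", "").split()
    if not any(t.startswith("--max-old-space-size=") for t in tokens):
        tokens.append("--max-old-space-size=8192")
    if "--expose-gc" not in tokens:
        tokens.append("--expose-gc")
    # The larger user-supplied heap survives; only --expose-gc is appended.
    assert " ".join(tokens) == "--max-old-space-size=16384 --expose-gc"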
+ _tokens = env.get("NODE_OPTIONS", "").split() + if not any(t.startswith("--max-old-space-size=") for t in _tokens): + _tokens.append("--max-old-space-size=8192") + if "--expose-gc" not in _tokens: + _tokens.append("--expose-gc") + env["NODE_OPTIONS"] = " ".join(_tokens) if resume_session_id: env["HERMES_TUI_RESUME"] = resume_session_id @@ -1144,8 +1180,6 @@ def cmd_gateway(args): def cmd_whatsapp(args): """Set up WhatsApp: choose mode, configure, install bridge, pair via QR.""" _require_tty("whatsapp") - import subprocess - from pathlib import Path from hermes_cli.config import get_env_value, save_env_value print() @@ -1254,16 +1288,27 @@ def cmd_whatsapp(args): return if not (bridge_dir / "node_modules").exists(): - print("\n→ Installing WhatsApp bridge dependencies...") - result = subprocess.run( - ["npm", "install"], - cwd=str(bridge_dir), - capture_output=True, - text=True, - timeout=120, - ) + print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...") + npm = shutil.which("npm") + if not npm: + print(" ✗ npm not found on PATH — install Node.js first") + return + try: + result = subprocess.run( + [npm, "install", "--no-fund", "--no-audit", "--progress=false"], + cwd=str(bridge_dir), + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) + except KeyboardInterrupt: + print("\n ✗ Install cancelled") + return if result.returncode != 0: - print(f" ✗ npm install failed: {result.stderr}") + err = (result.stderr or "").strip() + preview = "\n".join(err.splitlines()[-30:]) if err else "(no output)" + print(" ✗ npm install failed:") + print(preview) return print(" ✓ Dependencies installed") else: @@ -1282,8 +1327,6 @@ def cmd_whatsapp(args): except (EOFError, KeyboardInterrupt): response = "n" if response.lower() in ("y", "yes"): - import shutil - shutil.rmtree(session_dir, ignore_errors=True) session_dir.mkdir(parents=True, exist_ok=True) print(" ✓ Session cleared") @@ -1379,8 +1422,6 @@ def select_provider_and_model(args=None): # Read effective provider the same way the CLI does at startup: # config.yaml model.provider > env var > auto-detect - import os - config_provider = None model_cfg = config.get("model") if isinstance(model_cfg, dict): @@ -1491,6 +1532,8 @@ def select_provider_and_model(args=None): # Step 2: Provider-specific setup + model selection if selected_provider == "openrouter": _model_flow_openrouter(config, current_model) + elif selected_provider == "ai-gateway": + _model_flow_ai_gateway(config, current_model) elif selected_provider == "nous": _model_flow_nous(config, current_model, args=args) elif selected_provider == "openai-codex": @@ -1523,6 +1566,8 @@ def select_provider_and_model(args=None): _model_flow_anthropic(config, current_model) elif selected_provider == "kimi-coding": _model_flow_kimi(config, current_model) + elif selected_provider == "stepfun": + _model_flow_stepfun(config, current_model) elif selected_provider == "bedrock": _model_flow_bedrock(config, current_model) elif selected_provider in ( @@ -1536,7 +1581,6 @@ def select_provider_and_model(args=None): "kilocode", "opencode-zen", "opencode-go", - "ai-gateway", "alibaba", "huggingface", "xiaomi", @@ -2008,6 +2052,63 @@ def _model_flow_openrouter(config, current_model=""): print("No change.") +def _model_flow_ai_gateway(config, current_model=""): + """Vercel AI Gateway provider: ensure API key, then pick model with pricing.""" + from hermes_cli.auth import ( + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) + from hermes_cli.config import 
get_env_value, save_env_value + + api_key = get_env_value("AI_GATEWAY_API_KEY") + if not api_key: + print("No Vercel AI Gateway API key configured.") + print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway") + print("Add a payment method to get $5 in free credits.") + print() + try: + import getpass + + key = getpass.getpass("AI Gateway API key (or Enter to cancel): ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + if not key: + print("Cancelled.") + return + save_env_value("AI_GATEWAY_API_KEY", key) + print("API key saved.") + print() + + from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider + + models_list = ai_gateway_model_ids(force_refresh=True) + pricing = get_pricing_for_provider("ai-gateway", force_refresh=True) + + selected = _prompt_model_selection( + models_list, current_model=current_model, pricing=pricing + ) + if selected: + _save_model_choice(selected) + + from hermes_cli.config import load_config, save_config + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = "ai-gateway" + model["base_url"] = AI_GATEWAY_BASE_URL + model["api_mode"] = "chat_completions" + save_config(cfg) + deactivate_provider() + print(f"Default model set to: {selected} (via Vercel AI Gateway)") + else: + print("No change.") + + def _model_flow_nous(config, current_model="", args=None): """Nous Portal provider: ensure logged in, then pick model.""" from hermes_cli.auth import ( @@ -2028,7 +2129,6 @@ def _model_flow_nous(config, current_model="", args=None): save_env_value, ) from hermes_cli.nous_subscription import prompt_enable_tool_gateway - import argparse state = get_provider_auth_state("nous") if not state or not state.get("access_token"): @@ -2067,7 +2167,6 @@ def _model_flow_nous(config, current_model="", args=None): from hermes_cli.models import ( _PROVIDER_MODELS, get_pricing_for_provider, - filter_nous_free_models, check_nous_free_tier, partition_nous_models_by_tier, ) @@ -2110,10 +2209,8 @@ def _model_flow_nous(config, current_model="", args=None): # Check if user is on free tier free_tier = check_nous_free_tier() - # For both tiers: apply the allowlist filter first (removes non-allowlisted - # free models and allowlist models that aren't actually free). - # Then for free users: partition remaining models into selectable/unavailable. - model_ids = filter_nous_free_models(model_ids, pricing) + # For free users: partition models into selectable/unavailable based on + # whether they are free per the Portal-reported pricing. 
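A hypothetical partition matching the comment above, with made-up Portal pricing; the real logic lives in partition_nous_models_by_tier.

    pricing = {"hermes-free": 0.0, "hermes-pro": 1.2}   # made-up pricing data
    model_ids = list(pricing)
    selectable = [m for m in model_ids if pricing[m] == 0.0]
    unavailable = [m for m in model_ids if pricing[m] > 0.0]
    assert selectable == ["hermes-free"] and unavailable == ["hermes-pro"]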
unavailable_models: list[str] = [] if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( @@ -2196,7 +2293,6 @@ def _model_flow_openai_codex(config, current_model=""): DEFAULT_CODEX_BASE_URL, ) from hermes_cli.codex_models import get_codex_model_ids - import argparse status = get_codex_auth_status() if not status.get("logged_in"): @@ -2351,7 +2447,7 @@ def _model_flow_google_gemini_cli(_config, current_model=""): return models = list(_PROVIDER_MODELS.get("google-gemini-cli") or []) - default = current_model or (models[0] if models else "gemini-2.5-flash") + default = current_model or (models[0] if models else "gemini-3-flash-preview") selected = _prompt_model_selection(models, current_model=default) if selected: _save_model_choice(selected) @@ -3327,8 +3423,9 @@ def _model_flow_kimi(config, current_model=""): # Step 3: Model selection — show appropriate models for the endpoint if is_coding_plan: - # Coding Plan models (kimi-k2.5 first) + # Coding Plan models (kimi-k2.6 first) model_list = [ + "kimi-k2.6", "kimi-k2.5", "kimi-for-coding", "kimi-k2-thinking", @@ -3367,6 +3464,140 @@ def _model_flow_kimi(config, current_model=""): print("No change.") +def _infer_stepfun_region(base_url: str) -> str: + """Infer the current StepFun region from the configured endpoint.""" + normalized = (base_url or "").strip().lower() + if "api.stepfun.com" in normalized: + return "china" + return "international" + + +def _stepfun_base_url_for_region(region: str) -> str: + from hermes_cli.auth import ( + STEPFUN_STEP_PLAN_CN_BASE_URL, + STEPFUN_STEP_PLAN_INTL_BASE_URL, + ) + + return ( + STEPFUN_STEP_PLAN_CN_BASE_URL + if region == "china" + else STEPFUN_STEP_PLAN_INTL_BASE_URL + ) + + +def _model_flow_stepfun(config, current_model=""): + """StepFun Step Plan flow with region-specific endpoints.""" + from hermes_cli.auth import ( + PROVIDER_REGISTRY, + _prompt_model_selection, + _save_model_choice, + deactivate_provider, + ) + from hermes_cli.config import get_env_value, save_env_value, load_config, save_config + from hermes_cli.models import fetch_api_models + + provider_id = "stepfun" + pconfig = PROVIDER_REGISTRY[provider_id] + key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else "" + base_url_env = pconfig.base_url_env_var or "" + + existing_key = "" + for ev in pconfig.api_key_env_vars: + existing_key = get_env_value(ev) or os.getenv(ev, "") + if existing_key: + break + + if not existing_key: + print(f"No {pconfig.name} API key configured.") + if key_env: + try: + import getpass + new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + if not new_key: + print("Cancelled.") + return + save_env_value(key_env, new_key) + existing_key = new_key + print("API key saved.") + print() + else: + print(f" {pconfig.name} API key: {existing_key[:8]}... 
✓") + print() + + current_base = "" + if base_url_env: + current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "") + if not current_base: + model_cfg = config.get("model") + if isinstance(model_cfg, dict): + current_base = str(model_cfg.get("base_url") or "").strip() + current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url) + + region_choices = [ + ("international", f"International ({_stepfun_base_url_for_region('international')})"), + ("china", f"China ({_stepfun_base_url_for_region('china')})"), + ] + ordered_regions = [] + for region_key, label in region_choices: + if region_key == current_region: + ordered_regions.insert(0, (region_key, f"{label} ← currently active")) + else: + ordered_regions.append((region_key, label)) + ordered_regions.append(("cancel", "Cancel")) + + region_idx = _prompt_provider_choice([label for _, label in ordered_regions]) + if region_idx is None or ordered_regions[region_idx][0] == "cancel": + print("No change.") + return + + selected_region = ordered_regions[region_idx][0] + effective_base = _stepfun_base_url_for_region(selected_region) + if base_url_env: + save_env_value(base_url_env, effective_base) + + live_models = fetch_api_models(existing_key, effective_base) + if live_models: + model_list = live_models + print(f" Found {len(model_list)} model(s) from {pconfig.name} API") + else: + model_list = _PROVIDER_MODELS.get(provider_id, []) + if model_list: + print( + f" Could not auto-detect models from {pconfig.name} API — " + "showing Step Plan fallback catalog." + ) + + if model_list: + selected = _prompt_model_selection(model_list, current_model=current_model) + else: + try: + selected = input("Model name: ").strip() + except (KeyboardInterrupt, EOFError): + selected = None + + if selected: + _save_model_choice(selected) + + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = provider_id + model["base_url"] = effective_base + model.pop("api_mode", None) + save_config(cfg) + deactivate_provider() + + config["model"] = dict(model) + print(f"Default model set to: {selected} (via {pconfig.name})") + else: + print("No change.") + + def _model_flow_bedrock_api_key(config, region, current_model=""): """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint. 
@@ -4067,6 +4298,12 @@ def cmd_webhook(args): webhook_command(args) +def cmd_hooks(args): + """Shell-hook inspection and management.""" + from hermes_cli.hooks import hooks_command + hooks_command(args) + + def cmd_doctor(args): """Check configuration and dependencies.""" from hermes_cli.doctor import run_doctor @@ -4176,9 +4413,7 @@ def _clear_bytecode_cache(root: Path) -> int: ] if os.path.basename(dirpath) == "__pycache__": try: - import shutil as _shutil - - _shutil.rmtree(dirpath) + shutil.rmtree(dirpath) removed += 1 except OSError: pass @@ -4217,8 +4452,6 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0) tmp.replace(prompt_path) # Poll for response - import time as _time - deadline = _time.monotonic() + timeout while _time.monotonic() < deadline: if response_path.exists(): @@ -4250,7 +4483,6 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: """ if not (web_dir / "package.json").exists(): return True - import shutil npm = shutil.which("npm") if not npm: @@ -4287,7 +4519,6 @@ def _update_via_zip(args): Used on Windows when git file I/O is broken (antivirus, NTFS filter drivers causing 'Invalid argument' errors on file creation). """ - import shutil import tempfile import zipfile from urllib.request import urlretrieve @@ -4364,7 +4595,6 @@ def _update_via_zip(args): # breaks on this machine, keep base deps and reinstall the remaining extras # individually so update does not silently strip working capabilities. print("→ Updating Python dependencies...") - import subprocess uv_bin = shutil.which("uv") if uv_bin: @@ -5115,9 +5345,11 @@ def _install_hangup_protection(gateway_mode: bool = False): # (2) Mirror output to update.log and wrap stdio for broken-pipe # tolerance. Any failure here is non-fatal; we just skip the wrap. try: - from hermes_cli.config import get_hermes_home + # Late-bound import so tests can monkeypatch + # hermes_cli.config.get_hermes_home to simulate setup failure. + from hermes_cli.config import get_hermes_home as _get_hermes_home - logs_dir = get_hermes_home() / "logs" + logs_dir = _get_hermes_home() / "logs" logs_dir.mkdir(parents=True, exist_ok=True) log_path = logs_dir / "update.log" log_file = open(log_path, "a", buffering=1, encoding="utf-8") @@ -5692,8 +5924,6 @@ def _cmd_update_impl(args, gateway_mode: bool): # Verify the service actually survived the # restart. systemctl restart returns 0 even # if the new process crashes immediately. - import time as _time - _time.sleep(3) verify = subprocess.run( scope_cmd + ["is-active", svc_name], @@ -6346,6 +6576,17 @@ For more help on a command: default=False, help="Run in an isolated git worktree (for parallel agents)", ) + parser.add_argument( + "--accept-hooks", + action="store_true", + default=False, + help=( + "Auto-approve any unseen shell hooks declared in config.yaml " + "without a TTY prompt. Equivalent to HERMES_ACCEPT_HOOKS=1 or " + "hooks_auto_accept: true in config.yaml. Use on CI / headless " + "runs that can't prompt." 
+ ), + ) parser.add_argument( "--skills", "-s", @@ -6425,6 +6666,7 @@ For more help on a command: "zai", "kimi-coding", "kimi-coding-cn", + "stepfun", "minimax", "minimax-cn", "kilocode", @@ -6468,6 +6710,16 @@ For more help on a command: default=argparse.SUPPRESS, help="Run in an isolated git worktree (for parallel agents on the same repo)", ) + chat_parser.add_argument( + "--accept-hooks", + action="store_true", + default=argparse.SUPPRESS, + help=( + "Auto-approve any unseen shell hooks declared in config.yaml " + "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and " + "hooks_auto_accept: in config.yaml)." + ), + ) chat_parser.add_argument( "--checkpoints", action="store_true", @@ -6587,6 +6839,8 @@ For more help on a command: action="store_true", help="Replace any existing gateway instance (useful for systemd)", ) + _add_accept_hooks_flag(gateway_run) + _add_accept_hooks_flag(gateway_parser) # gateway start gateway_start = gateway_subparsers.add_parser( @@ -6951,6 +7205,7 @@ For more help on a command: "run", help="Run a job on the next scheduler tick" ) cron_run.add_argument("job_id", help="Job ID to trigger") + _add_accept_hooks_flag(cron_run) cron_remove = cron_subparsers.add_parser( "remove", aliases=["rm", "delete"], help="Remove a scheduled job" @@ -6961,8 +7216,9 @@ For more help on a command: cron_subparsers.add_parser("status", help="Check if cron scheduler is running") # cron tick (mostly for debugging) - cron_subparsers.add_parser("tick", help="Run due jobs once and exit") - + cron_tick = cron_subparsers.add_parser("tick", help="Run due jobs once and exit") + _add_accept_hooks_flag(cron_tick) + _add_accept_hooks_flag(cron_parser) cron_parser.set_defaults(func=cmd_cron) # ========================================================================= @@ -7002,6 +7258,13 @@ For more help on a command: wh_sub.add_argument( "--secret", default="", help="HMAC secret (auto-generated if omitted)" ) + wh_sub.add_argument( + "--deliver-only", + action="store_true", + help="Skip the agent — deliver the rendered prompt directly as the " + "message. Zero LLM cost. Requires --deliver to be a real target " + "(not 'log').", + ) webhook_subparsers.add_parser( "list", aliases=["ls"], help="List all dynamic subscriptions" @@ -7022,6 +7285,67 @@ For more help on a command: webhook_parser.set_defaults(func=cmd_webhook) + # ========================================================================= + # hooks command — shell-hook inspection and management + # ========================================================================= + hooks_parser = subparsers.add_parser( + "hooks", + help="Inspect and manage shell-script hooks", + description=( + "Inspect shell-script hooks declared in ~/.hermes/config.yaml, " + "test them against synthetic payloads, and manage the first-use " + "consent allowlist at ~/.hermes/shell-hooks-allowlist.json." + ), + ) + hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action") + + hooks_subparsers.add_parser( + "list", aliases=["ls"], + help="List configured hooks with matcher, timeout, and consent status", + ) + + _hk_test = hooks_subparsers.add_parser( + "test", + help="Fire every hook matching against a synthetic payload", + ) + _hk_test.add_argument( + "event", + help="Hook event name (e.g. 
pre_tool_call, pre_llm_call, subagent_stop)", + ) + _hk_test.add_argument( + "--for-tool", dest="for_tool", default=None, + help=( + "Only fire hooks whose matcher matches this tool name " + "(used for pre_tool_call / post_tool_call)" + ), + ) + _hk_test.add_argument( + "--payload-file", dest="payload_file", default=None, + help=( + "Path to a JSON file whose contents are merged into the " + "synthetic payload before execution" + ), + ) + + _hk_revoke = hooks_subparsers.add_parser( + "revoke", aliases=["remove", "rm"], + help="Remove a command's allowlist entries (takes effect on next restart)", + ) + _hk_revoke.add_argument( + "command", + help="The exact command string to revoke (as declared in config.yaml)", + ) + + hooks_subparsers.add_parser( + "doctor", + help=( + "Check each configured hook: exec bit, allowlist, mtime drift, " + "JSON validity, and synthetic run timing" + ), + ) + + hooks_parser.set_defaults(func=cmd_hooks) + # ========================================================================= # doctor command # ========================================================================= @@ -7429,6 +7753,17 @@ Examples: action="store_true", help="Remove existing plugin and reinstall", ) + _install_enable_group = plugins_install.add_mutually_exclusive_group() + _install_enable_group.add_argument( + "--enable", + action="store_true", + help="Auto-enable the plugin after install (skip confirmation prompt)", + ) + _install_enable_group.add_argument( + "--no-enable", + action="store_true", + help="Install disabled (skip confirmation prompt); enable later with `hermes plugins enable <name>`", + ) plugins_update = plugins_subparsers.add_parser( "update", help="Pull latest changes for an installed plugin" ) @@ -7476,9 +7811,7 @@ Examples: ) cmd_info["setup_fn"](plugin_parser) except Exception as _exc: - import logging as _log - - _log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc) + logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc) # ========================================================================= # memory command # ========================================================================= @@ -7684,6 +8017,7 @@ Examples: action="store_true", help="Enable verbose logging on stderr", ) + _add_accept_hooks_flag(mcp_serve_p) mcp_add_p = mcp_sub.add_parser( "add", help="Add an MCP server (discovery-first install)" ) @@ -7722,6 +8056,8 @@ Examples: ) mcp_login_p.add_argument("name", help="Server name to re-authenticate") + _add_accept_hooks_flag(mcp_parser) + def cmd_mcp(args): from hermes_cli.mcp_config import mcp_command @@ -7860,7 +8196,6 @@ Examples: return line = _json.dumps(data, ensure_ascii=False) + "\n" if args.output == "-": - import sys sys.stdout.write(line) else: @@ -7870,7 +8205,6 @@ Examples: else: sessions = db.export_all(source=args.source) if args.output == "-": - import sys for s in sessions: sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n") @@ -7941,8 +8275,6 @@ Examples: # Launch hermes --resume by replacing the current process print(f"Resuming session: {selected_id}") - import shutil - hermes_bin = shutil.which("hermes") if hermes_bin: os.execvp(hermes_bin, ["hermes", "--resume", selected_id]) @@ -8133,6 +8465,7 @@ Examples: help="Run Hermes Agent as an ACP (Agent Client Protocol) server", description="Start Hermes Agent in ACP mode for editor integration (VS Code, Zed, JetBrains)", ) + _add_accept_hooks_flag(acp_parser) def cmd_acp(args): """Launch Hermes Agent as an ACP server.""" @@ -8406,6 +8739,42 @@ Examples: cmd_version(args) return + # Discover Python plugins and register shell hooks
once, before any + # command that can fire lifecycle hooks. Both are idempotent; gated + # so introspection/management commands (hermes hooks list, cron + # list, gateway status, mcp add, ...) don't pay discovery cost or + # trigger consent prompts for hooks the user is still inspecting. + # Groups with mixed admin/CRUD vs. agent-running entries narrow via + # the nested subcommand (dest varies by parser). + _AGENT_COMMANDS = {None, "chat", "acp", "rl"} + _AGENT_SUBCOMMANDS = { + "cron": ("cron_command", {"run", "tick"}), + "gateway": ("gateway_command", {"run"}), + "mcp": ("mcp_action", {"serve"}), + } + _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None)) + if ( + args.command in _AGENT_COMMANDS + or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set) + ): + _accept_hooks = bool(getattr(args, "accept_hooks", False)) + try: + from hermes_cli.plugins import discover_plugins + discover_plugins() + except Exception: + logger.debug( + "plugin discovery failed at CLI startup", exc_info=True, + ) + try: + from hermes_cli.config import load_config + from agent.shell_hooks import register_from_config + register_from_config(load_config(), accept_hooks=_accept_hooks) + except Exception: + logger.debug( + "shell-hook registration failed at CLI startup", + exc_info=True, + ) + # Handle top-level --resume / --continue as shortcut to chat if (args.resume or args.continue_last) and args.command is None: args.command = "chat" diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 004582a574..e5feaa8654 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = { # Z.AI / GLM "glm": ModelIdentity("z-ai", "glm"), - # StepFun + # Step Plan (StepFun) "step": ModelIdentity("stepfun", "step"), # Xiaomi @@ -678,6 +678,7 @@ def switch_model( _da = DIRECT_ALIASES.get(resolved_alias) if _da is not None and _da.base_url: base_url = _da.base_url + api_mode = "" # clear so determine_api_mode re-detects from URL if not api_key: api_key = "no-key-required" @@ -1035,21 +1036,49 @@ def list_authenticated_providers( seen_slugs.add(_cp.slug.lower()) # --- 3. User-defined endpoints from config --- + # Track (name, base_url) of what section 3 emits so section 4 can skip + # any overlapping ``custom_providers:`` entries. Callers typically pass + # both (gateway/CLI invoke ``get_compatible_custom_providers()`` which + # merges ``providers:`` into the list) — without this, the same endpoint + # produces two picker rows: one bare-slug ("openrouter") from section 3 + # and one "custom:openrouter" from section 4, both labelled identically. + _section3_emitted_pairs: set = set() if user_providers and isinstance(user_providers, dict): for ep_name, ep_cfg in user_providers.items(): if not isinstance(ep_cfg, dict): continue + # Skip if this slug was already emitted (e.g. canonical provider + # with the same name) or will be picked up by section 4. + if ep_name.lower() in seen_slugs: + continue display_name = ep_cfg.get("name", "") or ep_name - api_url = ep_cfg.get("api", "") or ep_cfg.get("url", "") or "" - default_model = ep_cfg.get("default_model", "") + # ``base_url`` is Hermes's canonical write key (matches + # custom_providers and _save_custom_provider); ``api`` / ``url`` + # remain as fallbacks for hand-edited / legacy configs. 
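# --- illustrative aside (not part of the patch) ---------------------------
# The fallback chain below, exercised on toy configs. The helper name
# `_resolve_endpoint_url` is hypothetical — the real code inlines the
# chained `or` expression.
def _resolve_endpoint_url(ep_cfg: dict) -> str:
    # First non-empty key wins: canonical write key, then legacy fallbacks.
    return (
        ep_cfg.get("base_url", "")
        or ep_cfg.get("api", "")
        or ep_cfg.get("url", "")
        or ""
    )

assert _resolve_endpoint_url({"url": "https://legacy.example/v1"}) == "https://legacy.example/v1"
assert _resolve_endpoint_url({"base_url": "https://canon.example", "url": "shadowed"}) == "https://canon.example"
# ---------------------------------------------------------------------------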
+ api_url = ( + ep_cfg.get("base_url", "") + or ep_cfg.get("api", "") + or ep_cfg.get("url", "") + or "" + ) + # ``default_model`` is the legacy key; ``model`` matches what + # custom_providers entries use, so accept either. + default_model = ep_cfg.get("default_model", "") or ep_cfg.get("model", "") # Build models list from both default_model and full models array models_list = [] if default_model: models_list.append(default_model) - # Also include the full models list from config + # Also include the full models list from config. + # Hermes writes ``models:`` as a dict keyed by model id + # (see hermes_cli/main.py::_save_custom_provider); older + # configs or hand-edited files may still use a list. cfg_models = ep_cfg.get("models", []) - if isinstance(cfg_models, list): + if isinstance(cfg_models, dict): + for m in cfg_models: + if m and m not in models_list: + models_list.append(m) + elif isinstance(cfg_models, list): for m in cfg_models: if m and m not in models_list: models_list.append(m) @@ -1066,6 +1095,14 @@ def list_authenticated_providers( "source": "user-config", "api_url": api_url, }) + seen_slugs.add(ep_name.lower()) + seen_slugs.add(custom_provider_slug(display_name).lower()) + _pair = ( + str(display_name).strip().lower(), + str(api_url).strip().rstrip("/").lower(), + ) + if _pair[0] and _pair[1]: + _section3_emitted_pairs.add(_pair) # --- 4. Saved custom providers from config --- # Each ``custom_providers`` entry represents one model under a named @@ -1100,13 +1137,41 @@ def list_authenticated_providers( "api_url": api_url, "models": [], } + # The singular ``model:`` field only holds the currently + # active model. Hermes's own writer (main.py::_save_custom_provider) + # stores every configured model as a dict under ``models:``; + # downstream readers (agent/models_dev.py, gateway/run.py, + # run_agent.py, hermes_cli/config.py) already consume that dict. + # The /model picker previously ignored it, so multi-model + # custom providers appeared to have only the active model. default_model = (entry.get("model") or "").strip() if default_model and default_model not in groups[slug]["models"]: groups[slug]["models"].append(default_model) + cfg_models = entry.get("models", {}) + if isinstance(cfg_models, dict): + for m in cfg_models: + if m and m not in groups[slug]["models"]: + groups[slug]["models"].append(m) + elif isinstance(cfg_models, list): + for m in cfg_models: + if m and m not in groups[slug]["models"]: + groups[slug]["models"].append(m) + for slug, grp in groups.items(): if slug.lower() in seen_slugs: continue + # Skip if section 3 already emitted this endpoint under its + # ``providers:`` dict key — matches on (display_name, base_url), + # the tuple section 4 groups by. Prevents two picker rows + # labelled identically when callers pass both ``user_providers`` + # and a compatibility-merged ``custom_providers`` list. 
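# --- illustrative aside (not part of the patch) ---------------------------
# The dedup key shared by sections 3 and 4 is a normalized
# (display_name, base_url) tuple; `_endpoint_pair` is a hypothetical
# helper showing just the normalization, on made-up values.
def _endpoint_pair(name: str, api_url: str) -> tuple[str, str]:
    # Lowercase and strip a trailing slash so "https://X.test/" and
    # "https://x.test" compare equal.
    return (name.strip().lower(), api_url.strip().rstrip("/").lower())

seen = {_endpoint_pair("OpenRouter", "https://openrouter.ai/api/")}
# A custom_providers entry for the same endpoint maps to the same pair,
# so the picker shows one row instead of two.
assert _endpoint_pair("openrouter", "https://openrouter.ai/api") in seen
# ---------------------------------------------------------------------------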
+ _pair_key = ( + str(grp["name"]).strip().lower(), + str(grp["api_url"]).strip().rstrip("/").lower(), + ) + if _pair_key[0] and _pair_key[1] and _pair_key in _section3_emitted_pairs: + continue results.append({ "slug": slug, "name": grp["name"], diff --git a/hermes_cli/models.py b/hermes_cli/models.py index a0d7c2220c..4b3493506d 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -16,6 +16,12 @@ from difflib import get_close_matches from pathlib import Path from typing import Any, NamedTuple, Optional +from hermes_cli import __version__ as _HERMES_VERSION + +# Identify ourselves so endpoints fronted by Cloudflare's Browser Integrity +# Check (error 1010) don't reject the default ``Python-urllib/*`` signature. +_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}" + COPILOT_BASE_URL = "https://api.githubcopilot.com" COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models" COPILOT_EDITOR_VERSION = "vscode/1.104.1" @@ -26,7 +32,7 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"] # Fallback OpenRouter snapshot used when the live catalog is unavailable. # (model_id, display description shown in menus) OPENROUTER_MODELS: list[tuple[str, str]] = [ - ("moonshotai/kimi-k2.5", "recommended"), + ("moonshotai/kimi-k2.6", "recommended"), ("anthropic/claude-opus-4.7", ""), ("anthropic/claude-opus-4.6", ""), ("anthropic/claude-sonnet-4.6", ""), @@ -47,6 +53,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("stepfun/step-3.5-flash", ""), ("minimax/minimax-m2.7", ""), ("minimax/minimax-m2.5", ""), + ("minimax/minimax-m2.5:free", "free"), ("z-ai/glm-5.1", ""), ("z-ai/glm-5v-turbo", ""), ("z-ai/glm-5-turbo", ""), @@ -62,6 +69,31 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ _openrouter_catalog_cache: list[tuple[str, str]] | None = None +# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable. +# OSS / open-weight models prioritized first, then closed-source by family. +# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen, +# zai/ and xai/ without hyphens). +VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [ + ("moonshotai/kimi-k2.6", "recommended"), + ("alibaba/qwen3.6-plus", ""), + ("zai/glm-5.1", ""), + ("minimax/minimax-m2.7", ""), + ("anthropic/claude-sonnet-4.6", ""), + ("anthropic/claude-opus-4.7", ""), + ("anthropic/claude-opus-4.6", ""), + ("anthropic/claude-haiku-4.5", ""), + ("openai/gpt-5.4", ""), + ("openai/gpt-5.4-mini", ""), + ("openai/gpt-5.3-codex", ""), + ("google/gemini-3.1-pro-preview", ""), + ("google/gemini-3-flash", ""), + ("google/gemini-3.1-flash-lite-preview", ""), + ("xai/grok-4.20-reasoning", ""), +] + +_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None + + def _codex_curated_models() -> list[str]: """Derive the openai-codex curated list from codex_models.py. 
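# --- illustrative aside (not part of the patch) ---------------------------
# Why the User-Agent matters: urllib sends "Python-urllib/3.x" unless a
# UA is supplied, and Cloudflare's Browser Integrity Check can reject
# that signature with error 1010. A sketch of the header being applied
# (placeholder URL and version string; no request is actually sent):
import urllib.request

req = urllib.request.Request(
    "https://example.com/v1/models",
    headers={
        "Accept": "application/json",
        "User-Agent": "hermes-cli/0.0.0",  # real code interpolates __version__
    },
)
# urllib stores header names capitalized and only substitutes its
# default UA when none was provided.
assert req.get_header("User-agent") == "hermes-cli/0.0.0"
# ---------------------------------------------------------------------------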
@@ -75,7 +107,7 @@ def _codex_curated_models() -> list[str]: _PROVIDER_MODELS: dict[str, list[str]] = { "nous": [ - "moonshotai/kimi-k2.5", + "moonshotai/kimi-k2.6", "xiaomi/mimo-v2-pro", "anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", @@ -94,17 +126,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "stepfun/step-3.5-flash", "minimax/minimax-m2.7", "minimax/minimax-m2.5", + "minimax/minimax-m2.5:free", "z-ai/glm-5.1", "z-ai/glm-5v-turbo", "z-ai/glm-5-turbo", "x-ai/grok-4.20-beta", "nvidia/nemotron-3-super-120b-a12b", - "nvidia/nemotron-3-super-120b-a12b:free", - "arcee-ai/trinity-large-preview:free", "arcee-ai/trinity-large-thinking", "openai/gpt-5.4-pro", "openai/gpt-5.4-nano", - "openrouter/elephant-alpha", ], "openai-codex": _codex_curated_models(), "copilot-acp": [ @@ -128,16 +158,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = { ], "gemini": [ "gemini-3.1-pro-preview", + "gemini-3-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", - "gemini-2.5-pro", - "gemini-2.5-flash", - "gemini-2.5-flash-lite", ], "google-gemini-cli": [ - "gemini-2.5-pro", - "gemini-2.5-flash", - "gemini-2.5-flash-lite", + "gemini-3.1-pro-preview", + "gemini-3-pro-preview", + "gemini-3-flash-preview", ], "zai": [ "glm-5.1", @@ -161,12 +189,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = { # (map to OpenRouter defaults — users get familiar picks on NIM) "qwen/qwen3.5-397b-a17b", "deepseek-ai/deepseek-v3.2", - "moonshotai/kimi-k2.5", + "moonshotai/kimi-k2.6", "minimaxai/minimax-m2.5", "z-ai/glm5", "openai/gpt-oss-120b", ], "kimi-coding": [ + "kimi-k2.6", "kimi-k2.5", "kimi-for-coding", "kimi-k2-thinking", @@ -175,12 +204,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2-0905-preview", ], "kimi-coding-cn": [ + "kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview", "kimi-k2-0905-preview", ], + "stepfun": [ + "step-3.5-flash", + "step-3.5-flash-2603", + ], "moonshot": [ + "kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview", @@ -227,7 +262,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "gpt-5.4-pro", "gpt-5.4", "gpt-5.3-codex", - "gpt-5.3-codex-spark", "gpt-5.2", "gpt-5.2-codex", "gpt-5.1", @@ -261,6 +295,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "big-pickle", ], "opencode-go": [ + "kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", @@ -268,20 +303,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", - ], - "ai-gateway": [ - "anthropic/claude-opus-4.6", - "anthropic/claude-sonnet-4.6", - "anthropic/claude-sonnet-4.5", - "anthropic/claude-haiku-4.5", - "openai/gpt-5", - "openai/gpt-4.1", - "openai/gpt-4.1-mini", - "google/gemini-3-pro-preview", - "google/gemini-3-flash", - "google/gemini-2.5-pro", - "google/gemini-2.5-flash", - "deepseek/deepseek-v3.2", + "qwen3.6-plus", + "qwen3.5-plus", ], "kilocode": [ "anthropic/claude-opus-4.6", @@ -315,6 +338,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "zai-org/GLM-5", "XiaomiMiMo/MiMo-V2-Flash", "moonshotai/Kimi-K2-Thinking", + "moonshotai/Kimi-K2.6", ], # AWS Bedrock — static fallback list used when dynamic discovery is # unavailable (no boto3, no credentials, or API error). The agent @@ -334,18 +358,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = { ], } +# Vercel AI Gateway: derive the bare-model-id catalog from the curated +# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions) +# and the static fallback catalog (bare ids) stay in sync from a single +# source of truth. 
+_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS] + # --------------------------------------------------------------------------- -# Nous Portal free-model filtering +# Nous Portal free-model helper # --------------------------------------------------------------------------- -# Models that are ALLOWED to appear when priced as free on Nous Portal. -# Any other free model is hidden — prevents promotional/temporary free models -# from cluttering the selection when users are paying subscribers. -# Models in this list are ALSO filtered out if they are NOT free (i.e. they -# should only appear in the menu when they are genuinely free). -_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({ - "xiaomi/mimo-v2-pro", - "xiaomi/mimo-v2-omni", -}) +# The Nous Portal models endpoint is the source of truth for which models +# are currently offered (free or paid). We trust whatever it returns and +# surface it to users as-is — no local allowlist filtering. def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: @@ -359,35 +383,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: return False -def filter_nous_free_models( - model_ids: list[str], - pricing: dict[str, dict[str, str]], -) -> list[str]: - """Filter the Nous Portal model list according to free-model policy. - - Rules: - • Paid models that are NOT in the allowlist → keep (normal case). - • Free models that are NOT in the allowlist → drop. - • Allowlist models that ARE free → keep. - • Allowlist models that are NOT free → drop. - """ - if not pricing: - return model_ids # no pricing data — can't filter, show everything - - result: list[str] = [] - for mid in model_ids: - free = _is_model_free(mid, pricing) - if mid in _NOUS_ALLOWED_FREE_MODELS: - # Allowlist model: only show when it's actually free - if free: - result.append(mid) - else: - # Regular model: keep only when it's NOT free - if not free: - result.append(mid) - return result - - # --------------------------------------------------------------------------- # Nous Portal account tier detection # --------------------------------------------------------------------------- @@ -451,8 +446,7 @@ def partition_nous_models_by_tier( ) -> tuple[list[str], list[str]]: """Split Nous models into (selectable, unavailable) based on user tier. - For paid-tier users: all models are selectable, none unavailable - (free-model filtering is handled separately by ``filter_nous_free_models``). + For paid-tier users: all models are selectable, none unavailable. For free-tier users: only free models are selectable; paid models are returned as unavailable (shown grayed out in the menu). @@ -491,8 +485,6 @@ def check_nous_free_tier() -> bool: Returns False (assume paid) on any error — never blocks paying users. """ global _free_tier_cache - import time - now = time.monotonic() if _free_tier_cache is not None: cached_result, cached_at = _free_tier_cache @@ -524,6 +516,157 @@ def check_nous_free_tier() -> bool: return False # default to paid on error — don't block users +# --------------------------------------------------------------------------- +# Nous Portal recommended models +# +# The Portal publishes a curated list of suggested models (separated into +# paid and free tiers) plus dedicated recommendations for compaction (text +# summarisation / auxiliary) and vision tasks. We fetch it once per process +# with a TTL cache so callers can ask "what's the best aux model right now?" +# without hitting the network on every lookup. 
+# +# Shape of the response (fields we care about): +# { +# "paidRecommendedModels": [ {modelName, ...}, ... ], +# "freeRecommendedModels": [ {modelName, ...}, ... ], +# "paidRecommendedCompactionModel": {modelName, ...} | null, +# "paidRecommendedVisionModel": {modelName, ...} | null, +# "freeRecommendedCompactionModel": {modelName, ...} | null, +# "freeRecommendedVisionModel": {modelName, ...} | null, +# } +# --------------------------------------------------------------------------- + +NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models" +_NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes) +# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide. +_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {} + + +def fetch_nous_recommended_models( + portal_base_url: str = "", + timeout: float = 5.0, + *, + force_refresh: bool = False, +) -> dict[str, Any]: + """Fetch the Nous Portal's curated recommended-models payload. + + Hits ``/api/nous/recommended-models``. The endpoint is public — + no auth is required. Results are cached per portal URL for + ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to + bypass the cache. + + Returns the parsed JSON dict on success, or ``{}`` on any failure + (network, parse, non-2xx). Callers must treat missing/null fields as + "no recommendation" and fall back to their own default. + """ + base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/") + now = time.monotonic() + cached = _nous_recommended_cache.get(base) + if not force_refresh and cached is not None: + payload, cached_at = cached + if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL: + return payload + + url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}" + try: + req = urllib.request.Request( + url, + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + if not isinstance(data, dict): + data = {} + except Exception: + data = {} + + _nous_recommended_cache[base] = (data, now) + return data + + +def _resolve_nous_portal_url() -> str: + """Best-effort lookup of the Portal base URL the user is authed against.""" + try: + from hermes_cli.auth import ( + DEFAULT_NOUS_PORTAL_URL, + get_provider_auth_state, + ) + state = get_provider_auth_state("nous") or {} + portal = str(state.get("portal_base_url") or "").strip() + if portal: + return portal.rstrip("/") + return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/") + except Exception: + return "https://portal.nousresearch.com" + + +def _extract_model_name(entry: Any) -> Optional[str]: + """Pull the ``modelName`` field from a recommended-model entry, else None.""" + if not isinstance(entry, dict): + return None + model_name = entry.get("modelName") + if isinstance(model_name, str) and model_name.strip(): + return model_name.strip() + return None + + +def get_nous_recommended_aux_model( + *, + vision: bool = False, + free_tier: Optional[bool] = None, + portal_base_url: str = "", + force_refresh: bool = False, +) -> Optional[str]: + """Return the Portal's recommended model name for an auxiliary task. 
+ + Picks the best field from the Portal's recommended-models payload: + + * ``vision=True`` → ``paidRecommendedVisionModel`` (paid tier) or + ``freeRecommendedVisionModel`` (free tier) + * ``vision=False`` → ``paidRecommendedCompactionModel`` or + ``freeRecommendedCompactionModel`` + + When ``free_tier`` is ``None`` (default) the user's tier is auto-detected + via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the + detection — useful for tests or when the caller already knows the tier. + + For paid-tier users we prefer the paid recommendation but gracefully fall + back to the free recommendation if the Portal returned ``null`` for the + paid field (common during the staged rollout of new paid models). + + Returns ``None`` when every candidate is missing, null, or the fetch + fails — callers should fall back to their own default (currently + ``google/gemini-3-flash-preview``). + """ + base = portal_base_url or _resolve_nous_portal_url() + payload = fetch_nous_recommended_models(base, force_refresh=force_refresh) + if not payload: + return None + + if free_tier is None: + try: + free_tier = check_nous_free_tier() + except Exception: + # On any detection error, assume paid — paid users see both fields + # anyway so this is a safe default that maximises model quality. + free_tier = False + + if vision: + paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel" + else: + paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel" + + # Preference order: + # free tier → free only + # paid tier → paid, then free (if paid field is null) + candidates = [free_key] if free_tier else [paid_key, free_key] + for key in candidates: + name = _extract_model_name(payload.get(key)) + if name: + return name + return None + + # --------------------------------------------------------------------------- # Canonical provider list — single source of truth for provider identity. 
# Every code path that lists, displays, or iterates providers derives from @@ -544,6 +687,7 @@ class ProviderEntry(NamedTuple): CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"), ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"), + ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"), ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"), @@ -552,13 +696,14 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"), ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"), ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"), - ProviderEntry("gemini", "Google AI Studio", "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"), + ProviderEntry("gemini", "Google AI Studio", "Google AI Studio (Gemini models — native Gemini API)"), ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"), ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"), ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"), ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"), ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"), ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"), + ProviderEntry("stepfun", "StepFun Step Plan", "StepFun Step Plan (agent/coding models via Step Plan API)"), ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"), ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"), ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"), @@ -567,7 +712,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"), ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"), ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"), - ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"), ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"), ] @@ -594,6 +738,8 @@ _PROVIDER_ALIASES = { "moonshot": "kimi-coding", "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn", + "step": "stepfun", + "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", "minimax-china": "minimax-cn", @@ -663,6 +809,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool: return False +def _openrouter_model_supports_tools(item: Any) -> bool: + """Return True when the model's ``supported_parameters`` advertise tool calling. + + hermes-agent is tool-calling-first — every provider path assumes the model + can invoke tools. Models that don't advertise ``tools`` in their + ``supported_parameters`` (e.g. 
image-only or completion-only models) cannot + be driven by the agent loop and would fail at the first tool call. + + **Permissive when the field is missing.** Some OpenRouter-compatible gateways + (Nous Portal, private mirrors, older catalog snapshots) don't populate + ``supported_parameters`` at all. Treat that as "unknown capability → allow" + so the picker doesn't silently empty for those users. Only hide models + whose ``supported_parameters`` is an explicit list that omits ``tools``. + + Ported from Kilo-Org/kilocode#9068. + """ + if not isinstance(item, dict): + return True + params = item.get("supported_parameters") + if not isinstance(params, list): + # Field absent / malformed / None — be permissive. + return True + return "tools" in params + + def fetch_openrouter_models( timeout: float = 8.0, *, @@ -705,6 +876,11 @@ def fetch_openrouter_models( live_item = live_by_id.get(preferred_id) if live_item is None: continue + # Hide models that don't advertise tool-calling support — hermes-agent + # requires it and surfacing them leads to immediate runtime failures + # when the user selects them. Ported from Kilo-Org/kilocode#9068. + if not _openrouter_model_supports_tools(live_item): + continue desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else "" curated.append((preferred_id, desc)) @@ -722,6 +898,93 @@ def model_ids(*, force_refresh: bool = False) -> list[str]: return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)] +def _ai_gateway_model_is_free(pricing: Any) -> bool: + """Return True if an AI Gateway model has $0 input AND output pricing.""" + if not isinstance(pricing, dict): + return False + try: + return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0 + except (TypeError, ValueError): + return False + + +def fetch_ai_gateway_models( + timeout: float = 8.0, + *, + force_refresh: bool = False, +) -> list[tuple[str, str]]: + """Return the curated AI Gateway picker list, refreshed from the live catalog when possible.""" + global _ai_gateway_catalog_cache + + if _ai_gateway_catalog_cache is not None and not force_refresh: + return list(_ai_gateway_catalog_cache) + + from hermes_constants import AI_GATEWAY_BASE_URL + + fallback = list(VERCEL_AI_GATEWAY_MODELS) + preferred_ids = [mid for mid, _ in fallback] + + try: + req = urllib.request.Request( + f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models", + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + payload = json.loads(resp.read().decode()) + except Exception: + return list(_ai_gateway_catalog_cache or fallback) + + live_items = payload.get("data", []) + if not isinstance(live_items, list): + return list(_ai_gateway_catalog_cache or fallback) + + live_by_id: dict[str, dict[str, Any]] = {} + for item in live_items: + if not isinstance(item, dict): + continue + mid = str(item.get("id") or "").strip() + if not mid: + continue + live_by_id[mid] = item + + curated: list[tuple[str, str]] = [] + for preferred_id in preferred_ids: + live_item = live_by_id.get(preferred_id) + if live_item is None: + continue + desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else "" + curated.append((preferred_id, desc)) + + if not curated: + return list(_ai_gateway_catalog_cache or fallback) + + # If the live catalog offers a free Moonshot model, auto-promote it to + # position #1 as "recommended" — dynamic discovery without a PR. 
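# --- illustrative aside (not part of the patch) ---------------------------
# The promotion step below in isolation, on synthetic catalog data: a
# model is "free" when both input and output prices are zero, and the
# first free moonshotai/* hit is moved to the top of the curated list
# and relabelled "recommended".
def _is_free(pricing: dict) -> bool:
    return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0

live_by_id = {
    "moonshotai/kimi-k2.6": {"pricing": {"input": "0", "output": "0"}},
    "anthropic/claude-sonnet-4.6": {"pricing": {"input": "3", "output": "15"}},
}
curated = [("anthropic/claude-sonnet-4.6", ""), ("moonshotai/kimi-k2.6", "free")]

free_moonshot = next(
    (mid for mid, item in live_by_id.items()
     if mid.startswith("moonshotai/") and _is_free(item.get("pricing", {}))),
    None,
)
if free_moonshot:
    curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot]
    curated.insert(0, (free_moonshot, "recommended"))

assert curated[0] == ("moonshotai/kimi-k2.6", "recommended")
# ---------------------------------------------------------------------------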
+ free_moonshot = next( + ( + mid + for mid, item in live_by_id.items() + if mid.startswith("moonshotai/") + and _ai_gateway_model_is_free(item.get("pricing")) + ), + None, + ) + if free_moonshot: + curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot] + curated.insert(0, (free_moonshot, "recommended")) + else: + first_id, _ = curated[0] + curated[0] = (first_id, "recommended") + + _ai_gateway_catalog_cache = curated + return list(curated) + + +def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]: + """Return just the AI Gateway model-id strings.""" + return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)] + + # --------------------------------------------------------------------------- @@ -866,6 +1129,56 @@ def fetch_models_with_pricing( return result +def fetch_ai_gateway_pricing( + timeout: float = 8.0, + *, + force_refresh: bool = False, +) -> dict[str, dict[str, str]]: + """Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing. + + Vercel uses ``input`` / ``output`` field names; hermes's picker expects + ``prompt`` / ``completion``. This translates. Cache read/write field names + already match. + """ + from hermes_constants import AI_GATEWAY_BASE_URL + + cache_key = AI_GATEWAY_BASE_URL.rstrip("/") + if not force_refresh and cache_key in _pricing_cache: + return _pricing_cache[cache_key] + + try: + req = urllib.request.Request( + f"{cache_key}/models", + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + payload = json.loads(resp.read().decode()) + except Exception: + _pricing_cache[cache_key] = {} + return {} + + result: dict[str, dict[str, str]] = {} + for item in payload.get("data", []): + if not isinstance(item, dict): + continue + mid = item.get("id") + pricing = item.get("pricing") + if not (mid and isinstance(pricing, dict)): + continue + entry: dict[str, str] = { + "prompt": str(pricing.get("input", "")), + "completion": str(pricing.get("output", "")), + } + if pricing.get("input_cache_read"): + entry["input_cache_read"] = str(pricing["input_cache_read"]) + if pricing.get("input_cache_write"): + entry["input_cache_write"] = str(pricing["input_cache_write"]) + result[mid] = entry + + _pricing_cache[cache_key] = result + return result + + def _resolve_openrouter_api_key() -> str: """Best-effort OpenRouter API key for pricing fetch.""" return os.getenv("OPENROUTER_API_KEY", "").strip() @@ -884,7 +1197,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]: def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]: - """Return live pricing for providers that support it (openrouter, nous).""" + """Return live pricing for providers that support it (openrouter, nous, ai-gateway).""" normalized = normalize_provider(provider) if normalized == "openrouter": return fetch_models_with_pricing( @@ -892,6 +1205,8 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d base_url="https://openrouter.ai/api", force_refresh=force_refresh, ) + if normalized == "ai-gateway": + return fetch_ai_gateway_pricing(force_refresh=force_refresh) if normalized == "nous": api_key, base_url = _resolve_nous_pricing_credentials() if base_url: @@ -1096,7 +1411,6 @@ def detect_provider_for_model( from hermes_cli.auth import PROVIDER_REGISTRY pconfig = PROVIDER_REGISTRY.get(direct_match) if pconfig: - import os for env_var in pconfig.api_key_env_vars: if os.getenv(env_var, "").strip(): has_creds = True @@ 
-1306,6 +1620,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) return live except Exception: pass + if normalized == "stepfun": + try: + from hermes_cli.auth import resolve_api_key_provider_credentials + + creds = resolve_api_key_provider_credentials("stepfun") + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip() + if api_key and base_url: + live = fetch_api_models(api_key, base_url) + if live: + return live + except Exception: + pass if normalized == "anthropic": live = _fetch_anthropic_models() if live: @@ -1771,7 +2098,7 @@ def probe_api_models( candidates.append((alternate_base, True)) tried: list[str] = [] - headers: dict[str, str] = {} + headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT} if api_key: headers["Authorization"] = f"Bearer {api_key}" if normalized.startswith(COPILOT_BASE_URL): @@ -2106,6 +2433,51 @@ def validate_requested_model( ), } + # MiniMax providers don't expose a /models endpoint — validate against + # the static catalog instead, similar to openai-codex. + if normalized in ("minimax", "minimax-cn"): + try: + catalog_models = provider_model_ids(normalized) + except Exception: + catalog_models = [] + if catalog_models: + # Case-insensitive lookup (catalog uses mixed case like MiniMax-M2.7) + catalog_lower = {m.lower(): m for m in catalog_models} + if requested_for_lookup.lower() in catalog_lower: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + # Auto-correct close matches (case-insensitive) + catalog_lower_list = list(catalog_lower.keys()) + auto = get_close_matches(requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9) + if auto: + corrected = catalog_lower[auto[0]] + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": corrected, + "message": f"Auto-corrected `{requested}` → `{corrected}`", + } + suggestions = get_close_matches(requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join(f"`{catalog_lower[s]}`" for s in suggestions) + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in the MiniMax catalog." + f"{suggestion_text}" + "\n MiniMax does not expose a /models endpoint, so Hermes cannot verify the model name." + "\n The model may still work if it exists on the server." + ), + } + # Probe the live API to check if the model actually exists api_models = fetch_api_models(api_key, base_url) @@ -2188,13 +2560,70 @@ def validate_requested_model( except Exception: pass # Fall through to generic warning + # Static-catalog fallback: when the /models probe was unreachable, + # validate against the curated list from provider_model_ids() — same + # pattern as the openai-codex and minimax branches above. This fixes + # /model switches in the gateway for providers like opencode-go and + # opencode-zen whose /models endpoint returns 404 against the HTML + # marketing site. Without this block, validate_requested_model would + # reject every model on such providers, switch_model() would return + # success=False, and the gateway would never write to + # _session_model_overrides. 
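# --- illustrative aside (not part of the patch) ---------------------------
# The matching ladder used by the catalog branches, on a toy catalog:
# exact case-insensitive hit, then difflib auto-correct at cutoff 0.9,
# then looser suggestions at cutoff 0.5.
from difflib import get_close_matches

catalog = ["MiniMax-M2.7", "MiniMax-M2.5"]
catalog_lower = {m.lower(): m for m in catalog}

assert catalog_lower.get("minimax-m2.7") == "MiniMax-M2.7"  # exact, case-folded
auto = get_close_matches("minimax-m27", list(catalog_lower), n=1, cutoff=0.9)
assert catalog_lower[auto[0]] == "MiniMax-M2.7"             # typo auto-corrected
loose = get_close_matches("minimax", list(catalog_lower), n=3, cutoff=0.5)
assert loose                                                # surfaced as suggestions only
# ---------------------------------------------------------------------------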
provider_label = _PROVIDER_LABELS.get(normalized, normalized) + try: + catalog_models = provider_model_ids(normalized) + except Exception: + catalog_models = [] + + if catalog_models: + catalog_lower = {m.lower(): m for m in catalog_models} + if requested_for_lookup.lower() in catalog_lower: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + catalog_lower_list = list(catalog_lower.keys()) + auto = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9 + ) + if auto: + corrected = catalog_lower[auto[0]] + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": corrected, + "message": f"Auto-corrected `{requested}` → `{corrected}`", + } + suggestions = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5 + ) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join( + f"`{catalog_lower[s]}`" for s in suggestions + ) + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in the {provider_label} curated catalog " + f"and the /models endpoint was unreachable.{suggestion_text}" + f"\n The model may still work if it exists on the provider." + ), + } + + # No catalog available — accept with a warning, matching the comment's + # stated intent ("Accept and persist, but warn"). return { - "accepted": False, - "persist": False, + "accepted": True, + "persist": True, "recognized": False, "message": ( - f"Could not reach the {provider_label} API to validate `{requested}`. " + f"Note: could not reach the {provider_label} API to validate `{requested}`. " f"If the service isn't down, this model may not be valid." ), } diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index 691126a4c6..78181aab2b 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status from hermes_cli.config import get_env_value, load_config from tools.managed_tool_gateway import is_managed_tool_gateway_ready from tools.tool_backend_helpers import ( + fal_key_is_configured, has_direct_modal_credentials, managed_nous_tools_enabled, normalize_browser_cloud_provider, @@ -271,7 +272,7 @@ def get_nous_subscription_features( direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL")) direct_parallel = bool(get_env_value("PARALLEL_API_KEY")) direct_tavily = bool(get_env_value("TAVILY_API_KEY")) - direct_fal = bool(get_env_value("FAL_KEY")) + direct_fal = fal_key_is_configured() direct_openai_tts = bool(resolve_openai_audio_api_key()) direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY")) direct_camofox = bool(get_env_value("CAMOFOX_URL")) @@ -520,7 +521,7 @@ def apply_nous_managed_defaults( browser_cfg["cloud_provider"] = "browser-use" changed.add("browser") - if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"): + if "image_gen" in selected_toolsets and not fal_key_is_configured(): changed.add("image_gen") return changed @@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]: or get_env_value("TAVILY_API_KEY") or get_env_value("EXA_API_KEY") ), - "image_gen": bool(get_env_value("FAL_KEY")), + "image_gen": fal_key_is_configured(), "tts": bool( resolve_openai_audio_api_key() or get_env_value("ELEVENLABS_API_KEY") @@ -586,7 +587,6 @@ def get_gateway_eligible_tools( return [], [], [] if config is 
None: - from hermes_cli.config import load_config config = load_config() or {} # Quick provider check without the heavy get_nous_subscription_features call diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 2385a5c942..3dd7af823d 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -2,14 +2,20 @@ Hermes Plugin System ==================== -Discovers, loads, and manages plugins from three sources: +Discovers, loads, and manages plugins from four sources: -1. **User plugins** – ``~/.hermes/plugins/<name>/`` -2. **Project plugins** – ``./.hermes/plugins/<name>/`` (opt-in via +1. **Bundled plugins** – ``<repo>/plugins/<name>/`` (shipped with hermes-agent; + ``memory/`` and ``context_engine/`` subdirs are excluded — they have their + own discovery paths) +2. **User plugins** – ``~/.hermes/plugins/<name>/`` +3. **Project plugins** – ``./.hermes/plugins/<name>/`` (opt-in via ``HERMES_ENABLE_PROJECT_PLUGINS``) -3. **Pip plugins** – packages that expose the ``hermes_agent.plugins`` +4. **Pip plugins** – packages that expose the ``hermes_agent.plugins`` entry-point group. +Later sources override earlier ones on name collision, so a user or project +plugin with the same name as a bundled plugin replaces it. + Each directory plugin must contain a ``plugin.yaml`` manifest **and** an ``__init__.py`` with a ``register(ctx)`` function. @@ -54,6 +60,8 @@ logger = logging.getLogger(__name__) VALID_HOOKS: Set[str] = { "pre_tool_call", "post_tool_call", + "transform_terminal_output", + "transform_tool_result", "pre_llm_call", "post_llm_call", "pre_api_request", @@ -62,6 +70,7 @@ "on_session_end", "on_session_finalize", "on_session_reset", + "subagent_stop", } ENTRY_POINTS_GROUP = "hermes_agent.plugins" @@ -75,7 +84,12 @@ def _env_enabled(name: str) -> bool: def _get_disabled_plugins() -> set: - """Read the disabled plugins list from config.yaml.""" + """Read the disabled plugins list from config.yaml. + + Kept for backward compat and explicit deny-list semantics. A plugin + name in this set will never load, even if it appears in + ``plugins.enabled``. + """ try: from hermes_cli.config import load_config config = load_config() @@ -85,10 +99,43 @@ return set() +def _get_enabled_plugins() -> Optional[set]: + """Read the enabled-plugins allow-list from config.yaml. + + Plugins are opt-in by default — only plugins whose name appears in + this set are loaded. Returns: + + * ``None`` — the key is missing or malformed. Callers should treat + this as "nothing enabled yet" (the opt-in default); the first + ``migrate_config`` run populates the key with a grandfathered set + of currently-installed user plugins so existing setups don't + break on upgrade. + * ``set()`` — an empty list was explicitly set; nothing loads. + * ``set(...)`` — the concrete allow-list.
+ """ + try: + from hermes_cli.config import load_config + config = load_config() + plugins_cfg = config.get("plugins") + if not isinstance(plugins_cfg, dict): + return None + if "enabled" not in plugins_cfg: + return None + enabled = plugins_cfg.get("enabled") + if not isinstance(enabled, list): + return None + return set(enabled) + except Exception: + return None + + # --------------------------------------------------------------------------- # Data classes # --------------------------------------------------------------------------- +_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"} + + @dataclass class PluginManifest: """Parsed representation of a plugin.yaml manifest.""" @@ -102,6 +149,23 @@ class PluginManifest: provides_hooks: List[str] = field(default_factory=list) source: str = "" # "user", "project", or "entrypoint" path: Optional[str] = None + # Plugin kind — see plugins.py module docstring for semantics. + # ``standalone`` (default): hooks/tools of its own; opt-in via + # ``plugins.enabled``. + # ``backend``: pluggable backend for an existing core tool (e.g. + # image_gen). Built-in (bundled) backends auto-load; + # user-installed still gated by ``plugins.enabled``. + # ``exclusive``: category with exactly one active provider (memory). + # Selection via ``.provider`` config key; the + # category's own discovery system handles loading and the + # general scanner skips these. + kind: str = "standalone" + # Registry key — path-derived, used by ``plugins.enabled``/``disabled`` + # lookups and by ``hermes plugins list``. For a flat plugin at + # ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested + # category plugin at ``plugins/image_gen/openai/`` the key is + # ``image_gen/openai``. When empty, falls back to ``name``. + key: str = "" @dataclass @@ -322,6 +386,33 @@ class PluginContext: self.manifest.name, engine.name, ) + # -- image gen provider registration ------------------------------------ + + def register_image_gen_provider(self, provider) -> None: + """Register an image generation backend. + + ``provider`` must be an instance of + :class:`agent.image_gen_provider.ImageGenProvider`. The + ``provider.name`` attribute is what ``image_gen.provider`` in + ``config.yaml`` matches against when routing ``image_generate`` + tool calls. + """ + from agent.image_gen_provider import ImageGenProvider + from agent.image_gen_registry import register_provider + + if not isinstance(provider, ImageGenProvider): + logger.warning( + "Plugin '%s' tried to register an image_gen provider that does " + "not inherit from ImageGenProvider. Ignoring.", + self.manifest.name, + ) + return + register_provider(provider) + logger.info( + "Plugin '%s' registered image_gen provider: %s", + self.manifest.name, provider.name, + ) + # -- hook registration -------------------------------------------------- def register_hook(self, hook_name: str, callback: Callable) -> None: @@ -420,26 +511,103 @@ class PluginManager: manifests: List[PluginManifest] = [] - # 1. User plugins (~/.hermes/plugins/) + # 1. Bundled plugins (/plugins//) + # + # Repo-shipped plugins live next to hermes_cli/. Two layouts are + # supported (see ``_scan_directory`` for details): + # + # - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone) + # - category: ``plugins/image_gen/openai/plugin.yaml`` (backend) + # + # ``memory/`` and ``context_engine/`` are skipped at the top level — + # they have their own discovery systems. 
Porting those to the + # category-namespace ``kind: exclusive`` model is a future PR. + repo_plugins = Path(__file__).resolve().parent.parent / "plugins" + manifests.extend( + self._scan_directory( + repo_plugins, + source="bundled", + skip_names={"memory", "context_engine"}, + ) + ) + + # 2. User plugins (~/.hermes/plugins/) user_dir = get_hermes_home() / "plugins" manifests.extend(self._scan_directory(user_dir, source="user")) - # 2. Project plugins (./.hermes/plugins/) + # 3. Project plugins (./.hermes/plugins/) if _env_enabled("HERMES_ENABLE_PROJECT_PLUGINS"): project_dir = Path.cwd() / ".hermes" / "plugins" manifests.extend(self._scan_directory(project_dir, source="project")) - # 3. Pip / entry-point plugins + # 4. Pip / entry-point plugins manifests.extend(self._scan_entry_points()) - # Load each manifest (skip user-disabled plugins) + # Load each manifest (skip user-disabled plugins). + # Later sources override earlier ones on key collision — user + # plugins take precedence over bundled, project plugins take + # precedence over user. Dedup here so we only load the final + # winner. Keys are path-derived (``image_gen/openai``, + # ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai`` + # don't collide even when both manifests say ``name: openai``. disabled = _get_disabled_plugins() + enabled = _get_enabled_plugins() # None = opt-in default (nothing enabled) + winners: Dict[str, PluginManifest] = {} for manifest in manifests: - if manifest.name in disabled: + winners[manifest.key or manifest.name] = manifest + for manifest in winners.values(): + lookup_key = manifest.key or manifest.name + + # Explicit disable always wins (matches on key or on legacy + # bare name for back-compat with existing user configs). + if lookup_key in disabled or manifest.name in disabled: loaded = LoadedPlugin(manifest=manifest, enabled=False) loaded.error = "disabled via config" - self._plugins[manifest.name] = loaded - logger.debug("Skipping disabled plugin '%s'", manifest.name) + self._plugins[lookup_key] = loaded + logger.debug("Skipping disabled plugin '%s'", lookup_key) + continue + + # Exclusive plugins (memory providers) have their own + # discovery/activation path. The general loader records the + # manifest for introspection but does not load the module. + if manifest.kind == "exclusive": + loaded = LoadedPlugin(manifest=manifest, enabled=False) + loaded.error = ( + "exclusive plugin — activate via <category>.provider config" + ) + self._plugins[lookup_key] = loaded + logger.debug( + "Skipping '%s' (exclusive, handled by category discovery)", + lookup_key, + ) + continue + + # Built-in backends auto-load — they ship with hermes and must + # just work. Selection among them (e.g. which image_gen backend + # services calls) is driven by ``<category>.provider`` config, + # enforced by the tool wrapper. + if manifest.kind == "backend" and manifest.source == "bundled": + self._load_plugin(manifest) + continue + + # Everything else (standalone, user-installed backends, + # entry-point plugins) is opt-in via plugins.enabled. + # Accept both the path-derived key and the legacy bare name + # so existing configs keep working.
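# --- illustrative aside (not part of the patch) ---------------------------
# The opt-in gate that follows, in miniature. `enabled` has three
# states — None (key missing: nothing enabled yet), an empty set
# (explicitly nothing), or a concrete allow-list — and both the
# path-derived key and the legacy bare name are accepted:
def _gate(enabled, lookup_key, name):
    return enabled is not None and (lookup_key in enabled or name in enabled)

assert not _gate(None, "image_gen/openai", "openai")    # opt-in default
assert not _gate(set(), "image_gen/openai", "openai")   # explicit empty list
assert _gate({"openai"}, "image_gen/openai", "openai")  # legacy bare name
assert _gate({"image_gen/openai"}, "image_gen/openai", "openai")  # path key
# ---------------------------------------------------------------------------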
+ is_enabled = ( + enabled is not None + and (lookup_key in enabled or manifest.name in enabled) + ) + if not is_enabled: + loaded = LoadedPlugin(manifest=manifest, enabled=False) + loaded.error = ( + "not enabled in config (run `hermes plugins enable {}` to activate)" + .format(lookup_key) + ) + self._plugins[lookup_key] = loaded + logger.debug( + "Skipping '%s' (not in plugins.enabled)", lookup_key + ) continue self._load_plugin(manifest) @@ -454,8 +622,46 @@ class PluginManager: # Directory scanning # ----------------------------------------------------------------------- - def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]: - """Read ``plugin.yaml`` manifests from subdirectories of *path*.""" + def _scan_directory( + self, + path: Path, + source: str, + skip_names: Optional[Set[str]] = None, + ) -> List[PluginManifest]: + """Read ``plugin.yaml`` manifests from subdirectories of *path*. + + Supports two layouts, mixed freely: + + * **Flat** — ``<root>/<name>/plugin.yaml``. Key is + ``<name>`` (e.g. ``disk-cleanup``). + * **Category** — ``<root>/<category>/<name>/plugin.yaml``, + where the ``<category>`` directory itself has no ``plugin.yaml``. + Key is ``<category>/<name>`` (e.g. ``image_gen/openai``). + Depth is capped at two segments. + + *skip_names* is an optional set of top-level directory names to + skip (the bundled-plugins call site uses it to exclude ``memory`` + and ``context_engine``, which have their own discovery paths). + """ + return self._scan_directory_level( + path, source, skip_names=skip_names, prefix="", depth=0 + ) + + def _scan_directory_level( + self, + path: Path, + source: str, + *, + skip_names: Optional[Set[str]], + prefix: str, + depth: int, + ) -> List[PluginManifest]: + """Recursive implementation of :meth:`_scan_directory`. + + ``prefix`` is the category path already accumulated ("" at root, + "image_gen" one level in). ``depth`` is the recursion depth; we + cap at 2 so ``<root>/a/b/c/`` is ignored. + """ manifests: List[PluginManifest] = [] if not path.is_dir(): return manifests @@ -463,35 +669,112 @@ for child in sorted(path.iterdir()): if not child.is_dir(): continue + if depth == 0 and skip_names and child.name in skip_names: + continue manifest_file = child / "plugin.yaml" if not manifest_file.exists(): manifest_file = child / "plugin.yml" - if not manifest_file.exists(): - logger.debug("Skipping %s (no plugin.yaml)", child) + + if manifest_file.exists(): + manifest = self._parse_manifest( + manifest_file, child, source, prefix + ) + if manifest is not None: + manifests.append(manifest) continue - try: - if yaml is None: - logger.warning("PyYAML not installed – cannot load %s", manifest_file) - continue - data = yaml.safe_load(manifest_file.read_text()) or {} - manifest = PluginManifest( - name=data.get("name", child.name), - version=str(data.get("version", "")), - description=data.get("description", ""), - author=data.get("author", ""), - requires_env=data.get("requires_env", []), - provides_tools=data.get("provides_tools", []), - provides_hooks=data.get("provides_hooks", []), - source=source, - path=str(child), + # No manifest at this level. If we're still within the depth + # cap, treat this directory as a category namespace and recurse + # one level in looking for children with manifests.
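# --- illustrative aside (not part of the patch) ---------------------------
# Key shapes produced by the two layouts the docstring above describes
# (pure path math; `_plugin_key` is a hypothetical helper, and the real
# scanner accumulates the prefix during recursion instead):
from pathlib import PurePosixPath

def _plugin_key(rel: str) -> str:
    # One segment → flat key; two → "category/name". The scanner never
    # recurses past two segments, so deeper paths don't arise.
    parts = PurePosixPath(rel).parts
    assert len(parts) <= 2
    return "/".join(parts)

assert _plugin_key("disk-cleanup") == "disk-cleanup"          # flat layout
assert _plugin_key("image_gen/openai") == "image_gen/openai"  # category layout
# ---------------------------------------------------------------------------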
+ if depth >= 1: + logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child) + continue + + sub_prefix = f"{prefix}/{child.name}" if prefix else child.name + manifests.extend( + self._scan_directory_level( + child, + source, + skip_names=None, + prefix=sub_prefix, + depth=depth + 1, ) - manifests.append(manifest) - except Exception as exc: - logger.warning("Failed to parse %s: %s", manifest_file, exc) + ) return manifests + def _parse_manifest( + self, + manifest_file: Path, + plugin_dir: Path, + source: str, + prefix: str, + ) -> Optional[PluginManifest]: + """Parse a single ``plugin.yaml`` into a :class:`PluginManifest`. + + Returns ``None`` on parse failure (logs a warning). + """ + try: + if yaml is None: + logger.warning("PyYAML not installed – cannot load %s", manifest_file) + return None + data = yaml.safe_load(manifest_file.read_text()) or {} + + name = data.get("name", plugin_dir.name) + key = f"{prefix}/{plugin_dir.name}" if prefix else name + + raw_kind = data.get("kind", "standalone") + if not isinstance(raw_kind, str): + raw_kind = "standalone" + kind = raw_kind.strip().lower() + if kind not in _VALID_PLUGIN_KINDS: + logger.warning( + "Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'", + key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)), + ) + kind = "standalone" + + # Auto-coerce user-installed memory providers to kind="exclusive" + # so they're routed to plugins/memory discovery instead of being + # loaded by the general PluginManager (which has no + # register_memory_provider on PluginContext). Mirrors the + # heuristic in plugins/memory/__init__.py:_is_memory_provider_dir. + # Bundled memory providers are already skipped via skip_names. + if kind == "standalone" and "kind" not in data: + init_file = plugin_dir / "__init__.py" + if init_file.exists(): + try: + source_text = init_file.read_text(errors="replace")[:8192] + if ( + "register_memory_provider" in source_text + or "MemoryProvider" in source_text + ): + kind = "exclusive" + logger.debug( + "Plugin %s: detected memory provider, " + "treating as kind='exclusive'", + key, + ) + except Exception: + pass + + return PluginManifest( + name=name, + version=str(data.get("version", "")), + description=data.get("description", ""), + author=data.get("author", ""), + requires_env=data.get("requires_env", []), + provides_tools=data.get("provides_tools", []), + provides_hooks=data.get("provides_hooks", []), + source=source, + path=str(plugin_dir), + kind=kind, + key=key, + ) + except Exception as exc: + logger.warning("Failed to parse %s: %s", manifest_file, exc) + return None + # ----------------------------------------------------------------------- # Entry-point scanning # ----------------------------------------------------------------------- @@ -514,6 +797,7 @@ class PluginManager: name=ep.name, source="entrypoint", path=ep.value, + key=ep.name, ) manifests.append(manifest) except Exception as exc: @@ -530,7 +814,7 @@ class PluginManager: loaded = LoadedPlugin(manifest=manifest) try: - if manifest.source in ("user", "project"): + if manifest.source in ("user", "project", "bundled"): module = self._load_directory_module(manifest) else: module = self._load_entrypoint_module(manifest) @@ -575,10 +859,16 @@ class PluginManager: loaded.error = str(exc) logger.warning("Failed to load plugin '%s': %s", manifest.name, exc) - self._plugins[manifest.name] = loaded + self._plugins[manifest.key or manifest.name] = loaded def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType: - 
"""Import a directory-based plugin as ``hermes_plugins.``.""" + """Import a directory-based plugin as ``hermes_plugins.``. + + The module slug is derived from ``manifest.key`` so category-namespaced + plugins (``image_gen/openai``) import as + ``hermes_plugins.image_gen__openai`` without colliding with any + future ``tts/openai``. + """ plugin_dir = Path(manifest.path) # type: ignore[arg-type] init_file = plugin_dir / "__init__.py" if not init_file.exists(): @@ -591,7 +881,9 @@ class PluginManager: ns_pkg.__package__ = _NS_PARENT sys.modules[_NS_PARENT] = ns_pkg - module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}" + key = manifest.key or manifest.name + slug = key.replace("/", "__").replace("-", "_") + module_name = f"{_NS_PARENT}.{slug}" spec = importlib.util.spec_from_file_location( module_name, init_file, @@ -672,10 +964,12 @@ class PluginManager: def list_plugins(self) -> List[Dict[str, Any]]: """Return a list of info dicts for all discovered plugins.""" result: List[Dict[str, Any]] = [] - for name, loaded in sorted(self._plugins.items()): + for key, loaded in sorted(self._plugins.items()): result.append( { - "name": name, + "name": loaded.manifest.name, + "key": loaded.manifest.key or loaded.manifest.name, + "kind": loaded.manifest.kind, "version": loaded.manifest.version, "description": loaded.manifest.description, "source": loaded.manifest.source, @@ -779,23 +1073,31 @@ def get_pre_tool_call_block_message( return None +def _ensure_plugins_discovered() -> PluginManager: + """Return the global manager after running idempotent plugin discovery.""" + manager = get_plugin_manager() + manager.discover_and_load() + return manager + + def get_plugin_context_engine(): """Return the plugin-registered context engine, or None.""" - return get_plugin_manager()._context_engine + return _ensure_plugins_discovered()._context_engine def get_plugin_command_handler(name: str) -> Optional[Callable]: """Return the handler for a plugin-registered slash command, or ``None``.""" - entry = get_plugin_manager()._plugin_commands.get(name) + entry = _ensure_plugins_discovered()._plugin_commands.get(name) return entry["handler"] if entry else None def get_plugin_commands() -> Dict[str, dict]: """Return the full plugin commands dict (name → {handler, description, plugin}). - Safe to call before discovery — returns an empty dict if no plugins loaded. + Triggers idempotent plugin discovery so callers can use plugin commands + before any explicit discover_plugins() call. """ - return get_plugin_manager()._plugin_commands + return _ensure_plugins_discovered()._plugin_commands def get_plugin_toolsets() -> List[tuple]: diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py index c92d8b0dc6..230e134207 100644 --- a/hermes_cli/plugins_cmd.py +++ b/hermes_cli/plugins_cmd.py @@ -15,6 +15,7 @@ import shutil import subprocess import sys from pathlib import Path +from typing import Optional from hermes_constants import get_hermes_home @@ -281,8 +282,16 @@ def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path: # --------------------------------------------------------------------------- -def cmd_install(identifier: str, force: bool = False) -> None: - """Install a plugin from a Git URL or owner/repo shorthand.""" +def cmd_install( + identifier: str, + force: bool = False, + enable: Optional[bool] = None, +) -> None: + """Install a plugin from a Git URL or owner/repo shorthand. + + After install, prompt "Enable now? 
[y/N]" unless *enable* is provided + (True = auto-enable without prompting, False = install disabled). + """ import tempfile from rich.console import Console @@ -391,6 +400,40 @@ def cmd_install(identifier: str, force: bool = False) -> None: _display_after_install(target, identifier) + # Determine the canonical plugin name for enable-list bookkeeping. + installed_name = installed_manifest.get("name") or target.name + + # Decide whether to enable: explicit flag > interactive prompt > default off + should_enable = enable + if should_enable is None: + # Interactive prompt unless stdin isn't a TTY (scripted install). + if sys.stdin.isatty() and sys.stdout.isatty(): + try: + answer = input( + f" Enable '{installed_name}' now? [y/N]: " + ).strip().lower() + should_enable = answer in ("y", "yes") + except (EOFError, KeyboardInterrupt): + should_enable = False + else: + should_enable = False + + if should_enable: + enabled = _get_enabled_set() + disabled = _get_disabled_set() + enabled.add(installed_name) + disabled.discard(installed_name) + _save_enabled_set(enabled) + _save_disabled_set(disabled) + console.print( + f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled." + ) + else: + console.print( + f"[dim]Plugin installed but not enabled. " + f"Run `hermes plugins enable {installed_name}` to activate.[/dim]" + ) + console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]") console.print("[dim] hermes gateway restart[/dim]") console.print() @@ -468,7 +511,11 @@ def cmd_remove(name: str) -> None: def _get_disabled_set() -> set: - """Read the disabled plugins set from config.yaml.""" + """Read the disabled plugins set from config.yaml. + + An explicit deny-list. A plugin name here never loads, even if also + listed in ``plugins.enabled``. + """ try: from hermes_cli.config import load_config config = load_config() @@ -488,103 +535,196 @@ def _save_disabled_set(disabled: set) -> None: save_config(config) +def _get_enabled_set() -> set: + """Read the enabled plugins allow-list from config.yaml. + + Plugins are opt-in: only names here are loaded. Returns ``set()`` if + the key is missing (same behaviour as "nothing enabled yet"). + """ + try: + from hermes_cli.config import load_config + config = load_config() + plugins_cfg = config.get("plugins", {}) + if not isinstance(plugins_cfg, dict): + return set() + enabled = plugins_cfg.get("enabled", []) + return set(enabled) if isinstance(enabled, list) else set() + except Exception: + return set() + + +def _save_enabled_set(enabled: set) -> None: + """Write the enabled plugins list to config.yaml.""" + from hermes_cli.config import load_config, save_config + config = load_config() + if "plugins" not in config: + config["plugins"] = {} + config["plugins"]["enabled"] = sorted(enabled) + save_config(config) + + def cmd_enable(name: str) -> None: - """Enable a previously disabled plugin.""" + """Add a plugin to the enabled allow-list (and remove it from disabled).""" from rich.console import Console console = Console() - plugins_dir = _plugins_dir() - - # Verify the plugin exists - target = plugins_dir / name - if not target.is_dir(): - console.print(f"[red]Plugin '{name}' is not installed.[/red]") + # Discover the plugin — check installed (user) AND bundled. 
+    if not _plugin_exists(name):
+        console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
         sys.exit(1)
 
+    enabled = _get_enabled_set()
     disabled = _get_disabled_set()
-    if name not in disabled:
+
+    if name in enabled and name not in disabled:
         console.print(f"[dim]Plugin '{name}' is already enabled.[/dim]")
         return
 
+    enabled.add(name)
     disabled.discard(name)
+    _save_enabled_set(enabled)
     _save_disabled_set(disabled)
-    console.print(f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. Takes effect on next session.")
+    console.print(
+        f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. "
+        "Takes effect on next session."
+    )
 
 
 def cmd_disable(name: str) -> None:
-    """Disable a plugin without removing it."""
+    """Remove a plugin from the enabled allow-list (and add to disabled)."""
     from rich.console import Console
     console = Console()
-    plugins_dir = _plugins_dir()
-
-    # Verify the plugin exists
-    target = plugins_dir / name
-    if not target.is_dir():
-        console.print(f"[red]Plugin '{name}' is not installed.[/red]")
+    if not _plugin_exists(name):
+        console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
         sys.exit(1)
 
+    enabled = _get_enabled_set()
     disabled = _get_disabled_set()
-    if name in disabled:
+
+    if name not in enabled and name in disabled:
         console.print(f"[dim]Plugin '{name}' is already disabled.[/dim]")
         return
 
+    enabled.discard(name)
     disabled.add(name)
+    _save_enabled_set(enabled)
     _save_disabled_set(disabled)
-    console.print(f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.")
+    console.print(
+        f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. "
+        "Takes effect on next session."
+    )
 
 
-def cmd_list() -> None:
-    """List installed plugins."""
-    from rich.console import Console
-    from rich.table import Table
+def _plugin_exists(name: str) -> bool:
+    """Return True if a plugin with *name* is installed (user) or bundled."""
+    # Installed: directory name or manifest name match in user plugins dir
+    user_dir = _plugins_dir()
+    if user_dir.is_dir():
+        if (user_dir / name).is_dir():
+            return True
+        for child in user_dir.iterdir():
+            if not child.is_dir():
+                continue
+            manifest = _read_manifest(child)
+            if manifest.get("name") == name:
+                return True
+    # Bundled: <repo>/plugins/<name>/
+    from pathlib import Path as _P
+    import hermes_cli
+    repo_plugins = _P(hermes_cli.__file__).resolve().parent.parent / "plugins"
+    if repo_plugins.is_dir():
+        candidate = repo_plugins / name
+        if candidate.is_dir() and (
+            (candidate / "plugin.yaml").exists()
+            or (candidate / "plugin.yml").exists()
+        ):
+            return True
+    return False
+
+
+def _discover_all_plugins() -> list:
+    """Return a list of (name, version, description, source, dir_path) for
+    every plugin this command can see — bundled + user.
+
+    Matches the ordering/dedup of ``PluginManager.discover_and_load`` for
+    these sources: bundled first, then user; user overrides bundled on
+    name collision. (Project-scoped plugins are not scanned here.)
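+
+    Example entry (illustrative values only)::
+
+        ("disk-cleanup", "1.2.0", "Clean temp files", "git", Path("..."))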
+ """ try: import yaml except ImportError: yaml = None - console = Console() - plugins_dir = _plugins_dir() + seen: dict = {} # name -> (name, version, description, source, path) - dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir()) - if not dirs: + # Bundled (/plugins//), excluding memory/ and context_engine/ + import hermes_cli + repo_plugins = Path(hermes_cli.__file__).resolve().parent.parent / "plugins" + for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")): + if not base.is_dir(): + continue + for d in sorted(base.iterdir()): + if not d.is_dir(): + continue + if source == "bundled" and d.name in ("memory", "context_engine"): + continue + manifest_file = d / "plugin.yaml" + if not manifest_file.exists(): + manifest_file = d / "plugin.yml" + if not manifest_file.exists(): + continue + name = d.name + version = "" + description = "" + if yaml: + try: + with open(manifest_file) as f: + manifest = yaml.safe_load(f) or {} + name = manifest.get("name", d.name) + version = manifest.get("version", "") + description = manifest.get("description", "") + except Exception: + pass + # User plugins override bundled on name collision. + if name in seen and source == "bundled": + continue + src_label = source + if source == "user" and (d / ".git").exists(): + src_label = "git" + seen[name] = (name, version, description, src_label, d) + return list(seen.values()) + + +def cmd_list() -> None: + """List all plugins (bundled + user) with enabled/disabled state.""" + from rich.console import Console + from rich.table import Table + + console = Console() + entries = _discover_all_plugins() + if not entries: console.print("[dim]No plugins installed.[/dim]") console.print("[dim]Install with:[/dim] hermes plugins install owner/repo") return + enabled = _get_enabled_set() disabled = _get_disabled_set() - table = Table(title="Installed Plugins", show_lines=False) + table = Table(title="Plugins", show_lines=False) table.add_column("Name", style="bold") table.add_column("Status") table.add_column("Version", style="dim") table.add_column("Description") table.add_column("Source", style="dim") - for d in dirs: - manifest_file = d / "plugin.yaml" - name = d.name - version = "" - description = "" - source = "local" - - if manifest_file.exists() and yaml: - try: - with open(manifest_file) as f: - manifest = yaml.safe_load(f) or {} - name = manifest.get("name", d.name) - version = manifest.get("version", "") - description = manifest.get("description", "") - except Exception: - pass - - # Check if it's a git repo (installed via hermes plugins install) - if (d / ".git").exists(): - source = "git" - - is_disabled = name in disabled or d.name in disabled - status = "[red]disabled[/red]" if is_disabled else "[green]enabled[/green]" + for name, version, description, source, _dir in entries: + if name in disabled: + status = "[red]disabled[/red]" + elif name in enabled: + status = "[green]enabled[/green]" + else: + status = "[yellow]not enabled[/yellow]" table.add_row(name, status, str(version), description, source) console.print() @@ -592,6 +732,7 @@ def cmd_list() -> None: console.print() console.print("[dim]Interactive toggle:[/dim] hermes plugins") console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable ") + console.print("[dim]Plugins are opt-in by default — only 'enabled' plugins load.[/dim]") # --------------------------------------------------------------------------- @@ -742,41 +883,25 @@ def cmd_toggle() -> None: """Interactive composite UI — general plugins + provider 
plugin categories.""" from rich.console import Console - try: - import yaml - except ImportError: - yaml = None - console = Console() - plugins_dir = _plugins_dir() - # -- General plugins discovery -- - dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir()) - disabled = _get_disabled_set() + # -- General plugins discovery (bundled + user) -- + entries = _discover_all_plugins() + enabled_set = _get_enabled_set() + disabled_set = _get_disabled_set() plugin_names = [] plugin_labels = [] plugin_selected = set() - for i, d in enumerate(dirs): - manifest_file = d / "plugin.yaml" - name = d.name - description = "" - - if manifest_file.exists() and yaml: - try: - with open(manifest_file) as f: - manifest = yaml.safe_load(f) or {} - name = manifest.get("name", d.name) - description = manifest.get("description", "") - except Exception: - pass - - plugin_names.append(name) + for i, (name, _version, description, source, _d) in enumerate(entries): label = f"{name} \u2014 {description}" if description else name + if source == "bundled": + label = f"{label} [bundled]" + plugin_names.append(name) plugin_labels.append(label) - - if name not in disabled and d.name not in disabled: + # Selected (enabled) when in enabled-set AND not in disabled-set + if name in enabled_set and name not in disabled_set: plugin_selected.add(i) # -- Provider categories -- @@ -804,10 +929,10 @@ def cmd_toggle() -> None: try: import curses _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected, - disabled, categories, console) + disabled_set, categories, console) except ImportError: _run_composite_fallback(plugin_names, plugin_labels, plugin_selected, - disabled, categories, console) + disabled_set, categories, console) def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected, @@ -1020,18 +1145,29 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected, curses.wrapper(_draw) flush_stdin() - # Persist general plugin changes - new_disabled = set() + # Persist general plugin changes. The new allow-list is the set of + # plugin names that were checked; anything not checked is explicitly + # disabled (written to disabled-list) so it remains off even if the + # plugin code does something clever like auto-enable in the future. + new_enabled: set = set() + new_disabled: set = set(disabled) # preserve existing disabled state for unseen plugins for i, name in enumerate(plugin_names): - if i not in chosen: + if i in chosen: + new_enabled.add(name) + new_disabled.discard(name) + else: new_disabled.add(name) - if new_disabled != disabled: + prev_enabled = _get_enabled_set() + enabled_changed = new_enabled != prev_enabled + disabled_changed = new_disabled != disabled + + if enabled_changed or disabled_changed: + _save_enabled_set(new_enabled) _save_disabled_set(new_disabled) - enabled_count = len(plugin_names) - len(new_disabled) console.print( - f"\n[green]\u2713[/green] General plugins: {enabled_count} enabled, " - f"{len(new_disabled)} disabled." + f"\n[green]\u2713[/green] General plugins: {len(new_enabled)} enabled, " + f"{len(plugin_names) - len(new_enabled)} disabled." 
) elif n_plugins > 0: console.print("\n[dim]General plugins unchanged.[/dim]") @@ -1078,11 +1214,17 @@ def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected, return print() - new_disabled = set() + new_enabled: set = set() + new_disabled: set = set(disabled) for i, name in enumerate(plugin_names): - if i not in chosen: + if i in chosen: + new_enabled.add(name) + new_disabled.discard(name) + else: new_disabled.add(name) - if new_disabled != disabled: + prev_enabled = _get_enabled_set() + if new_enabled != prev_enabled or new_disabled != disabled: + _save_enabled_set(new_enabled) _save_disabled_set(new_disabled) # Provider categories @@ -1108,7 +1250,17 @@ def plugins_command(args) -> None: action = getattr(args, "plugins_action", None) if action == "install": - cmd_install(args.identifier, force=getattr(args, "force", False)) + # Map argparse tri-state: --enable=True, --no-enable=False, neither=None (prompt) + enable_arg = None + if getattr(args, "enable", False): + enable_arg = True + elif getattr(args, "no_enable", False): + enable_arg = False + cmd_install( + args.identifier, + force=getattr(args, "force", False), + enable=enable_arg, + ) elif action == "update": cmd_update(args.name) elif action in ("remove", "rm", "uninstall"): diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index a71055cfe4..e842086a41 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -23,6 +23,8 @@ import logging from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple +from utils import base_url_host_matches, base_url_hostname + logger = logging.getLogger(__name__) @@ -92,6 +94,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { transport="openai_chat", base_url_env_var="KIMI_BASE_URL", ), + "stepfun": HermesOverlay( + transport="openai_chat", + extra_env_vars=("STEPFUN_API_KEY",), + base_url_override="https://api.stepfun.ai/step_plan/v1", + base_url_env_var="STEPFUN_BASE_URL", + ), "minimax": HermesOverlay( transport="anthropic_messages", base_url_env_var="MINIMAX_BASE_URL", @@ -208,6 +216,10 @@ ALIASES: Dict[str, str] = { "kimi-coding-cn": "kimi-for-coding", "moonshot": "kimi-for-coding", + # stepfun + "step": "stepfun", + "stepfun-coding-plan": "stepfun", + # minimax-cn "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", @@ -292,6 +304,7 @@ _LABEL_OVERRIDES: Dict[str, str] = { "nous": "Nous Portal", "openai-codex": "OpenAI Codex", "copilot-acp": "GitHub Copilot ACP", + "stepfun": "StepFun Step Plan", "xiaomi": "Xiaomi MiMo", "local": "Local endpoint", "bedrock": "AWS Bedrock", @@ -322,12 +335,16 @@ def normalize_provider(name: str) -> str: def get_provider(name: str) -> Optional[ProviderDef]: - """Look up a provider by id or alias, merging all data sources. + """Look up a built-in provider by id or alias. Resolution order: 1. Hermes overlays (for providers not in models.dev: nous, openai-codex, etc.) 2. models.dev catalog + Hermes overlay - 3. User-defined providers from config (TODO: Phase 4) + + User-defined providers from config.yaml (``providers:`` / ``custom_providers:``) + are resolved by :func:`resolve_provider_full`, which layers ``resolve_user_provider`` + and ``resolve_custom_provider`` on top of this function. Callers that need + user-config support should use ``resolve_provider_full`` instead. Returns a fully-resolved ProviderDef or None. 
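+
+    Example (illustrative; shown resolving through an alias)::
+
+        pdef = get_provider("moonshot")   # alias for "kimi-for-coding"
+        if pdef is not None:
+            pdef.transport                # e.g. "openai_chat"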
""" @@ -421,6 +438,16 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: """ pdef = get_provider(provider) if pdef is not None: + # Even for known providers, check URL heuristics for special endpoints + # (e.g. kimi /coding endpoint needs anthropic_messages even on 'custom') + if base_url: + url_lower = base_url.rstrip("/").lower() + if "api.kimi.com/coding" in url_lower: + return "anthropic_messages" + if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower: + return "anthropic_messages" + if "api.openai.com" in url_lower: + return "codex_responses" return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions") # Direct provider checks for providers not in HERMES_OVERLAYS @@ -430,11 +457,14 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: # URL-based heuristics for custom / unknown providers if base_url: url_lower = base_url.rstrip("/").lower() - if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower: + hostname = base_url_hostname(base_url) + if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com": return "anthropic_messages" - if "api.openai.com" in url_lower: + if hostname == "api.kimi.com" and "/coding" in url_lower: + return "anthropic_messages" + if hostname == "api.openai.com": return "codex_responses" - if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower: + if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"): return "bedrock_converse" return "chat_completions" diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index a5c286fe01..922946e2ad 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -29,6 +29,7 @@ from hermes_cli.auth import ( ) from hermes_cli.config import get_compatible_custom_providers, load_config from hermes_constants import OPENROUTER_BASE_URL +from utils import base_url_host_matches, base_url_hostname def _normalize_custom_provider_name(value: str) -> str: @@ -38,14 +39,27 @@ def _normalize_custom_provider_name(value: str) -> str: def _detect_api_mode_for_url(base_url: str) -> Optional[str]: """Auto-detect api_mode from the resolved base URL. - Direct api.openai.com endpoints need the Responses API for GPT-5.x - tool calls with reasoning (chat/completions returns 400). + - Direct api.openai.com endpoints need the Responses API for GPT-5.x + tool calls with reasoning (chat/completions returns 400). + - Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM, + LiteLLM proxies, etc.) conventionally expose the native Anthropic + protocol under a ``/anthropic`` suffix — treat those as + ``anthropic_messages`` transport instead of the default + ``chat_completions``. + - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the + Anthropic Messages protocol (the /coding route accepts Claude + Code's native request shape). 
""" normalized = (base_url or "").strip().lower().rstrip("/") - if "api.x.ai" in normalized: + hostname = base_url_hostname(base_url) + if hostname == "api.x.ai": return "codex_responses" - if "api.openai.com" in normalized and "openrouter" not in normalized: + if hostname == "api.openai.com": return "codex_responses" + if normalized.endswith("/anthropic"): + return "anthropic_messages" + if hostname == "api.kimi.com" and "/coding" in normalized: + return "anthropic_messages" return None @@ -194,8 +208,13 @@ def _resolve_runtime_from_pool_entry( elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, + # Kimi /coding, api.openai.com → codex_responses, api.x.ai → + # codex_responses). + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the # Anthropic SDK prepends its own /v1/messages to the base_url. Strip the @@ -469,7 +488,7 @@ def _resolve_openrouter_runtime( # When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated # provider (issues #420, #560). - _is_openrouter_url = "openrouter.ai" in base_url + _is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai") if _is_openrouter_url: api_key_candidates = [ explicit_api_key, @@ -479,8 +498,12 @@ def _resolve_openrouter_runtime( else: # Custom endpoint: use api_key from config when using config base_url (#1760). # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's - # the canonical env var for ollama.com authentication. - _is_ollama_url = "ollama.com" in base_url.lower() + # the canonical env var for ollama.com authentication. Match on + # HOST, not substring — a custom base_url whose path contains + # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose + # hostname is a look-alike (ollama.com.attacker.test) must not + # receive the Ollama credential. See GHSA-76xc-57q6-vm5m. + _is_ollama_url = base_url_host_matches(base_url, "ollama.com") api_key_candidates = [ explicit_api_key, (cfg_api_key if use_config_base_url else ""), @@ -642,8 +665,12 @@ def _resolve_explicit_runtime( configured_mode = _parse_api_mode(model_cfg.get("api_mode")) if configured_mode: api_mode = configured_mode - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect from URL (Anthropic /anthropic suffix, + # api.openai.com → Responses, Kimi /coding, etc.). 
+ detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected return { "provider": provider, @@ -890,8 +917,7 @@ def resolve_runtime_provider( code="no_aws_credentials", ) # Read bedrock-specific config from config.yaml - from hermes_cli.config import load_config as _load_bedrock_config - _bedrock_cfg = _load_bedrock_config().get("bedrock", {}) + _bedrock_cfg = load_config().get("bedrock", {}) # Region priority: config.yaml bedrock.region → env var → us-east-1 region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region() auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain" @@ -965,10 +991,13 @@ def resolve_runtime_provider( elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) - # Auto-detect Anthropic-compatible endpoints by URL convention - # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic) - elif base_url.rstrip("/").endswith("/anthropic"): - api_mode = "anthropic_messages" + else: + # Auto-detect Anthropic-compatible endpoints by URL convention + # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic) + # plus api.openai.com → codex_responses and api.x.ai → codex_responses. + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected # Strip trailing /v1 for OpenCode Anthropic models (see comment above). if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"): base_url = re.sub(r"/v1/?$", "", base_url) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 8f6b633c6a..1fe5ae0580 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -22,6 +22,7 @@ from typing import Optional, Dict, Any from hermes_cli.nous_subscription import get_nous_subscription_features from tools.tool_backend_helpers import managed_nous_tools_enabled +from utils import base_url_hostname from hermes_constants import get_optional_skills_dir logger = logging.getLogger(__name__) @@ -89,19 +90,20 @@ _DEFAULT_PROVIDER_MODELS = { "grok-code-fast-1", ], "gemini": [ - "gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", - "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", + "gemini-3.1-pro-preview", "gemini-3-pro-preview", + "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview", ], "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], - "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], - "kimi-coding-cn": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], + "kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], + "kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], + "stepfun": ["step-3.5-flash", "step-3.5-flash-2603"], "arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"], "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"], "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"], "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], - "opencode-go": ["glm-5.1", "glm-5", 
"kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"], + "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"], "huggingface": [ "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507", "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528", @@ -407,13 +409,36 @@ def _print_setup_summary(config: dict, hermes_home): ("Browser Automation", False, missing_browser_hint) ) - # FAL (image generation) + # Image generation — FAL (direct or via Nous), or any plugin-registered + # provider (OpenAI, etc.) if subscription_features.image_gen.managed_by_nous: tool_status.append(("Image Generation (Nous subscription)", True, None)) elif subscription_features.image_gen.available: tool_status.append(("Image Generation", True, None)) else: - tool_status.append(("Image Generation", False, "FAL_KEY")) + # Fall back to probing plugin-registered providers so OpenAI-only + # setups don't show as "missing FAL_KEY". + _img_backend = None + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for _p in list_providers(): + if _p.name == "fal": + continue + try: + if _p.is_available(): + _img_backend = _p.display_name + break + except Exception: + continue + except Exception: + pass + if _img_backend: + tool_status.append((f"Image Generation ({_img_backend})", True, None)) + else: + tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY")) # TTS — show configured provider tts_provider = config.get("tts", {}).get("provider", "edge") @@ -433,7 +458,6 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (Google Gemini)", True, None)) elif tts_provider == "neutts": try: - import importlib.util neutts_ok = importlib.util.find_spec("neutts") is not None except Exception: neutts_ok = False @@ -441,6 +465,16 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (NeuTTS local)", True, None)) else: tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'")) + elif tts_provider == "kittentts": + try: + import importlib.util + kittentts_ok = importlib.util.find_spec("kittentts") is not None + except Exception: + kittentts_ok = False + if kittentts_ok: + tool_status.append(("Text-to-Speech (KittenTTS local)", True, None)) + else: + tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'")) else: tool_status.append(("Text-to-Speech (Edge TTS)", True, None)) @@ -771,6 +805,7 @@ def setup_model_provider(config: dict, *, quick: bool = False): "zai": "Z.AI / GLM", "kimi-coding": "Kimi / Moonshot", "kimi-coding-cn": "Kimi / Moonshot (China)", + "stepfun": "StepFun Step Plan", "minimax": "MiniMax", "minimax-cn": "MiniMax CN", "anthropic": "Anthropic", @@ -803,7 +838,8 @@ def setup_model_provider(config: dict, *, quick: bool = False): elif _vision_idx == 1: # OpenAI-compatible endpoint _base_url = prompt(" Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1" _api_key_label = " API key" - if "api.openai.com" in _base_url.lower(): + _is_native_openai = base_url_hostname(_base_url) == "api.openai.com" + if _is_native_openai: _api_key_label = " OpenAI API key" _oai_key = prompt(_api_key_label, password=True).strip() if _oai_key: @@ -811,7 +847,7 @@ def setup_model_provider(config: dict, *, quick: bool = False): 
# Save vision base URL to config (not .env — only secrets go there) _vaux = config.setdefault("auxiliary", {}).setdefault("vision", {}) _vaux["base_url"] = _base_url - if "api.openai.com" in _base_url.lower(): + if _is_native_openai: _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"] _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"] _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0) @@ -847,7 +883,6 @@ def setup_model_provider(config: dict, *, quick: bool = False): def _check_espeak_ng() -> bool: """Check if espeak-ng is installed.""" - import shutil return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None @@ -901,6 +936,31 @@ def _install_neutts_deps() -> bool: return False +def _install_kittentts_deps() -> bool: + """Install KittenTTS dependencies with user approval. Returns True on success.""" + import subprocess + import sys + + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + print() + print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...") + print() + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + check=True, timeout=300, + ) + print_success("kittentts installed successfully") + return True + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: + print_error(f"Failed to install kittentts: {e}") + print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile") + return False + + def _setup_tts_provider(config: dict): """Interactive TTS provider selection with install flow for NeuTTS.""" tts_config = config.get("tts", {}) @@ -916,6 +976,7 @@ def _setup_tts_provider(config: dict): "mistral": "Mistral Voxtral TTS", "gemini": "Google Gemini TTS", "neutts": "NeuTTS", + "kittentts": "KittenTTS", } current_label = provider_labels.get(current_provider, current_provider) @@ -939,9 +1000,10 @@ def _setup_tts_provider(config: dict): "Mistral Voxtral TTS (multilingual, native Opus, needs API key)", "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)", "NeuTTS (local on-device, free, ~300MB model download)", + "KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)", ] ) - providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"]) + providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"]) choices.append(f"Keep current ({current_label})") keep_current_idx = len(choices) - 1 idx = prompt_choice("Select TTS provider:", choices, keep_current_idx) @@ -962,7 +1024,6 @@ def _setup_tts_provider(config: dict): if selected == "neutts": # Check if already installed try: - import importlib.util already_installed = importlib.util.find_spec("neutts") is not None except Exception: already_installed = False @@ -1061,6 +1122,29 @@ def _setup_tts_provider(config: dict): print_warning("No API key provided. 
Falling back to Edge TTS.") selected = "edge" + elif selected == "kittentts": + # Check if already installed + try: + import importlib.util + already_installed = importlib.util.find_spec("kittentts") is not None + except Exception: + already_installed = False + + if already_installed: + print_success("KittenTTS is already installed") + else: + print() + print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).") + print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + print() + if prompt_yes_no("Install KittenTTS now?", True): + if not _install_kittentts_deps(): + print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.") + selected = "edge" + else: + print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.") + selected = "edge" + # Save the selection if "tts" not in config: config["tts"] = {} @@ -1082,8 +1166,6 @@ def setup_tts(config: dict): def setup_terminal_backend(config: dict): """Configure the terminal execution backend.""" import platform as _platform - import shutil - print_header("Terminal Backend") print_info("Choose where Hermes runs shell commands and code.") print_info("This affects tool execution, file access, and isolation.") @@ -1460,7 +1542,9 @@ def setup_agent_settings(config: dict): ) print_info("Maximum tool-calling iterations per conversation.") print_info("Higher = more complex tasks, but costs more tokens.") - print_info("Default is 90, which works for most tasks. Use 150+ for open exploration.") + print_info( + f"Press Enter to keep {current_max}. Use 90 for most tasks or 150+ for open exploration." + ) max_iter_str = prompt("Max iterations", current_max) try: @@ -2356,6 +2440,74 @@ def setup_tools(config: dict, first_install: bool = False): # ============================================================================= +def _model_section_has_credentials(config: dict) -> bool: + """Return True when any known inference provider has usable credentials. + + Sources of truth: + * ``PROVIDER_REGISTRY`` in ``hermes_cli.auth`` — lists every supported + provider along with its ``api_key_env_vars``. + * ``active_provider`` in the auth store — covers OAuth device-code / + external-OAuth providers (Nous, Codex, Qwen, Gemini CLI, ...). + * The legacy OpenRouter aggregator env vars, which route generic + ``OPENAI_API_KEY`` / ``OPENROUTER_API_KEY`` values through OpenRouter. + """ + try: + from hermes_cli.auth import get_active_provider + if get_active_provider(): + return True + except Exception: + pass + + try: + from hermes_cli.auth import PROVIDER_REGISTRY + except Exception: + PROVIDER_REGISTRY = {} # type: ignore[assignment] + + def _has_key(pconfig) -> bool: + for env_var in pconfig.api_key_env_vars: + # CLAUDE_CODE_OAUTH_TOKEN is set by Claude Code itself, not by + # the user — mirrors is_provider_explicitly_configured in auth.py. + if env_var == "CLAUDE_CODE_OAUTH_TOKEN": + continue + if get_env_value(env_var): + return True + return False + + # Prefer the provider declared in config.yaml, avoids false positives + # from stray env vars (GH_TOKEN, etc.) when the user has already picked + # a different provider. 
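+    # e.g. (illustrative) config {"model": {"provider": "kimi-coding"}} plus a
+    # KIMI_API_KEY in the env returns True here without consulting GH_TOKEN.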
+ model_cfg = config.get("model") if isinstance(config, dict) else None + if isinstance(model_cfg, dict): + provider_id = (model_cfg.get("provider") or "").strip().lower() + if provider_id in PROVIDER_REGISTRY: + if _has_key(PROVIDER_REGISTRY[provider_id]): + return True + if provider_id == "openrouter": + for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"): + if get_env_value(env_var): + return True + + # OpenRouter aggregator fallback (no provider declared in config). + for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"): + if get_env_value(env_var): + return True + + for pid, pconfig in PROVIDER_REGISTRY.items(): + # Skip copilot in auto-detect: GH_TOKEN / GITHUB_TOKEN are + # commonly set for git tooling. Mirrors resolve_provider in auth.py. + if pid == "copilot": + continue + if _has_key(pconfig): + return True + return False + + +def _gateway_platform_short_label(label: str) -> str: + """Strip trailing parenthetical qualifiers from a gateway platform label.""" + base = label.split("(", 1)[0].strip() + return base or label + + def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]: """Return a short summary if a setup section is already configured, else None. @@ -2364,20 +2516,7 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str] so that test patches on ``setup_mod.get_env_value`` take effect. """ if section_key == "model": - has_key = bool( - get_env_value("OPENROUTER_API_KEY") - or get_env_value("OPENAI_API_KEY") - or get_env_value("ANTHROPIC_API_KEY") - ) - if not has_key: - # Check for OAuth providers - try: - from hermes_cli.auth import get_active_provider - if get_active_provider(): - has_key = True - except Exception: - pass - if not has_key: + if not _model_section_has_credentials(config): return None model = config.get("model") if isinstance(model, str) and model.strip(): @@ -2395,37 +2534,11 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str] return f"max turns: {max_turns}" elif section_key == "gateway": - platforms = [] - if get_env_value("TELEGRAM_BOT_TOKEN"): - platforms.append("Telegram") - if get_env_value("DISCORD_BOT_TOKEN"): - platforms.append("Discord") - if get_env_value("SLACK_BOT_TOKEN"): - platforms.append("Slack") - if get_env_value("SIGNAL_ACCOUNT"): - platforms.append("Signal") - if get_env_value("EMAIL_ADDRESS"): - platforms.append("Email") - if get_env_value("TWILIO_ACCOUNT_SID"): - platforms.append("SMS") - if get_env_value("MATRIX_ACCESS_TOKEN") or get_env_value("MATRIX_PASSWORD"): - platforms.append("Matrix") - if get_env_value("MATTERMOST_TOKEN"): - platforms.append("Mattermost") - if get_env_value("WHATSAPP_PHONE_NUMBER_ID"): - platforms.append("WhatsApp") - if get_env_value("DINGTALK_CLIENT_ID"): - platforms.append("DingTalk") - if get_env_value("FEISHU_APP_ID"): - platforms.append("Feishu") - if get_env_value("WECOM_BOT_ID"): - platforms.append("WeCom") - if get_env_value("WEIXIN_ACCOUNT_ID"): - platforms.append("Weixin") - if get_env_value("BLUEBUBBLES_SERVER_URL"): - platforms.append("BlueBubbles") - if get_env_value("WEBHOOK_ENABLED"): - platforms.append("Webhooks") + platforms = [ + _gateway_platform_short_label(label) + for label, env_var, _ in _GATEWAY_PLATFORMS + if get_env_value(env_var) + ] if platforms: return ", ".join(platforms) return None # No platforms configured — section must run diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 540afc3037..8541f0a05f 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -122,6 
+122,7 @@ def show_status(args): "OpenAI": "OPENAI_API_KEY", "Z.AI/GLM": "GLM_API_KEY", "Kimi": "KIMI_API_KEY", + "StepFun Step Plan": "STEPFUN_API_KEY", "MiniMax": "MINIMAX_API_KEY", "MiniMax-CN": "MINIMAX_CN_API_KEY", "Firecrawl": "FIRECRAWL_API_KEY", @@ -252,6 +253,7 @@ def show_status(args): apikey_providers = { "Z.AI / GLM": ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "Kimi / Moonshot": ("KIMI_API_KEY",), + "StepFun Step Plan": ("STEPFUN_API_KEY",), "MiniMax": ("MINIMAX_API_KEY",), "MiniMax (China)": ("MINIMAX_CN_API_KEY",), } diff --git a/hermes_cli/timeouts.py b/hermes_cli/timeouts.py new file mode 100644 index 0000000000..59db4012be --- /dev/null +++ b/hermes_cli/timeouts.py @@ -0,0 +1,82 @@ +from __future__ import annotations + + +def _coerce_timeout(raw: object) -> float | None: + try: + timeout = float(raw) + except (TypeError, ValueError): + return None + if timeout <= 0: + return None + return timeout + + +def get_provider_request_timeout( + provider_id: str, model: str | None = None +) -> float | None: + """Return a configured provider request timeout in seconds, if any.""" + if not provider_id: + return None + + try: + from hermes_cli.config import load_config + except ImportError: + return None + + config = load_config() + providers = config.get("providers", {}) if isinstance(config, dict) else {} + provider_config = ( + providers.get(provider_id, {}) if isinstance(providers, dict) else {} + ) + if not isinstance(provider_config, dict): + return None + + model_config = _get_model_config(provider_config, model) + if model_config is not None: + timeout = _coerce_timeout(model_config.get("timeout_seconds")) + if timeout is not None: + return timeout + + return _coerce_timeout(provider_config.get("request_timeout_seconds")) + + +def get_provider_stale_timeout( + provider_id: str, model: str | None = None +) -> float | None: + """Return a configured non-stream stale timeout in seconds, if any.""" + if not provider_id: + return None + + try: + from hermes_cli.config import load_config + except ImportError: + return None + + config = load_config() + providers = config.get("providers", {}) if isinstance(config, dict) else {} + provider_config = ( + providers.get(provider_id, {}) if isinstance(providers, dict) else {} + ) + if not isinstance(provider_config, dict): + return None + + model_config = _get_model_config(provider_config, model) + if model_config is not None: + timeout = _coerce_timeout(model_config.get("stale_timeout_seconds")) + if timeout is not None: + return timeout + + return _coerce_timeout(provider_config.get("stale_timeout_seconds")) + + +def _get_model_config( + provider_config: dict[str, object], model: str | None +) -> dict[str, object] | None: + if not model: + return None + + models = provider_config.get("models", {}) + model_config = models.get(model, {}) if isinstance(models, dict) else {} + if isinstance(model_config, dict): + return model_config + return None diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index aa6cb9729f..24acc15f53 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -127,7 +127,7 @@ TIPS = [ # --- Tools & Capabilities --- "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.", - "delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.", + "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.", "web_extract works on PDF URLs — 
pass any PDF link and it converts to markdown.", "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.", "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.", @@ -245,7 +245,7 @@ TIPS = [ "Three plugin types: general (tools/hooks), memory providers, and context engines.", "hermes plugins install owner/repo installs plugins directly from GitHub.", "8 external memory providers available: Honcho, OpenViking, Mem0, Hindsight, and more.", - "Plugin hooks include pre_tool_call, post_tool_call, pre_llm_call, and post_llm_call.", + "Plugin hooks include pre/post_tool_call, pre/post_llm_call, and transform_terminal_output for output canonicalization.", # --- Miscellaneous --- "Prompt caching (Anthropic) reduces costs by reusing cached system prompt prefixes.", @@ -323,7 +323,6 @@ TIPS = [ "GPT-5 and Codex use 'developer' role instead of 'system' in the message format.", "Per-task auxiliary overrides: auxiliary.vision.provider, auxiliary.compression.model, etc. in config.yaml.", "The auxiliary client treats 'main' as a provider alias — resolves to your actual primary provider + model.", - "Smart routing can auto-route simple queries to a cheaper model — set smart_model_routing.enabled: true.", "hermes claw migrate --dry-run previews OpenClaw migration without writing anything.", "File paths pasted with quotes or escaped spaces are handled automatically — no manual cleanup needed.", "Slash commands never trigger the large-paste collapse — /command with big arguments works correctly.", @@ -346,4 +345,3 @@ def get_random_tip(exclude_recent: int = 0) -> str: return random.choice(TIPS) - diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 8e4bde883f..7a9a598f95 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -24,7 +24,8 @@ from hermes_cli.nous_subscription import ( apply_nous_managed_defaults, get_nous_subscription_features, ) -from tools.tool_backend_helpers import managed_nous_tools_enabled +from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled +from utils import base_url_hostname logger = logging.getLogger(__name__) @@ -181,6 +182,14 @@ TOOL_CATEGORIES = { ], "tts_provider": "gemini", }, + { + "name": "KittenTTS", + "badge": "local · free", + "tag": "Lightweight local ONNX TTS (~25MB), no API key", + "env_vars": [], + "tts_provider": "kittentts", + "post_setup": "kittentts", + }, ], }, "web": { @@ -422,6 +431,36 @@ def _run_post_setup(post_setup_key: str): _print_warning(" Node.js not found. 
Install Camofox via Docker:") _print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser") + elif post_setup_key == "kittentts": + try: + __import__("kittentts") + _print_success(" kittentts is already installed") + return + except ImportError: + pass + import subprocess + _print_info(" Installing kittentts (~25-80MB model, CPU-only)...") + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + try: + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + capture_output=True, text=True, timeout=300, + ) + if result.returncode == 0: + _print_success(" kittentts installed") + _print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + _print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)") + else: + _print_warning(" kittentts install failed:") + _print_info(f" {result.stderr.strip()[:300]}") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + except subprocess.TimeoutExpired: + _print_warning(" kittentts install timed out (>5min)") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + elif post_setup_key == "rl_training": try: __import__("tinker_atropos") @@ -546,6 +585,10 @@ def _get_platform_tools( ts_tools = set(resolve_toolset(ts_key)) if ts_tools and ts_tools.issubset(all_tool_names): enabled_toolsets.add(ts_key) + default_off = set(_DEFAULT_OFF_TOOLSETS) + if platform in default_off: + default_off.remove(platform) + enabled_toolsets -= default_off # Plugin toolsets: enabled by default unless explicitly disabled. # A plugin toolset is "known" for a platform once `hermes tools` @@ -804,6 +847,51 @@ def _configure_toolset(ts_key: str, config: dict): _configure_simple_requirements(ts_key) +def _plugin_image_gen_providers() -> list[dict]: + """Build picker-row dicts from plugin-registered image gen providers. + + Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider + row but carries an ``image_gen_plugin_name`` marker so downstream + code (config writing, model picker) knows to route through the + plugin registry instead of the in-tree FAL backend. + + FAL is skipped — it's already exposed by the hardcoded + ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to + a plugin in a follow-up PR, the hardcoded entries go away and this + function surfaces it alongside OpenAI automatically. + """ + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + providers = list_providers() + except Exception: + return [] + + rows: list[dict] = [] + for provider in providers: + if getattr(provider, "name", None) == "fal": + # FAL has its own hardcoded rows today. 
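+            # (Non-FAL providers that survive the checks below become picker
+            #  rows shaped like, illustratively:
+            #    {"name": "OpenAI Images", "badge": "api key",
+            #     "env_vars": ["OPENAI_API_KEY"],
+            #     "image_gen_plugin_name": "openai"}.)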
+ continue + try: + schema = provider.get_setup_schema() + except Exception: + continue + if not isinstance(schema, dict): + continue + rows.append( + { + "name": schema.get("name", provider.display_name), + "badge": schema.get("badge", ""), + "tag": schema.get("tag", ""), + "env_vars": schema.get("env_vars", []), + "image_gen_plugin_name": provider.name, + } + ) + return rows + + def _visible_providers(cat: dict, config: dict) -> list[dict]: """Return provider entries visible for the current auth/config state.""" features = get_nous_subscription_features(config) @@ -814,6 +902,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]: if provider.get("requires_nous_auth") and not features.nous_auth_present: continue visible.append(provider) + + # Inject plugin-registered image_gen backends (OpenAI today, more + # later) so the picker lists them alongside FAL / Nous Subscription. + if cat.get("name") == "Image Generation": + visible.extend(_plugin_image_gen_providers()) + return visible @@ -833,7 +927,24 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool: browser_cfg = config.get("browser", {}) return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg if ts_key == "image_gen": - return not get_env_value("FAL_KEY") + # Satisfied when the in-tree FAL backend is configured OR any + # plugin-registered image gen provider is available. + if fal_key_is_configured(): + return False + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for provider in list_providers(): + try: + if provider.is_available(): + return False + except Exception: + continue + except Exception: + pass + return True return not _toolset_has_keys(ts_key, config) @@ -1052,6 +1163,88 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None: _print_success(f" Model set to: {chosen}") +def _plugin_image_gen_catalog(plugin_name: str): + """Return ``(catalog_dict, default_model_id)`` for a plugin provider. + + ``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table — + ``{model_id: {"display", "speed", "strengths", "price", ...}}`` — + so the existing picker code paths work without change. Returns + ``({}, None)`` if the provider isn't registered or has no models. + """ + try: + from agent.image_gen_registry import get_provider + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + provider = get_provider(plugin_name) + except Exception: + return {}, None + if provider is None: + return {}, None + try: + models = provider.list_models() or [] + default = provider.default_model() + except Exception: + return {}, None + catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m} + return catalog, default + + +def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None: + """Prompt the user to pick a model for a plugin-registered backend. + + Writes selection to ``image_gen.model``. Mirrors + :func:`_configure_imagegen_model` but sources its catalog from the + plugin registry instead of :data:`IMAGEGEN_BACKENDS`. 
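+
+    Resulting config shape (illustrative provider and model ids)::
+
+        image_gen:
+          provider: openai
+          model: gpt-image-1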
+ """ + catalog, default_model = _plugin_image_gen_catalog(plugin_name) + if not catalog: + return + + cur_cfg = config.setdefault("image_gen", {}) + if not isinstance(cur_cfg, dict): + cur_cfg = {} + config["image_gen"] = cur_cfg + current_model = cur_cfg.get("model") or default_model + if current_model not in catalog: + current_model = default_model + + model_ids = list(catalog.keys()) + ordered = [current_model] + [m for m in model_ids if m != current_model] + + widths = { + "model": max(len(m) for m in model_ids), + "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6), + "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0), + } + + print() + header = ( + f" {'Model':<{widths['model']}} " + f"{'Speed':<{widths['speed']}} " + f"{'Strengths':<{widths['strengths']}} " + f"Price" + ) + print(color(header, Colors.CYAN)) + + rows = [] + for mid in ordered: + row = _format_imagegen_model_row(mid, catalog[mid], widths) + if mid == current_model: + row += " ← currently in use" + rows.append(row) + + idx = _prompt_choice( + f" Choose {plugin_name} model:", + rows, + default=0, + ) + + chosen = ordered[idx] + cur_cfg["model"] = chosen + _print_success(f" Model set to: {chosen}") + + def _configure_provider(provider: dict, config: dict): """Configure a single provider - prompt for API keys and set config.""" env_vars = provider.get("env_vars", []) @@ -1108,10 +1301,28 @@ def _configure_provider(provider: dict, config: dict): _print_success(f" {provider['name']} - no configuration needed!") if managed_feature: _print_info(" Requests for this tool will be billed to your Nous subscription.") + # Plugin-registered image_gen provider: write image_gen.provider + # and route model selection to the plugin's own catalog. + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after backend pick. backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + # In-tree FAL is the only non-plugin backend today. Keep + # image_gen.provider clear so the dispatch shim falls through + # to the legacy FAL path. + img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" return # Prompt for each required env var @@ -1146,10 +1357,23 @@ def _configure_provider(provider: dict, config: dict): if all_configured: _print_success(f" {provider['name']} configured!") + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after env vars are in. 
backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" def _configure_simple_requirements(ts_key: str): @@ -1175,17 +1399,17 @@ def _configure_simple_requirements(ts_key: str): _print_warning(" Skipped") elif idx == 1: base_url = _prompt(" OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1" - key_label = " OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else " API key" + is_native_openai = base_url_hostname(base_url) == "api.openai.com" + key_label = " OPENAI_API_KEY" if is_native_openai else " API key" api_key = _prompt(key_label, password=True) if api_key and api_key.strip(): save_env_value("OPENAI_API_KEY", api_key.strip()) # Save vision base URL to config (not .env — only secrets go there) - from hermes_cli.config import load_config, save_config _cfg = load_config() _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {}) _aux["base_url"] = base_url save_config(_cfg) - if "api.openai.com" in base_url.lower(): + if is_native_openai: save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini") _print_success(" Saved") else: diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 110b81e4b5..9cdfdb37df 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -16,6 +16,7 @@ import json import logging import os import secrets +import subprocess import sys import threading import time @@ -114,6 +115,91 @@ def _require_token(request: Request) -> None: raise HTTPException(status_code=401, detail="Unauthorized") +# Accepted Host header values for loopback binds. DNS rebinding attacks +# point a victim browser at an attacker-controlled hostname (evil.test) +# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin +# checks because the browser now considers evil.test and our dashboard +# "same origin". Validating the Host header at the app layer rejects any +# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7. +_LOOPBACK_HOST_VALUES: frozenset = frozenset({ + "localhost", "127.0.0.1", "::1", +}) + + +def _is_accepted_host(host_header: str, bound_host: str) -> bool: + """True if the Host header targets the interface we bound to. + + Accepts: + - Exact bound host (with or without port suffix) + - Loopback aliases when bound to loopback + - Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback, + no protection possible at this layer) + """ + if not host_header: + return False + # Strip port suffix. IPv6 addresses use bracket notation: + # [::1] — no port + # [::1]:9119 — with port + # Plain hosts/v4: + # localhost:9119 + # 127.0.0.1:9119 + h = host_header.strip() + if h.startswith("["): + # IPv6 bracketed — port (if any) follows "]:" + close = h.find("]") + if close != -1: + host_only = h[1:close] # strip brackets + else: + host_only = h.strip("[]") + else: + host_only = h.rsplit(":", 1)[0] if ":" in h else h + host_only = host_only.lower() + + # 0.0.0.0 bind means operator explicitly opted into all-interfaces + # (requires --insecure per web_server.start_server). No Host-layer + # defence can protect that mode; rely on operator network controls. 
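+    #
+    # Worked examples (hypothetical hostnames/ports):
+    #   _is_accepted_host("anything.example:9119", "0.0.0.0")  -> True   (all-interfaces opt-in)
+    #   _is_accepted_host("[::1]:9119", "127.0.0.1")           -> True   (loopback alias)
+    #   _is_accepted_host("evil.test:9119", "127.0.0.1")       -> False  (rebinding rejected)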
+    if bound_host in ("0.0.0.0", "::"):
+        return True
+
+    # Loopback bind: accept the loopback names
+    bound_lc = bound_host.lower()
+    if bound_lc in _LOOPBACK_HOST_VALUES:
+        return host_only in _LOOPBACK_HOST_VALUES
+
+    # Explicit non-loopback bind: require exact host match
+    return host_only == bound_lc
+
+
+@app.middleware("http")
+async def host_header_middleware(request: Request, call_next):
+    """Reject requests whose Host header doesn't match the bound interface.
+
+    Defends against DNS rebinding: a victim browser on a localhost
+    dashboard is tricked into fetching from an attacker hostname that
+    TTL-flips to 127.0.0.1. CORS and same-origin checks don't help —
+    the browser now treats the attacker origin as same-origin with the
+    dashboard. Host-header validation at the app layer catches it.
+
+    See GHSA-ppp5-vxwm-4cf7.
+    """
+    # The bound host lives on app.state so this middleware can read it —
+    # set by start_server() at listen time.
+    bound_host = getattr(app.state, "bound_host", None)
+    if bound_host:
+        host_header = request.headers.get("host", "")
+        if not _is_accepted_host(host_header, bound_host):
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "detail": (
+                        "Invalid Host header. Dashboard requests must use "
+                        "the hostname the server was bound to."
+                    ),
+                },
+            )
+    return await call_next(request)
+
+
 @app.middleware("http")
 async def auth_middleware(request: Request, call_next):
     """Require the session token on all /api/ routes except the public list."""
@@ -232,8 +318,8 @@ _CATEGORY_MERGE: Dict[str, str] = {
     "checkpoints": "agent",
     "approvals": "security",
     "human_delay": "display",
-    "smart_model_routing": "agent",
     "dashboard": "display",
+    "code_execution": "agent",
 }
 
 # Display order for tabs — unlisted categories sort alphabetically after these.
@@ -476,6 +562,138 @@ async def get_status():
     }
+
+# ---------------------------------------------------------------------------
+# Gateway + update actions (invoked from the Status page).
+#
+# Both commands are spawned as detached subprocesses so the HTTP request
+# returns immediately. stdin is closed (``DEVNULL``) so any stray ``input()``
+# calls fail fast with EOF rather than hanging forever. stdout/stderr are
+# streamed to a per-action log file under ``~/.hermes/logs/<name>.log`` so
+# the dashboard can tail them back to the user.
+# ---------------------------------------------------------------------------
+
+_ACTION_LOG_DIR: Path = get_hermes_home() / "logs"
+
+# Short ``name`` (from the URL) → log file name under ``_ACTION_LOG_DIR``.
+_ACTION_LOG_FILES: Dict[str, str] = {
+    "gateway-restart": "gateway-restart.log",
+    "hermes-update": "hermes-update.log",
+}
+
+# ``name`` → most recently spawned Popen handle. Used so ``status`` can
+# report liveness and exit code without shelling out to ``ps``.
+_ACTION_PROCS: Dict[str, subprocess.Popen] = {}
+
+
+def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
+    """Spawn ``hermes <subcommand>`` detached and record the Popen handle.
+
+    Uses the running interpreter's ``hermes_cli.main`` module so the action
+    inherits the same venv/PYTHONPATH the web server is using.
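+
+    Note: ``_ACTION_PROCS`` is in-memory state. If the web server itself
+    restarts while an action is still running, the status endpoint loses
+    the handle and reports ``running: false`` — the log file under
+    ``_ACTION_LOG_DIR`` remains readable either way.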
+ """ + log_file_name = _ACTION_LOG_FILES[name] + _ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True) + log_path = _ACTION_LOG_DIR / log_file_name + log_file = open(log_path, "ab", buffering=0) + log_file.write( + f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode() + ) + + cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand] + + popen_kwargs: Dict[str, Any] = { + "cwd": str(PROJECT_ROOT), + "stdin": subprocess.DEVNULL, + "stdout": log_file, + "stderr": subprocess.STDOUT, + "env": {**os.environ, "HERMES_NONINTERACTIVE": "1"}, + } + if sys.platform == "win32": + popen_kwargs["creationflags"] = ( + subprocess.CREATE_NEW_PROCESS_GROUP # type: ignore[attr-defined] + | getattr(subprocess, "DETACHED_PROCESS", 0) + ) + else: + popen_kwargs["start_new_session"] = True + + proc = subprocess.Popen(cmd, **popen_kwargs) + _ACTION_PROCS[name] = proc + return proc + + +def _tail_lines(path: Path, n: int) -> List[str]: + """Return the last ``n`` lines of ``path``. Reads the whole file — fine + for our small per-action logs. Binary-decoded with ``errors='replace'`` + so log corruption doesn't 500 the endpoint.""" + if not path.exists(): + return [] + try: + text = path.read_text(errors="replace") + except OSError: + return [] + lines = text.splitlines() + return lines[-n:] if n > 0 else lines + + +@app.post("/api/gateway/restart") +async def restart_gateway(): + """Kick off a ``hermes gateway restart`` in the background.""" + try: + proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart") + except Exception as exc: + _log.exception("Failed to spawn gateway restart") + raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "gateway-restart", + } + + +@app.post("/api/hermes/update") +async def update_hermes(): + """Kick off ``hermes update`` in the background.""" + try: + proc = _spawn_hermes_action(["update"], "hermes-update") + except Exception as exc: + _log.exception("Failed to spawn hermes update") + raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "hermes-update", + } + + +@app.get("/api/actions/{name}/status") +async def get_action_status(name: str, lines: int = 200): + """Tail an action log and report whether the process is still running.""" + log_file_name = _ACTION_LOG_FILES.get(name) + if log_file_name is None: + raise HTTPException(status_code=404, detail=f"Unknown action: {name}") + + log_path = _ACTION_LOG_DIR / log_file_name + tail = _tail_lines(log_path, min(max(lines, 1), 2000)) + + proc = _ACTION_PROCS.get(name) + if proc is None: + running = False + exit_code: Optional[int] = None + pid: Optional[int] = None + else: + exit_code = proc.poll() + running = exit_code is None + pid = proc.pid + + return { + "name": name, + "running": running, + "exit_code": exit_code, + "pid": pid, + "lines": tail, + } + + @app.get("/api/sessions") async def get_sessions(limit: int = 20, offset: int = 0): try: @@ -1958,6 +2176,8 @@ async def update_config_raw(body: RawConfigUpdate): @app.get("/api/analytics/usage") async def get_usage_analytics(days: int = 30): from hermes_state import SessionDB + from agent.insights import InsightsEngine + db = SessionDB() try: cutoff = time.time() - (days * 86400) @@ -1969,7 +2189,8 @@ async def get_usage_analytics(days: int = 30): SUM(reasoning_tokens) as reasoning_tokens, COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost, COALESCE(SUM(actual_cost_usd), 0) as 
actual_cost, - COUNT(*) as sessions + COUNT(*) as sessions, + SUM(COALESCE(api_call_count, 0)) as api_calls FROM sessions WHERE started_at > ? GROUP BY day ORDER BY day """, (cutoff,)) @@ -1980,7 +2201,8 @@ async def get_usage_analytics(days: int = 30): SUM(input_tokens) as input_tokens, SUM(output_tokens) as output_tokens, COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost, - COUNT(*) as sessions + COUNT(*) as sessions, + SUM(COALESCE(api_call_count, 0)) as api_calls FROM sessions WHERE started_at > ? AND model IS NOT NULL GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC """, (cutoff,)) @@ -1993,12 +2215,29 @@ async def get_usage_analytics(days: int = 30): SUM(reasoning_tokens) as total_reasoning, COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost, COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost, - COUNT(*) as total_sessions + COUNT(*) as total_sessions, + SUM(COALESCE(api_call_count, 0)) as total_api_calls FROM sessions WHERE started_at > ? """, (cutoff,)) totals = dict(cur3.fetchone()) + insights_report = InsightsEngine(db).generate(days=days) + skills = insights_report.get("skills", { + "summary": { + "total_skill_loads": 0, + "total_skill_edits": 0, + "total_skill_actions": 0, + "distinct_skills_used": 0, + }, + "top_skills": [], + }) - return {"daily": daily, "by_model": by_model, "totals": totals, "period_days": days} + return { + "daily": daily, + "by_model": by_model, + "totals": totals, + "period_days": days, + "skills": skills, + } finally: db.close() @@ -2305,13 +2544,15 @@ def start_server( "authentication. Only use on trusted networks.", host, ) + # Record the bound host so host_header_middleware can validate incoming + # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7). + app.state.bound_host = host + if open_browser: - import threading import webbrowser def _open(): - import time as _t - _t.sleep(1.0) + time.sleep(1.0) webbrowser.open(f"http://{host}:{port}") threading.Thread(target=_open, daemon=True).start() diff --git a/hermes_cli/webhook.py b/hermes_cli/webhook.py index 8ff135e29e..378f11b4a7 100644 --- a/hermes_cli/webhook.py +++ b/hermes_cli/webhook.py @@ -155,6 +155,15 @@ def _cmd_subscribe(args): "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), } + if getattr(args, "deliver_only", False): + if route["deliver"] == "log": + print( + "Error: --deliver-only requires --deliver to be a real target " + "(telegram, discord, slack, github_comment, etc.) — not 'log'." + ) + return + route["deliver_only"] = True + if args.deliver_chat_id: route["deliver_extra"] = {"chat_id": args.deliver_chat_id} @@ -172,9 +181,12 @@ def _cmd_subscribe(args): else: print(" Events: (all)") print(f" Deliver: {route['deliver']}") + if route.get("deliver_only"): + print(" Mode: direct delivery (no agent, zero LLM cost)") if route.get("prompt"): prompt_preview = route["prompt"][:80] + ("..." 
if len(route["prompt"]) > 80 else "") - print(f" Prompt: {prompt_preview}") + label = "Message" if route.get("deliver_only") else "Prompt" + print(f" {label}: {prompt_preview}") print(f"\n Configure your service to POST to the URL above.") print(f" Use the secret for HMAC-SHA256 signature validation.") print(f" The gateway must be running to receive events (hermes gateway run).\n") @@ -192,6 +204,8 @@ def _cmd_list(args): for name, route in subs.items(): events = ", ".join(route.get("events", [])) or "(all)" deliver = route.get("deliver", "log") + if route.get("deliver_only"): + deliver = f"{deliver} (direct — no agent)" desc = route.get("description", "") print(f" ◆ {name}") if desc: diff --git a/hermes_state.py b/hermes_state.py index af97f7fbd8..0ea9815b5a 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -31,7 +31,7 @@ T = TypeVar("T") DEFAULT_DB_PATH = get_hermes_home() / "state.db" -SCHEMA_VERSION = 6 +SCHEMA_VERSION = 8 SCHEMA_SQL = """ CREATE TABLE IF NOT EXISTS schema_version ( @@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions ( cost_source TEXT, pricing_version TEXT, title TEXT, + api_call_count INTEGER DEFAULT 0, FOREIGN KEY (parent_session_id) REFERENCES sessions(id) ); @@ -80,10 +81,16 @@ CREATE TABLE IF NOT EXISTS messages ( token_count INTEGER, finish_reason TEXT, reasoning TEXT, + reasoning_content TEXT, reasoning_details TEXT, codex_reasoning_items TEXT ); +CREATE TABLE IF NOT EXISTS state_meta ( + key TEXT PRIMARY KEY, + value TEXT +); + CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source); CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id); CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC); @@ -329,6 +336,26 @@ class SessionDB: except sqlite3.OperationalError: pass # Column already exists cursor.execute("UPDATE schema_version SET version = 6") + if current_version < 7: + # v7: preserve provider-native reasoning_content separately from + # normalized reasoning text. Kimi/Moonshot replay can require + # this field on assistant tool-call messages when thinking is on. + try: + cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT') + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 7") + if current_version < 8: + # v8: add api_call_count column to sessions — tracks the number + # of individual LLM API calls made within a session (as opposed + # to the session count itself). + try: + cursor.execute( + 'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0' + ) + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 8") # Unique title index — always ensure it exists (safe to run after migrations # since the title column is guaranteed to exist at this point) @@ -383,10 +410,19 @@ class SessionDB: return session_id def end_session(self, session_id: str, end_reason: str) -> None: - """Mark a session as ended.""" + """Mark a session as ended. + + No-ops when the session is already ended. The first end_reason wins: + compression-split sessions must keep their ``end_reason = 'compression'`` + record even if a later stale ``end_session()`` call (e.g. from a + desynced CLI session_id after ``/resume`` or ``/branch``) targets them + with a different reason. Use ``reopen_session()`` first if you + intentionally need to re-end a closed session with a new reason. + """ def _do(conn): conn.execute( - "UPDATE sessions SET ended_at = ?, end_reason = ? 
WHERE id = ?", + "UPDATE sessions SET ended_at = ?, end_reason = ? " + "WHERE id = ? AND ended_at IS NULL", (time.time(), end_reason, session_id), ) self._execute_write(_do) @@ -426,6 +462,7 @@ class SessionDB: billing_provider: Optional[str] = None, billing_base_url: Optional[str] = None, billing_mode: Optional[str] = None, + api_call_count: int = 0, absolute: bool = False, ) -> None: """Update token counters and backfill model if not already set. @@ -455,7 +492,8 @@ class SessionDB: billing_provider = COALESCE(billing_provider, ?), billing_base_url = COALESCE(billing_base_url, ?), billing_mode = COALESCE(billing_mode, ?), - model = COALESCE(model, ?) + model = COALESCE(model, ?), + api_call_count = ? WHERE id = ?""" else: sql = """UPDATE sessions SET @@ -475,7 +513,8 @@ class SessionDB: billing_provider = COALESCE(billing_provider, ?), billing_base_url = COALESCE(billing_base_url, ?), billing_mode = COALESCE(billing_mode, ?), - model = COALESCE(model, ?) + model = COALESCE(model, ?), + api_call_count = COALESCE(api_call_count, 0) + ? WHERE id = ?""" params = ( input_tokens, @@ -493,6 +532,7 @@ class SessionDB: billing_base_url, billing_mode, model, + api_call_count, session_id, ) def _do(conn): @@ -714,6 +754,42 @@ class SessionDB: return f"{base} #{max_num + 1}" + def get_compression_tip(self, session_id: str) -> Optional[str]: + """Walk the compression-continuation chain forward and return the tip. + + A compression continuation is a child session where: + 1. The parent's ``end_reason = 'compression'`` + 2. The child was created AFTER the parent was ended (started_at >= ended_at) + + The second condition distinguishes compression continuations from + delegate subagents or branch children, which can also have a + ``parent_session_id`` but were created while the parent was still live. + + Returns the session_id of the latest continuation in the chain, or the + input ``session_id`` if it isn't part of a compression chain (or if the + input itself doesn't exist). + """ + current = session_id + # Bound the walk defensively — compression chains this deep are + # pathological and shouldn't happen in practice. 100 = plenty. + for _ in range(100): + with self._lock: + cursor = self._conn.execute( + "SELECT id FROM sessions " + "WHERE parent_session_id = ? " + " AND started_at >= (" + " SELECT ended_at FROM sessions " + " WHERE id = ? AND end_reason = 'compression'" + " ) " + "ORDER BY started_at DESC LIMIT 1", + (current, current), + ) + row = cursor.fetchone() + if row is None: + return current + current = row["id"] + return current + def list_sessions_rich( self, source: str = None, @@ -721,6 +797,7 @@ class SessionDB: limit: int = 20, offset: int = 0, include_children: bool = False, + project_compression_tips: bool = True, ) -> List[Dict[str, Any]]: """List sessions with preview (first user message) and last active timestamp. @@ -732,6 +809,14 @@ class SessionDB: By default, child sessions (subagent runs, compression continuations) are excluded. Pass ``include_children=True`` to include them. + + With ``project_compression_tips=True`` (default), sessions that are + roots of compression chains are projected forward to their latest + continuation — one logical conversation = one list entry, showing the + live continuation's id/message_count/title/last_active. This prevents + compressed continuations from being invisible to users while keeping + delegate subagents and branches hidden. Pass ``False`` to return the + raw root rows (useful for admin/debug UIs). 
""" where_clauses = [] params = [] @@ -782,8 +867,77 @@ class SessionDB: s["preview"] = "" sessions.append(s) + # Project compression roots forward to their tips. Each row whose + # end_reason is 'compression' has a continuation child; replace the + # surfaced fields (id, message_count, title, last_active, ended_at, + # end_reason, preview) with the tip's values so the list entry acts + # as the live conversation. Keep the root's started_at to preserve + # chronological ordering by original conversation start. + if project_compression_tips and not include_children: + projected = [] + for s in sessions: + if s.get("end_reason") != "compression": + projected.append(s) + continue + tip_id = self.get_compression_tip(s["id"]) + if tip_id == s["id"]: + projected.append(s) + continue + tip_row = self._get_session_rich_row(tip_id) + if not tip_row: + projected.append(s) + continue + # Preserve the root's started_at for stable sort order, but + # surface the tip's identity and activity data. + merged = dict(s) + for key in ( + "id", "ended_at", "end_reason", "message_count", + "tool_call_count", "title", "last_active", "preview", + "model", "system_prompt", + ): + if key in tip_row: + merged[key] = tip_row[key] + merged["_lineage_root_id"] = s["id"] + projected.append(merged) + sessions = projected + return sessions + def _get_session_rich_row(self, session_id: str) -> Optional[Dict[str, Any]]: + """Fetch a single session with the same enriched columns as + ``list_sessions_rich`` (preview + last_active). Returns None if the + session doesn't exist. + """ + query = """ + SELECT s.*, + COALESCE( + (SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63) + FROM messages m + WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL + ORDER BY m.timestamp, m.id LIMIT 1), + '' + ) AS _preview_raw, + COALESCE( + (SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id), + s.started_at + ) AS last_active + FROM sessions s + WHERE s.id = ? + """ + with self._lock: + cursor = self._conn.execute(query, (session_id,)) + row = cursor.fetchone() + if not row: + return None + s = dict(row) + raw = s.pop("_preview_raw", "").strip() + if raw: + text = raw[:60] + s["preview"] = text + ("..." 
if len(raw) > 60 else "") + else: + s["preview"] = "" + return s + # ========================================================================= # Message storage # ========================================================================= @@ -799,6 +953,7 @@ class SessionDB: token_count: int = None, finish_reason: str = None, reasoning: str = None, + reasoning_content: str = None, reasoning_details: Any = None, codex_reasoning_items: Any = None, ) -> int: @@ -828,8 +983,8 @@ class SessionDB: cursor = conn.execute( """INSERT INTO messages (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, - reasoning, reasoning_details, codex_reasoning_items) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + reasoning, reasoning_content, reasoning_details, codex_reasoning_items) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", ( session_id, role, @@ -841,6 +996,7 @@ class SessionDB: token_count, finish_reason, reasoning, + reasoning_content, reasoning_details_json, codex_items_json, ), @@ -891,7 +1047,7 @@ class SessionDB: with self._lock: cursor = self._conn.execute( "SELECT role, content, tool_call_id, tool_calls, tool_name, " - "reasoning, reasoning_details, codex_reasoning_items " + "reasoning, reasoning_content, reasoning_details, codex_reasoning_items " "FROM messages WHERE session_id = ? ORDER BY timestamp, id", (session_id,), ) @@ -915,6 +1071,8 @@ class SessionDB: if row["role"] == "assistant": if row["reasoning"]: msg["reasoning"] = row["reasoning"] + if row["reasoning_content"] is not None: + msg["reasoning_content"] = row["reasoning_content"] if row["reasoning_details"]: try: msg["reasoning_details"] = json.loads(row["reasoning_details"]) @@ -1126,10 +1284,37 @@ class SessionDB: try: with self._lock: ctx_cursor = self._conn.execute( - """SELECT role, content FROM messages - WHERE session_id = ? AND id >= ? - 1 AND id <= ? + 1 - ORDER BY id""", - (match["session_id"], match["id"], match["id"]), + """WITH target AS ( + SELECT session_id, timestamp, id + FROM messages + WHERE id = ? + ) + SELECT role, content + FROM ( + SELECT m.id, m.timestamp, m.role, m.content + FROM messages m + JOIN target t ON t.session_id = m.session_id + WHERE (m.timestamp < t.timestamp) + OR (m.timestamp = t.timestamp AND m.id < t.id) + ORDER BY m.timestamp DESC, m.id DESC + LIMIT 1 + ) + UNION ALL + SELECT role, content + FROM messages + WHERE id = ? + UNION ALL + SELECT role, content + FROM ( + SELECT m.id, m.timestamp, m.role, m.content + FROM messages m + JOIN target t ON t.session_id = m.session_id + WHERE (m.timestamp > t.timestamp) + OR (m.timestamp = t.timestamp AND m.id > t.id) + ORDER BY m.timestamp ASC, m.id ASC + LIMIT 1 + )""", + (match["id"], match["id"]), ) context_msgs = [ {"role": r["role"], "content": (r["content"] or "")[:200]} @@ -1291,3 +1476,116 @@ class SessionDB: return len(session_ids) return self._execute_write(_do) + + # ── Meta key/value (for scheduler bookkeeping) ── + + def get_meta(self, key: str) -> Optional[str]: + """Read a value from the state_meta key/value store.""" + with self._lock: + row = self._conn.execute( + "SELECT value FROM state_meta WHERE key = ?", (key,) + ).fetchone() + if row is None: + return None + return row["value"] if isinstance(row, sqlite3.Row) else row[0] + + def set_meta(self, key: str, value: str) -> None: + """Write a value to the state_meta key/value store.""" + def _do(conn): + conn.execute( + "INSERT INTO state_meta (key, value) VALUES (?, ?) 
" + "ON CONFLICT(key) DO UPDATE SET value = excluded.value", + (key, value), + ) + self._execute_write(_do) + + # ── Space reclamation ── + + def vacuum(self) -> None: + """Run VACUUM to reclaim disk space after large deletes. + + SQLite does not shrink the database file when rows are deleted — + freed pages just get reused on the next insert. After a prune that + removed hundreds of sessions, the file stays bloated unless we + explicitly VACUUM. + + VACUUM rewrites the entire DB, so it's expensive (seconds per + 100MB) and cannot run inside a transaction. It also acquires an + exclusive lock, so callers must ensure no other writers are + active. Safe to call at startup before the gateway/CLI starts + serving traffic. + """ + # VACUUM cannot be executed inside a transaction. + with self._lock: + # Best-effort WAL checkpoint first, then VACUUM. + try: + self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)") + except Exception: + pass + self._conn.execute("VACUUM") + + def maybe_auto_prune_and_vacuum( + self, + retention_days: int = 90, + min_interval_hours: int = 24, + vacuum: bool = True, + ) -> Dict[str, Any]: + """Idempotent auto-maintenance: prune old sessions + optional VACUUM. + + Records the last run timestamp in state_meta so subsequent calls + within ``min_interval_hours`` no-op. Designed to be called once at + startup from long-lived entrypoints (CLI, gateway, cron scheduler). + + Never raises. On any failure, logs a warning and returns a dict + with ``"error"`` set. + + Returns a dict with keys: + - ``"skipped"`` (bool) — true if within min_interval_hours of last run + - ``"pruned"`` (int) — number of sessions deleted + - ``"vacuumed"`` (bool) — true if VACUUM ran + - ``"error"`` (str, optional) — present only on failure + """ + result: Dict[str, Any] = {"skipped": False, "pruned": 0, "vacuumed": False} + try: + # Skip if another process/call did maintenance recently. + last_raw = self.get_meta("last_auto_prune") + now = time.time() + if last_raw: + try: + last_ts = float(last_raw) + if now - last_ts < min_interval_hours * 3600: + result["skipped"] = True + return result + except (TypeError, ValueError): + pass # corrupt meta; treat as no prior run + + pruned = self.prune_sessions(older_than_days=retention_days) + result["pruned"] = pruned + + # Only VACUUM if we actually freed rows — VACUUM on a tight DB + # is wasted I/O. Threshold keeps small DBs from paying the cost. + if vacuum and pruned > 0: + try: + self.vacuum() + result["vacuumed"] = True + except Exception as exc: + logger.warning("state.db VACUUM failed: %s", exc) + + # Record the attempt even if pruned == 0, so we don't retry + # every startup within the min_interval_hours window. + self.set_meta("last_auto_prune", str(now)) + + if pruned > 0: + logger.info( + "state.db auto-maintenance: pruned %d session(s) older than %d days%s", + pruned, + retention_days, + " + VACUUM" if result["vacuumed"] else "", + ) + except Exception as exc: + # Maintenance must never block startup. Log and return error marker. 
+ logger.warning("state.db auto-maintenance failed: %s", exc) + result["error"] = str(exc) + + return result + diff --git a/mini_swe_runner.py b/mini_swe_runner.py index 739074402d..c434515045 100644 --- a/mini_swe_runner.py +++ b/mini_swe_runner.py @@ -43,13 +43,23 @@ from dotenv import load_dotenv load_dotenv() -def _effective_temperature_for_model(model: str) -> Optional[float]: - """Return a fixed temperature for models with strict sampling contracts.""" +def _effective_temperature_for_model( + model: str, + base_url: Optional[str] = None, +) -> Optional[float]: + """Return a fixed temperature for models with strict sampling contracts. + + Returns ``None`` when the model manages temperature server-side (Kimi); + callers must omit the ``temperature`` kwarg entirely in that case. + """ try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE except Exception: return None - return _fixed_temperature_for_model(model) + result = _fixed_temperature_for_model(model, base_url) + if result is OMIT_TEMPERATURE: + return None # caller must omit temperature + return result @@ -457,7 +467,10 @@ Complete the user's task step by step.""" "tools": self.tools, "timeout": 300.0, } - fixed_temperature = _effective_temperature_for_model(self.model) + fixed_temperature = _effective_temperature_for_model( + self.model, + str(getattr(self.client, "base_url", "") or ""), + ) if fixed_temperature is not None: api_kwargs["temperature"] = fixed_temperature diff --git a/model_tools.py b/model_tools.py index 5ec806e78b..db4b46326b 100644 --- a/model_tools.py +++ b/model_tools.py @@ -282,6 +282,31 @@ def get_tool_definitions( filtered_tools[i] = {"type": "function", "function": dynamic_schema} break + # Rebuild discord_server schema based on the bot's privileged intents + # (detected from GET /applications/@me) and the user's action allowlist + # in config. Hides actions the bot's intents don't support so the + # model never attempts them, and annotates fetch_messages when the + # MESSAGE_CONTENT intent is missing. + if "discord_server" in available_tool_names: + try: + from tools.discord_tool import get_dynamic_schema + dynamic = get_dynamic_schema() + except Exception: # pragma: no cover — defensive, fall back to static + dynamic = None + if dynamic is None: + # Tool filtered out entirely (empty allowlist or detection disabled + # the only remaining actions). Drop it from the schema list. + filtered_tools = [ + t for t in filtered_tools + if t.get("function", {}).get("name") != "discord_server" + ] + available_tool_names.discard("discord_server") + else: + for i, td in enumerate(filtered_tools): + if td.get("function", {}).get("name") == "discord_server": + filtered_tools[i] = {"type": "function", "function": dynamic} + break + # Strip web tool cross-references from browser_navigate description when # web_search / web_extract are not available. The static schema says # "prefer web_search or web_extract" which causes the model to hallucinate @@ -525,6 +550,30 @@ def handle_function_call( except Exception: pass + # Generic tool-result canonicalization seam: plugins receive the + # final result string (JSON, usually) and may replace it by + # returning a string from transform_tool_result. Runs after + # post_tool_call (which stays observational) and before the result + # is appended back into conversation context. Fail-open; the first + # valid string return wins; non-string returns are ignored. 
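+        #
+        # A plugin-side hook might look like (hypothetical sketch, not an
+        # in-tree plugin):
+        #
+        #     def transform_tool_result(tool_name, args, result, **kwargs):
+        #         if tool_name == "web_search":
+        #             return result + "\n[annotated]"  # str return replaces result
+        #         return None  # leave every other tool's result untouched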
+        try:
+            from hermes_cli.plugins import invoke_hook
+            hook_results = invoke_hook(
+                "transform_tool_result",
+                tool_name=function_name,
+                args=function_args,
+                result=result,
+                task_id=task_id or "",
+                session_id=session_id or "",
+                tool_call_id=tool_call_id or "",
+            )
+            for hook_result in hook_results:
+                if isinstance(hook_result, str):
+                    result = hook_result
+                    break
+        except Exception:
+            pass
+
         return result
 
     except Exception as e:
diff --git a/nix/devShell.nix b/nix/devShell.nix
index 63edc59cf1..d0d56e40b0 100644
--- a/nix/devShell.nix
+++ b/nix/devShell.nix
@@ -7,7 +7,8 @@ let
   hermes-agent = inputs.self.packages.${system}.default;
   hermes-tui = inputs.self.packages.${system}.tui;
-  packages = [ hermes-agent hermes-tui ];
+  hermes-web = inputs.self.packages.${system}.web;
+  packages = [ hermes-agent hermes-tui hermes-web ];
 in {
   devShells.default = pkgs.mkShell {
     inputsFrom = packages;
diff --git a/nix/lib.nix b/nix/lib.nix
new file mode 100644
index 0000000000..ee28537a66
--- /dev/null
+++ b/nix/lib.nix
@@ -0,0 +1,217 @@
+# nix/lib.nix — Shared helpers for nix stuff
+{ pkgs, npm-lockfile-fix }:
+{
+  # Returns a buildNpmPackage-compatible attrs set that provides:
+  #   patchPhase — ensures lockfile has exactly one trailing newline
+  #   nativeBuildInputs — [ updateLockfileScript ] (list, prepend with ++ for more)
+  #   passthru.devShellHook — stamp-checked npm install + hash auto-update
+  #   passthru.npmLockfile — metadata for mkFixLockfiles
+  #
+  # NOTE: npmConfigHook runs `diff` between the source lockfile and the
+  # npm-deps cache lockfile. fetchNpmDeps preserves whatever trailing
+  # newlines the lockfile has. The patchPhase normalizes to exactly one
+  # trailing newline so both sides always match.
+  #
+  # Usage:
+  #   npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
+  #   pkgs.buildNpmPackage (npm // { ... })   # or:
+  #   pkgs.buildNpmPackage ({ ... } // npm)
+  mkNpmPassthru =
+    {
+      folder, # repo-relative folder with package.json, e.g. "ui-tui"
+      attr, # flake package attr, e.g. "tui"
+      pname, # e.g. "hermes-tui"
+      nixFile ? "nix/${attr}.nix", # defaults to nix/<attr>.nix
+    }:
+    {
+      patchPhase = ''
+        runHook prePatch
+        # Normalize trailing newlines so source and npm-deps always match,
+        # regardless of what fetchNpmDeps preserves.
+        sed -i -z 's/\n*$/\n/' package-lock.json
+
+        # Make npmConfigHook's byte-for-byte diff newline-agnostic by
+        # replacing its hardcoded /nix/store/.../diff with a wrapper that
+        # normalizes trailing newlines on both sides before comparing.
+        mkdir -p "$TMPDIR/bin"
+        cat > "$TMPDIR/bin/diff" << DIFFWRAP
+        #!/bin/sh
+        f1=\$(mktemp) && sed -z 's/\n*$/\n/' "\$1" > "\$f1"
+        f2=\$(mktemp) && sed -z 's/\n*$/\n/' "\$2" > "\$f2"
+        ${pkgs.diffutils}/bin/diff "\$f1" "\$f2" && rc=0 || rc=\$?
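+        # \$1/\$2 are the two lockfiles npmConfigHook compares; the wrapper
+        # preserves the real diff exit status so genuine mismatches still fail.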
+ rm -f "\$f1" "\$f2" + exit \$rc + DIFFWRAP + chmod +x "$TMPDIR/bin/diff" + export PATH="$TMPDIR/bin:$PATH" + + runHook postPatch + ''; + + nativeBuildInputs = [ + (pkgs.writeShellScriptBin "update_${attr}_lockfile" '' + set -euox pipefail + + REPO_ROOT=$(git rev-parse --show-toplevel) + + cd "$REPO_ROOT/${folder}" + rm -rf node_modules/ + npm cache clean --force + CI=true npm install + ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json + + NIX_FILE="$REPO_ROOT/${nixFile}" + sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE + NIX_OUTPUT=$(nix build .#${attr} 2>&1 || true) + NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') + echo got new hash $NEW_HASH + sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE + nix build .#${attr} + echo "Updated npm hash in $NIX_FILE to $NEW_HASH" + '') + ]; + + passthru = { + devShellHook = pkgs.writeShellScript "npm-dev-hook-${pname}" '' + REPO_ROOT=$(git rev-parse --show-toplevel) + + _hermes_npm_stamp() { + sha256sum "${folder}/package.json" "${folder}/package-lock.json" \ + 2>/dev/null | sha256sum | awk '{print $1}' + } + STAMP=".nix-stamps/${pname}" + STAMP_VALUE="$(_hermes_npm_stamp)" + if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then + echo "${pname}: installing npm dependencies..." + ( cd ${folder} && CI=true npm install --silent --no-fund --no-audit 2>/dev/null ) + + # Auto-update the nix hash so it stays in sync with the lockfile + echo "${pname}: prefetching npm deps..." + NIX_FILE="$REPO_ROOT/${nixFile}" + if NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "${folder}/package-lock.json" 2>/dev/null); then + sed -i "s|hash = \"sha256-[A-Za-z0-9+/=]+\"|hash = \"$NEW_HASH\";|" "$NIX_FILE" + echo "${pname}: updated hash to $NEW_HASH" + else + echo "${pname}: warning: prefetch failed, run 'nix run .#fix-lockfiles -- --apply' manually" >&2 + fi + + mkdir -p .nix-stamps + _hermes_npm_stamp > "$STAMP" + fi + unset -f _hermes_npm_stamp + ''; + + npmLockfile = { + inherit attr folder nixFile; + }; + }; + }; + + # Aggregate `fix-lockfiles` bin from a list of packages carrying + # passthru.npmLockfile = { attr; folder; nixFile; }; + # Invocations: + # fix-lockfiles --check # exit 1 if any hash is stale + # fix-lockfiles --apply # rewrite stale hashes in place + # Writes machine-readable fields (stale, changed, report) to $GITHUB_OUTPUT + # when set, so CI workflows can post a sticky PR comment directly. + mkFixLockfiles = + { + packages, # list of packages with passthru.npmLockfile + }: + let + entries = map (p: p.passthru.npmLockfile) packages; + entryArgs = pkgs.lib.concatMapStringsSep " " (e: "\"${e.attr}:${e.folder}:${e.nixFile}\"") entries; + in + pkgs.writeShellScriptBin "fix-lockfiles" '' + set -uox pipefail + MODE="''${1:---check}" + case "$MODE" in + --check|--apply) ;; + -h|--help) + echo "usage: fix-lockfiles [--check|--apply]" + exit 0 ;; + *) + echo "usage: fix-lockfiles [--check|--apply]" >&2 + exit 2 ;; + esac + + ENTRIES=(${entryArgs}) + + REPO_ROOT="$(git rev-parse --show-toplevel)" + cd "$REPO_ROOT" + + # When running in GH Actions, emit Markdown links in the report pointing + # at the offending line of the nix file (and the lockfile) at the exact + # commit that was checked. LINK_SHA should be set by the workflow to the + # PR head SHA; falls back to GITHUB_SHA (which on pull_request is the + # test-merge commit, still browseable). 
+      LINK_SERVER="''${GITHUB_SERVER_URL:-https://github.com}"
+      LINK_REPO="''${GITHUB_REPOSITORY:-}"
+      LINK_SHA="''${LINK_SHA:-''${GITHUB_SHA:-}}"
+
+      STALE=0
+      FIXED=0
+      REPORT=""
+
+      for entry in "''${ENTRIES[@]}"; do
+        IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
+        echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
+        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
+        STATUS=$?
+        if [ "$STATUS" -eq 0 ]; then
+          echo " ok"
+          continue
+        fi
+
+        NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
+        if [ -z "$NEW_HASH" ]; then
+          echo " build failed with no hash mismatch:" >&2
+          echo "$OUTPUT" | tail -40 >&2
+          exit 1
+        fi
+
+        HASH_LINE=$(grep -n 'hash = "sha256-' "$NIX_FILE" | head -1 | cut -d: -f1)
+        OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
+          | sed -E 's/hash = "(.*)"/\1/')
+        LOCK_FILE="$FOLDER/package-lock.json"
+        echo " stale: $NIX_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
+        STALE=1
+
+        if [ -n "$LINK_REPO" ] && [ -n "$LINK_SHA" ]; then
+          NIX_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$NIX_FILE#L$HASH_LINE"
+          LOCK_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LOCK_FILE"
+          REPORT+="- [\`$NIX_FILE:$HASH_LINE\`]($NIX_URL) (\`.#$ATTR\`): \`$OLD_HASH\` → \`$NEW_HASH\` — lockfile: [\`$LOCK_FILE\`]($LOCK_URL)"$'\n'
+        else
+          REPORT+="- \`$NIX_FILE:$HASH_LINE\` (\`.#$ATTR\`): \`$OLD_HASH\` → \`$NEW_HASH\`"$'\n'
+        fi
+
+        if [ "$MODE" = "--apply" ]; then
+          sed -i "s|hash = \"sha256-[^\"]*\";|hash = \"$NEW_HASH\";|" "$NIX_FILE"
+          nix build ".#$ATTR.npmDeps" --no-link --print-build-logs
+          FIXED=1
+          echo " fixed"
+        fi
+      done
+
+      if [ -n "''${GITHUB_OUTPUT:-}" ]; then
+        {
+          [ "$STALE" -eq 1 ] && echo "stale=true" || echo "stale=false"
+          [ "$FIXED" -eq 1 ] && echo "changed=true" || echo "changed=false"
+          if [ -n "$REPORT" ]; then
+            echo "report<<EOF"
+            echo "$REPORT"
+            echo "EOF"
+          fi
+        } >> "$GITHUB_OUTPUT"
+      fi
+
+      if [ "$STALE" -eq 1 ] && [ "$MODE" = "--check" ]; then
+        echo
+        echo "Stale lockfile hashes detected. Run:"
+        echo " nix run .#fix-lockfiles -- --apply"
+        exit 1
+      fi
+
+      exit 0
+    '';
+}
diff --git a/nix/packages.nix b/nix/packages.nix
index 912be7843b..721546851d 100644
--- a/nix/packages.nix
+++ b/nix/packages.nix
@@ -8,10 +8,14 @@
     inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
   };
 
-  hermesTui = pkgs.callPackage ./tui.nix {
+  hermesNpmLib = pkgs.callPackage ./lib.nix {
     npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
   };
 
+  hermesTui = pkgs.callPackage ./tui.nix {
+    inherit hermesNpmLib;
+  };
+
   # Import bundled skills, excluding runtime caches
   bundledSkills = pkgs.lib.cleanSourceWith {
     src = ../skills;
@@ -19,7 +23,7 @@
   };
 
   hermesWeb = pkgs.callPackage ./web.nix {
-    npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
+    inherit hermesNpmLib;
   };
 
   runtimeDeps = with pkgs; [
@@ -111,6 +115,10 @@
 
     tui = hermesTui;
     web = hermesWeb;
+
+    fix-lockfiles = hermesNpmLib.mkFixLockfiles {
+      packages = [ hermesTui hermesWeb ];
+    };
   };
 };
}
diff --git a/nix/tui.nix b/nix/tui.nix
index 7303edecb9..04bbfa034e 100644
--- a/nix/tui.nix
+++ b/nix/tui.nix
@@ -1,18 +1,18 @@
 # nix/tui.nix — Hermes TUI (Ink/React) compiled with tsc and bundled
-{ pkgs, npm-lockfile-fix, ... }:
+{ pkgs, hermesNpmLib, ...
}: let src = ../ui-tui; npmDeps = pkgs.fetchNpmDeps { inherit src; - hash = "sha256-mG3vpgGi4ljt4X3XIf3I/5mIcm+rVTUAmx2DQ6YVA90="; + hash = "sha256-RU4qSHgJPMyfRSEJDzkG4+MReDZDc6QbTD2wisa5QE0="; }; + npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; }; + packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json")); version = packageJson.version; - - npmLockHash = builtins.hashString "sha256" (builtins.readFile ../ui-tui/package-lock.json); in -pkgs.buildNpmPackage { +pkgs.buildNpmPackage (npm // { pname = "hermes-tui"; inherit src npmDeps version; @@ -37,41 +37,4 @@ pkgs.buildNpmPackage { runHook postInstall ''; - - nativeBuildInputs = [ - (pkgs.writeShellScriptBin "update_tui_lockfile" '' - set -euox pipefail - - # get root of repo - REPO_ROOT=$(git rev-parse --show-toplevel) - - # cd into ui-tui and reinstall - cd "$REPO_ROOT/ui-tui" - rm -rf node_modules/ - npm cache clean --force - CI=true npm install # ci env var to suppress annoying unicode install banner lag - ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json - - NIX_FILE="$REPO_ROOT/nix/tui.nix" - # compute the new hash - sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE - NIX_OUTPUT=$(nix build .#tui 2>&1 || true) - NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') - echo got new hash $NEW_HASH - sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE - nix build .#tui - echo "Updated npm hash in $NIX_FILE to $NEW_HASH" - '') - ]; - - passthru.devShellHook = '' - STAMP=".nix-stamps/hermes-tui" - STAMP_VALUE="${npmLockHash}" - if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then - echo "hermes-tui: installing npm dependencies..." - cd ui-tui && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd .. - mkdir -p .nix-stamps - echo "$STAMP_VALUE" > "$STAMP" - fi - ''; -} +}) diff --git a/nix/web.nix b/nix/web.nix index 247889753f..fc77728966 100644 --- a/nix/web.nix +++ b/nix/web.nix @@ -1,15 +1,15 @@ # nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build -{ pkgs, npm-lockfile-fix, ... }: +{ pkgs, hermesNpmLib, ... }: let src = ../web; npmDeps = pkgs.fetchNpmDeps { inherit src; - hash = "sha256-Y0pOzdFG8BLjfvCLmsvqYpjxFjAQabXp1i7X9W/cCU4="; + hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg="; }; - npmLockHash = builtins.hashString "sha256" (builtins.readFile ../web/package-lock.json); + npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; }; in -pkgs.buildNpmPackage { +pkgs.buildNpmPackage (npm // { pname = "hermes-web"; version = "0.0.0"; inherit src npmDeps; @@ -26,38 +26,4 @@ pkgs.buildNpmPackage { cp -r dist $out runHook postInstall ''; - - nativeBuildInputs = [ - (pkgs.writeShellScriptBin "update_web_lockfile" '' - set -euox pipefail - - REPO_ROOT=$(git rev-parse --show-toplevel) - - cd "$REPO_ROOT/web" - rm -rf node_modules/ - npm cache clean --force - CI=true npm install - ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json - - NIX_FILE="$REPO_ROOT/nix/web.nix" - sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE - NIX_OUTPUT=$(nix build .#web 2>&1 || true) - NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') - echo got new hash $NEW_HASH - sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE - nix build .#web - echo "Updated npm hash in $NIX_FILE to $NEW_HASH" - '') - ]; - - passthru.devShellHook = '' - STAMP=".nix-stamps/hermes-web" - STAMP_VALUE="${npmLockHash}" - if [ ! 
-f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then - echo "hermes-web: installing npm dependencies..." - cd web && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd .. - mkdir -p .nix-stamps - echo "$STAMP_VALUE" > "$STAMP" - fi - ''; -} +}) diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md index c60d2c6356..1c099ca605 100644 --- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md +++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md @@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen. | Key | Default | Description | |-----|---------|-------------| | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic API calls | +| `dialecticCadence` | `2` | Min turns between dialectic API calls. Recommended 1–5 | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection | -Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn. +Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires every other turn. Setting it to `1` fires every turn. ### Depth (how many) @@ -180,6 +180,8 @@ If `dialecticDepthLevels` is omitted, rounds use **proportional levels** derived This keeps earlier passes cheap while using full depth on the final synthesis. +**Depth at session start.** The session-start prewarm runs the full configured `dialecticDepth` in the background before turn 1. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. Turn 1 consumes the prewarm result directly; if prewarm hasn't landed in time, turn 1 falls back to a synchronous call with a bounded timeout. + ### Level (how hard) Controls the **intensity** of each dialectic reasoning round. @@ -368,7 +370,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` | | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic LLM calls | +| `dialecticCadence` | `2` | Min turns between dialectic LLM calls (recommended 1–5) | The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions. diff --git a/optional-skills/dogfood/DESCRIPTION.md b/optional-skills/dogfood/DESCRIPTION.md new file mode 100644 index 0000000000..f083fd72bd --- /dev/null +++ b/optional-skills/dogfood/DESCRIPTION.md @@ -0,0 +1,3 @@ +# Dogfood — Advanced QA & Testing Skills + +Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses. 
diff --git a/optional-skills/dogfood/adversarial-ux-test/SKILL.md b/optional-skills/dogfood/adversarial-ux-test/SKILL.md new file mode 100644 index 0000000000..1777e083d1 --- /dev/null +++ b/optional-skills/dogfood/adversarial-ux-test/SKILL.md @@ -0,0 +1,190 @@ +--- +name: adversarial-ux-test +description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only. +version: 1.0.0 +author: Omni @ Comelse +license: MIT +metadata: + hermes: + tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing] + related_skills: [dogfood] +--- + +# Adversarial UX Test + +Roleplay the worst-case user for your product — the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise. + +Think of it as an automated "mom test" — but angry. + +## Why This Works + +Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. The adversarial persona catches: +- Confusing terminology that makes sense to developers but not users +- Too many steps to accomplish basic tasks +- Missing onboarding or "aha moments" +- Accessibility issues (font size, contrast, click targets) +- Cold-start problems (empty states, no demo content) +- Paywall/signup friction that kills conversion + +The **pragmatism filter** (Phase 3) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs. + +## How to Use + +Tell the agent: +``` +"Run an adversarial UX test on [URL]" +"Be a grumpy [persona type] and test [app name]" +"Do an asshole user test on my staging site" +``` + +You can provide a persona or let the agent generate one based on your product's target audience. + +## Step 1: Define the Persona + +If no persona is provided, generate one by answering: + +1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way") +2. **What is their tech comfort level?** (the lower the better — WhatsApp-only, paper notebooks, wife set up their email) +3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list) +4. **What would make them give up?** (too many clicks, jargon, slow, confusing) +5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing) + +### Good Persona Example +> **"Big Mick" McAllister** — 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords. + +### Bad Persona Example +> "A user who doesn't like the app" — too vague, no constraints, no voice. + +The persona must be **specific enough to stay in character** for 20 minutes of testing. + +## Step 2: Become the Asshole (Browse as the Persona) + +1. Read any available project docs for app context and URLs +2. **Fully inhabit the persona** — their frustrations, limitations, goals +3. Navigate to the app using browser tools +4. **Attempt the persona's ACTUAL TASKS** (not a feature tour): + - Can they do what they came to do? + - How many clicks/screens to accomplish it? 
+ - What confuses them? + - What makes them angry? + - Where do they get lost? + - What would make them give up and go back to their old way? + +5. Test these friction categories: + - **First impression** — would they even bother past the landing page? + - **Core workflow** — the ONE thing they need to do most often + - **Error recovery** — what happens when they do something wrong? + - **Readability** — text size, contrast, information density + - **Speed** — does it feel faster than their current method? + - **Terminology** — any jargon they wouldn't understand? + - **Navigation** — can they find their way back? do they know where they are? + +6. Take screenshots of every pain point +7. Check browser console for JS errors on every page + +## Step 3: The Rant (Write Feedback in Character) + +Write the feedback AS THE PERSONA — in their voice, with their frustrations. This is not a bug report. This is a real human venting. + +``` +[PERSONA NAME]'s Review of [PRODUCT] + +Overall: [Would they keep using it? Yes/No/Maybe with conditions] + +THE GOOD (grudging admission): +- [things even they have to admit work] + +THE BAD (legitimate UX issues): +- [real problems that would stop them from using the product] + +THE UGLY (showstoppers): +- [things that would make them uninstall/cancel immediately] + +SPECIFIC COMPLAINTS: +1. [Page/feature]: "[quote in persona voice]" — [what happened, expected] +2. ... + +VERDICT: "[one-line persona quote summarizing their experience]" +``` + +## Step 4: The Pragmatism Filter (Critical — Do Not Skip) + +Step OUT of the persona. Evaluate each complaint as a product person: + +- **RED: REAL UX BUG** — Any user would have this problem, not just grumpy ones. Fix it. +- **YELLOW: VALID BUT LOW PRIORITY** — Real issue but only for extreme users. Note it. +- **WHITE: PERSONA NOISE** — "I hate computers" talking, not a product problem. Skip it. +- **GREEN: FEATURE REQUEST** — Good idea hidden in the complaint. Consider it. + +### Filter Criteria +1. Would a 35-year-old competent-but-busy user have the same complaint? → RED +2. Is this a genuine accessibility issue (font size, contrast, click targets)? → RED +3. Is this "I want it to work like paper" resistance to digital? → WHITE +4. Is this a real workflow inefficiency the persona stumbled on? → YELLOW or RED +5. Would fixing this add complexity for the 80% who are fine? → WHITE +6. Does the complaint reveal a missing onboarding moment? → GREEN + +**This filter is MANDATORY.** Never ship raw persona complaints as tickets. + +## Step 5: Create Tickets + +For **RED** and **GREEN** items only: +- Clear, actionable title +- Include the persona's verbatim quote (entertaining + memorable) +- The real UX issue underneath (objective) +- A suggested fix (actionable) +- Tag/label: "ux-review" + +For **YELLOW** items: one catch-all ticket with all notes. + +**WHITE** items appear in the report only. No tickets. + +**Max 10 tickets per session** — focus on the worst issues. + +## Step 6: Report + +Deliver: +1. The persona rant (Step 3) — entertaining and visceral +2. The filtered assessment (Step 4) — pragmatic and actionable +3. Tickets created (Step 5) — with links +4. Screenshots of key issues + +## Tips + +- **One persona per session.** Don't mix perspectives. +- **Stay in character during Steps 2-3.** Break character only at Step 4. +- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages. +- **Empty states are gold.** New user experience reveals the most friction. 
+- **The best findings are RED items the persona found accidentally** while trying to do something else. +- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways. +- **Run this before demos, launches, or after shipping a batch of features.** +- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts — the cold start experience is where most friction lives. +- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona. +- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding — it means the team knew about it but never felt the user's pain. +- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage. +- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level. + +## Example Personas by Industry + +These are starting points — customize for your specific product: + +| Product Type | Persona | Age | Key Trait | +|-------------|---------|-----|-----------| +| CRM | Retirement home director | 68 | Filing cabinet is the current CRM | +| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper | +| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups | +| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes | +| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions | +| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls | +| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer | +| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders | + +## Rules + +- Stay in character during Steps 2-3 +- Be genuinely mean but fair — find real problems, not manufactured ones +- The pragmatism filter (Step 4) is **MANDATORY** +- Screenshots required for every complaint +- Max 10 tickets per session +- Test on staging/deployed app, not local dev +- One persona, one session, one report diff --git a/skills/mcp/mcporter/SKILL.md b/optional-skills/mcp/mcporter/SKILL.md similarity index 100% rename from skills/mcp/mcporter/SKILL.md rename to optional-skills/mcp/mcporter/SKILL.md diff --git a/skills/mlops/models/clip/SKILL.md b/optional-skills/mlops/clip/SKILL.md similarity index 100% rename from skills/mlops/models/clip/SKILL.md rename to optional-skills/mlops/clip/SKILL.md diff --git a/skills/mlops/models/clip/references/applications.md b/optional-skills/mlops/clip/references/applications.md similarity index 100% rename from skills/mlops/models/clip/references/applications.md rename to optional-skills/mlops/clip/references/applications.md diff --git a/skills/mlops/cloud/modal/SKILL.md b/optional-skills/mlops/modal/SKILL.md similarity index 100% rename from skills/mlops/cloud/modal/SKILL.md rename to optional-skills/mlops/modal/SKILL.md diff --git a/skills/mlops/cloud/modal/references/advanced-usage.md b/optional-skills/mlops/modal/references/advanced-usage.md similarity index 100% rename from skills/mlops/cloud/modal/references/advanced-usage.md rename to 
optional-skills/mlops/modal/references/advanced-usage.md diff --git a/skills/mlops/cloud/modal/references/troubleshooting.md b/optional-skills/mlops/modal/references/troubleshooting.md similarity index 100% rename from skills/mlops/cloud/modal/references/troubleshooting.md rename to optional-skills/mlops/modal/references/troubleshooting.md diff --git a/skills/mlops/training/peft/SKILL.md b/optional-skills/mlops/peft/SKILL.md similarity index 100% rename from skills/mlops/training/peft/SKILL.md rename to optional-skills/mlops/peft/SKILL.md diff --git a/skills/mlops/training/peft/references/advanced-usage.md b/optional-skills/mlops/peft/references/advanced-usage.md similarity index 100% rename from skills/mlops/training/peft/references/advanced-usage.md rename to optional-skills/mlops/peft/references/advanced-usage.md diff --git a/skills/mlops/training/peft/references/troubleshooting.md b/optional-skills/mlops/peft/references/troubleshooting.md similarity index 100% rename from skills/mlops/training/peft/references/troubleshooting.md rename to optional-skills/mlops/peft/references/troubleshooting.md diff --git a/skills/mlops/training/pytorch-fsdp/SKILL.md b/optional-skills/mlops/pytorch-fsdp/SKILL.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/SKILL.md rename to optional-skills/mlops/pytorch-fsdp/SKILL.md diff --git a/skills/mlops/training/pytorch-fsdp/references/index.md b/optional-skills/mlops/pytorch-fsdp/references/index.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/references/index.md rename to optional-skills/mlops/pytorch-fsdp/references/index.md diff --git a/skills/mlops/training/pytorch-fsdp/references/other.md b/optional-skills/mlops/pytorch-fsdp/references/other.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/references/other.md rename to optional-skills/mlops/pytorch-fsdp/references/other.md diff --git a/skills/mlops/models/stable-diffusion/SKILL.md b/optional-skills/mlops/stable-diffusion/SKILL.md similarity index 100% rename from skills/mlops/models/stable-diffusion/SKILL.md rename to optional-skills/mlops/stable-diffusion/SKILL.md diff --git a/skills/mlops/models/stable-diffusion/references/advanced-usage.md b/optional-skills/mlops/stable-diffusion/references/advanced-usage.md similarity index 100% rename from skills/mlops/models/stable-diffusion/references/advanced-usage.md rename to optional-skills/mlops/stable-diffusion/references/advanced-usage.md diff --git a/skills/mlops/models/stable-diffusion/references/troubleshooting.md b/optional-skills/mlops/stable-diffusion/references/troubleshooting.md similarity index 100% rename from skills/mlops/models/stable-diffusion/references/troubleshooting.md rename to optional-skills/mlops/stable-diffusion/references/troubleshooting.md diff --git a/skills/mlops/models/whisper/SKILL.md b/optional-skills/mlops/whisper/SKILL.md similarity index 100% rename from skills/mlops/models/whisper/SKILL.md rename to optional-skills/mlops/whisper/SKILL.md diff --git a/skills/mlops/models/whisper/references/languages.md b/optional-skills/mlops/whisper/references/languages.md similarity index 100% rename from skills/mlops/models/whisper/references/languages.md rename to optional-skills/mlops/whisper/references/languages.md diff --git a/optional-skills/productivity/telephony/SKILL.md b/optional-skills/productivity/telephony/SKILL.md index c74a369209..6c457592a9 100644 --- a/optional-skills/productivity/telephony/SKILL.md +++ b/optional-skills/productivity/telephony/SKILL.md @@ 
-7,7 +7,7 @@ license: MIT metadata: hermes: tags: [telephony, phone, sms, mms, voice, twilio, bland.ai, vapi, calling, texting] - related_skills: [find-nearby, google-workspace, agentmail] + related_skills: [maps, google-workspace, agentmail] category: productivity --- diff --git a/optional-skills/research/duckduckgo-search/SKILL.md b/optional-skills/research/duckduckgo-search/SKILL.md index ea14e6b30f..c24fc1b956 100644 --- a/optional-skills/research/duckduckgo-search/SKILL.md +++ b/optional-skills/research/duckduckgo-search/SKILL.md @@ -57,32 +57,32 @@ Use the `ddgs` command via `terminal` when it exists. This is the preferred path ```bash # Text search -ddgs text -k "python async programming" -m 5 +ddgs text -q "python async programming" -m 5 # News search -ddgs news -k "artificial intelligence" -m 5 +ddgs news -q "artificial intelligence" -m 5 # Image search -ddgs images -k "landscape photography" -m 10 +ddgs images -q "landscape photography" -m 10 # Video search -ddgs videos -k "python tutorial" -m 5 +ddgs videos -q "python tutorial" -m 5 # With region filter -ddgs text -k "best restaurants" -m 5 -r us-en +ddgs text -q "best restaurants" -m 5 -r us-en # Recent results only (d=day, w=week, m=month, y=year) -ddgs text -k "latest AI news" -m 5 -t w +ddgs text -q "latest AI news" -m 5 -t w # JSON output for parsing -ddgs text -k "fastapi tutorial" -m 5 -o json +ddgs text -q "fastapi tutorial" -m 5 -o json ``` ### CLI Flags | Flag | Description | Example | |------|-------------|---------| -| `-k` | Keywords (query) — **required** | `-k "search terms"` | +| `-q` | Query — **required** | `-q "search terms"` | | `-m` | Max results | `-m 5` | | `-r` | Region | `-r us-en` | | `-t` | Time limit | `-t w` (week) | @@ -189,7 +189,7 @@ DuckDuckGo returns titles, URLs, and snippets — not full page content. To get CLI example: ```bash -ddgs text -k "fastapi deployment guide" -m 3 -o json +ddgs text -q "fastapi deployment guide" -m 3 -o json ``` Python example, only after verifying `ddgs` is installed in that runtime: @@ -229,7 +229,7 @@ Then extract the best URL with `web_extract` or another content-retrieval tool. - **Do not assume the CLI exists**: Check `command -v ddgs` before using it. - **Do not assume `execute_code` can import `ddgs`**: `from ddgs import DDGS` may fail with `ModuleNotFoundError` unless that runtime was prepared separately. - **Package name**: The package is `ddgs` (previously `duckduckgo-search`). Install with `pip install ddgs`. -- **Don't confuse `-k` and `-m`** (CLI): `-k` is for keywords, `-m` is for max results count. +- **Don't confuse `-q` and `-m`** (CLI): `-q` is for the query, `-m` is for max results count. - **Empty results**: If `ddgs` returns nothing, it may be rate-limited. Wait a few seconds and retry. ## Validated With diff --git a/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh b/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh index b33ac8a60d..1553d45968 100755 --- a/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh +++ b/optional-skills/research/duckduckgo-search/scripts/duckduckgo.sh @@ -25,4 +25,4 @@ if ! 
command -v ddgs &> /dev/null; then
     exit 1
 fi
 
-ddgs text -k "$QUERY" -m "$MAX_RESULTS"
+ddgs text -q "$QUERY" -m "$MAX_RESULTS"
diff --git a/optional-skills/web-development/DESCRIPTION.md b/optional-skills/web-development/DESCRIPTION.md
new file mode 100644
index 0000000000..588817bbca
--- /dev/null
+++ b/optional-skills/web-development/DESCRIPTION.md
@@ -0,0 +1,5 @@
+# Web Development
+
+Optional skills for client-side web development workflows — embedding agents, copilots, and AI-native UX patterns into user-facing web apps.
+
+These are distinct from Hermes' own browser automation (Browserbase, Camofox), which operate *on* websites from outside. Web-development skills here help users build *into* their own websites.
diff --git a/optional-skills/web-development/page-agent/SKILL.md b/optional-skills/web-development/page-agent/SKILL.md
new file mode 100644
index 0000000000..caab19901f
--- /dev/null
+++ b/optional-skills/web-development/page-agent/SKILL.md
@@ -0,0 +1,189 @@
+---
+name: page-agent
+description: Embed alibaba/page-agent into your own web application — a pure-JavaScript in-page GUI agent that ships as a single `<script>` tag.
+---
+
+## Path 1 — CDN script tag (quick demo)
+
+Drop the demo bundle into any page:
+
+```html
+<script src="https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js"></script>
+```
+
+A panel appears. Type an instruction. Done.
+
+Bookmarklet form (drop into bookmarks bar, click on any page):
+
+```javascript
+javascript:(function(){var s=document.createElement('script');s.src='https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js';document.head.appendChild(s);})();
+```
+
+## Path 2 — npm install into your own web app (production use)
+
+Inside an existing web project (React / Vue / Svelte / plain):
+
+```bash
+npm install page-agent
+```
+
+Wire it up with your own LLM endpoint — **never ship the demo CDN to real users**:
+
+```javascript
+import { PageAgent } from 'page-agent'
+
+const agent = new PageAgent({
+  model: 'qwen3.5-plus',
+  baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
+  apiKey: process.env.LLM_API_KEY, // never hardcode
+  language: 'en-US',
+})
+
+// Show the panel for end users:
+agent.panel.show()
+
+// Or drive it programmatically:
+await agent.execute('Click submit button, then fill username as John')
+```
+
+Provider examples (any OpenAI-compatible endpoint works):
+
+| Provider | `baseURL` | `model` |
+|----------|-----------|---------|
+| Qwen / DashScope | `https://dashscope.aliyuncs.com/compatible-mode/v1` | `qwen3.5-plus` |
+| OpenAI | `https://api.openai.com/v1` | `gpt-4o-mini` |
+| Ollama (local) | `http://localhost:11434/v1` | `qwen3:14b` |
+| OpenRouter | `https://openrouter.ai/api/v1` | `anthropic/claude-sonnet-4.6` |
+
+**Key config fields** (passed to `new PageAgent({...})`):
+
+- `model`, `baseURL`, `apiKey` — LLM connection
+- `language` — UI language (`en-US`, `zh-CN`, etc.)
+- Allowlist and data-masking hooks exist for locking down what the agent can touch — see https://alibaba.github.io/page-agent/ for the full option list
+
+**Security.** Don't put your `apiKey` in client-side code for a real deployment — proxy LLM calls through your backend and point `baseURL` at your proxy. The demo CDN exists because alibaba runs that proxy for evaluation.
+
+## Path 3 — clone the source repo (contributing, or hacking on it)
+
+Use this when the user wants to modify page-agent itself, test it against arbitrary sites via a local IIFE bundle, or develop the browser extension.
+
+```bash
+git clone https://github.com/alibaba/page-agent.git
+cd page-agent
+npm ci    # exact lockfile install (or `npm i` to allow updates)
+```
+
+Create `.env` in the repo root with an LLM endpoint.
Example: + +``` +LLM_MODEL_NAME=gpt-4o-mini +LLM_API_KEY=sk-... +LLM_BASE_URL=https://api.openai.com/v1 +``` + +Ollama flavor: + +``` +LLM_BASE_URL=http://localhost:11434/v1 +LLM_API_KEY=NA +LLM_MODEL_NAME=qwen3:14b +``` + +Common commands: + +```bash +npm start # docs/website dev server +npm run build # build every package +npm run dev:demo # serve IIFE bundle at http://localhost:5174/page-agent.demo.js +npm run dev:ext # develop the browser extension (WXT + React) +npm run build:ext # build the extension +``` + +**Test on any website** using the local IIFE bundle. Add this bookmarklet: + +```javascript +javascript:(function(){var s=document.createElement('script');s.src=`http://localhost:5174/page-agent.demo.js?t=${Math.random()}`;s.onload=()=>console.log('PageAgent ready!');document.head.appendChild(s);})(); +``` + +Then: `npm run dev:demo`, click the bookmarklet on any page, and the local build injects. Auto-rebuilds on save. + +**Warning:** your `.env` `LLM_API_KEY` is inlined into the IIFE bundle during dev builds. Don't share the bundle. Don't commit it. Don't paste the URL into Slack. (Verified: grepping the public dev bundle returns the literal values from `.env`.) + +## Repo layout (Path 3) + +Monorepo with npm workspaces. Key packages: + +| Package | Path | Purpose | +|---------|------|---------| +| `page-agent` | `packages/page-agent/` | Main entry with UI panel | +| `@page-agent/core` | `packages/core/` | Core agent logic, no UI | +| `@page-agent/mcp` | `packages/mcp/` | MCP server (beta) | +| — | `packages/llms/` | LLM client | +| — | `packages/page-controller/` | DOM ops + visual feedback | +| — | `packages/ui/` | Panel + i18n | +| — | `packages/extension/` | Chrome/Firefox extension | +| — | `packages/website/` | Docs + landing site | + +## Verifying it works + +After Path 1 or Path 2: +1. Open the page in a browser with devtools open +2. You should see a floating panel. If not, check the console for errors (most common: CORS on the LLM endpoint, wrong `baseURL`, or a bad API key) +3. Type a simple instruction matching something visible on the page ("click the Login link") +4. Watch the Network tab — you should see a request to your `baseURL` + +After Path 3: +1. `npm run dev:demo` prints `Accepting connections at http://localhost:5174` +2. `curl -I http://localhost:5174/page-agent.demo.js` returns `HTTP/1.1 200 OK` with `Content-Type: application/javascript` +3. Click the bookmarklet on any site; panel appears + +## Pitfalls + +- **Demo CDN in production** — don't. It's rate-limited, uses alibaba's free proxy, and their terms forbid production use. +- **API key exposure** — any key passed to `new PageAgent({apiKey: ...})` ships in your JS bundle. Always proxy through your own backend for real deployments. +- **Non-OpenAI-compatible endpoints** fail silently or with cryptic errors. If your provider needs native Anthropic/Gemini formatting, use an OpenAI-compatibility proxy (LiteLLM, OpenRouter) in front. +- **CSP blocks** — sites with strict Content-Security-Policy may refuse to load the CDN script or disallow inline eval. In that case, self-host from your origin. +- **Restart dev server** after editing `.env` in Path 3 — Vite only reads env at startup. +- **Node version** — the repo declares `^22.13.0 || >=24`. Node 20 will fail `npm ci` with engine errors. +- **npm 10 vs 11** — docs say npm 11+; npm 10.9 actually works fine. 
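+
+For the **API key exposure** pitfall above: a minimal relay sketch, assuming Node 18+ with Express; the `/llm/v1` route and the `LLM_BASE_URL` / `LLM_API_KEY` names are illustrative, not part of page-agent:
+
+```javascript
+// server.js: browser -> your origin -> any OpenAI-compatible upstream.
+// The real key stays in the server environment; point PageAgent's
+// baseURL at this relay instead of the provider.
+import express from 'express'
+
+const app = express()
+app.use(express.json({ limit: '1mb' }))
+
+const UPSTREAM = process.env.LLM_BASE_URL ?? 'https://api.openai.com/v1'
+
+// Non-streaming sketch: forward chat completions, injecting the key server-side.
+app.post('/llm/v1/chat/completions', async (req, res) => {
+  const upstream = await fetch(`${UPSTREAM}/chat/completions`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Authorization: `Bearer ${process.env.LLM_API_KEY}`,
+    },
+    body: JSON.stringify(req.body),
+  })
+  res.status(upstream.status)
+  res.set('Content-Type', upstream.headers.get('content-type') ?? 'application/json')
+  res.send(Buffer.from(await upstream.arrayBuffer()))
+})
+
+app.listen(8787)
+```
+
+Client side, construct `new PageAgent({ baseURL: 'https://your.app/llm/v1', ... })`; whether `apiKey` can then be a dummy value depends on the page-agent version, so check the option list in the docs.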
+ +## Reference + +- Repo: https://github.com/alibaba/page-agent +- Docs: https://alibaba.github.io/page-agent/ +- License: MIT (built on browser-use's DOM processing internals, Copyright 2024 Gregor Zunic) diff --git a/package-lock.json b/package-lock.json index 9d0ae80cdc..728429e51b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1069,6 +1069,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.10.12", "caniuse-lite": "^1.0.30001782", @@ -3911,6 +3912,7 @@ "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz", "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==", "license": "Apache-2.0", + "peer": true, "dependencies": { "playwright-core": "1.59.1" }, @@ -3929,6 +3931,7 @@ "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz", "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==", "license": "Apache-2.0", + "peer": true, "bin": { "playwright-core": "cli.js" }, diff --git a/plans/gemini-oauth-provider.md b/plans/gemini-oauth-provider.md index 9953d0eca5..a466183e80 100644 --- a/plans/gemini-oauth-provider.md +++ b/plans/gemini-oauth-provider.md @@ -4,7 +4,7 @@ Add a first-class `gemini` provider that authenticates via Google OAuth, using the standard Gemini API (not Cloud Code Assist). Users who have a Google AI subscription or Gemini API access can authenticate through the browser without needing to manually copy API keys. ## Architecture Decision -- **Path A (chosen):** Standard Gemini API at `generativelanguage.googleapis.com/v1beta/openai/` +- **Path A (chosen):** Standard Gemini API at `generativelanguage.googleapis.com/v1beta` - **NOT Path B:** Cloud Code Assist (`cloudcode-pa.googleapis.com`) — rate-limited free tier, internal API, account ban risk - Standard `chat_completions` api_mode via OpenAI SDK — no new api_mode needed - Our own OAuth credentials — NOT sharing tokens with Gemini CLI @@ -32,9 +32,9 @@ Add a first-class `gemini` provider that authenticates via Google OAuth, using t - File locking for concurrent access (multiple agent sessions) ## API Integration -- Base URL: `https://generativelanguage.googleapis.com/v1beta/openai/` -- Auth: `Authorization: Bearer ` (passed as `api_key` to OpenAI SDK) -- api_mode: `chat_completions` (standard) +- Base URL: `https://generativelanguage.googleapis.com/v1beta` +- Auth: native Gemini API authentication handled by the provider adapter +- api_mode: `chat_completions` (standard facade over native transport) - Models: gemini-2.5-pro, gemini-2.5-flash, gemini-2.0-flash, etc. ## Files to Create/Modify diff --git a/plugins/disk-cleanup/README.md b/plugins/disk-cleanup/README.md new file mode 100644 index 0000000000..bc46047325 --- /dev/null +++ b/plugins/disk-cleanup/README.md @@ -0,0 +1,51 @@ +# disk-cleanup + +Auto-tracks and cleans up ephemeral files created during Hermes Agent +sessions — test scripts, temp outputs, cron logs, stale chrome profiles. +Scoped strictly to `$HERMES_HOME` and `/tmp/hermes-*`. + +Originally contributed by [@LVT382009](https://github.com/LVT382009) as a +skill in PR #12212. Ported to the plugin system so the behaviour runs +automatically via `post_tool_call` and `on_session_end` hooks — the agent +never needs to remember to call a tool. 
+
+## How it works
+
+| Hook | Behaviour |
+|---|---|
+| `post_tool_call` | When `write_file` / `terminal` / `patch` creates a file matching `test_*`, `tmp_*`, or `*.test.*` inside `HERMES_HOME`, track it silently as `test` / `temp` / `cron-output`. |
+| `on_session_end` | If any test files were auto-tracked during this turn, run `quick` cleanup (no prompts). |
+
+Deletion rules (same as the original PR):
+
+| Category | Threshold | Confirmation |
+|---|---|---|
+| `test` | every session end | Never |
+| `temp` | >7 days since tracked | Never |
+| `cron-output` | >14 days since tracked | Never |
+| empty dirs under HERMES_HOME | always | Never |
+| `research` | >30 days, beyond 10 newest | Always (deep only) |
+| `chrome-profile` | >14 days since tracked | Always (deep only) |
+| files >500 MB | never auto | Always (deep only) |
+
+## Slash command
+
+```
+/disk-cleanup status                   # breakdown + top-10 largest
+/disk-cleanup dry-run                  # preview without deleting
+/disk-cleanup quick                    # run safe cleanup now
+/disk-cleanup deep                     # quick + list items needing prompt
+/disk-cleanup track <path> <category>  # manual tracking
+/disk-cleanup forget <path>            # stop tracking
+```
+
+## Safety
+
+- `is_safe_path()` rejects anything outside `HERMES_HOME` or `/tmp/hermes-*`
+- Windows mounts (`/mnt/c` etc.) are rejected
+- The state directory `$HERMES_HOME/disk-cleanup/` is itself excluded
+- `$HERMES_HOME/logs/`, `memories/`, `sessions/`, `skills/`, `plugins/`,
+  and config files are never tracked
+- Backup/restore is scoped to `tracked.json` — the plugin never touches
+  agent logs
+- Atomic writes: `.tmp` → backup → rename
diff --git a/plugins/disk-cleanup/__init__.py b/plugins/disk-cleanup/__init__.py
new file mode 100644
index 0000000000..0a4b6c7ae1
--- /dev/null
+++ b/plugins/disk-cleanup/__init__.py
@@ -0,0 +1,316 @@
+"""disk-cleanup plugin — auto-cleanup of ephemeral Hermes session files.
+
+Wires three behaviours:
+
+1. ``post_tool_call`` hook — inspects ``write_file``, ``terminal``, and
+   ``patch`` tool results for newly-created paths matching test/temp
+   patterns under ``HERMES_HOME`` and tracks them silently. Zero agent
+   compliance required.
+
+2. ``on_session_end`` hook — when any test files were auto-tracked
+   during the just-finished turn, runs :func:`disk_cleanup.quick` and
+   logs a single line to ``$HERMES_HOME/disk-cleanup/cleanup.log``.
+
+3. ``/disk-cleanup`` slash command — manual ``status``, ``dry-run``,
+   ``quick``, ``deep``, ``track``, ``forget``.
+
+Replaces PR #12212's skill-plus-script design: the agent no longer
+needs to remember to run commands.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import shlex
+import threading
+from pathlib import Path
+from typing import Any, Dict, Optional, Set
+
+from . import disk_cleanup as dg
+
+logger = logging.getLogger(__name__)
+
+
+# Per-task set of "test files newly tracked this turn". Keyed by task_id
+# (or session_id as fallback) so on_session_end can decide whether to run
+# cleanup. Guarded by a lock — post_tool_call can fire concurrently on
+# parallel tool calls.
+_recent_test_tracks: Dict[str, Set[str]] = {} +_lock = threading.Lock() + + +# Tool-call result shapes we can parse +_WRITE_FILE_PATH_KEY = "path" +_TERMINAL_PATH_REGEX = re.compile(r"(?:^|\s)(/[^\s'\"`]+|\~/[^\s'\"`]+)") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _tracker_key(task_id: str, session_id: str) -> str: + return task_id or session_id or "default" + + +def _record_track(task_id: str, session_id: str, path: Path, category: str) -> None: + """Record that we tracked *path* as *category* during this turn.""" + if category != "test": + return + key = _tracker_key(task_id, session_id) + with _lock: + _recent_test_tracks.setdefault(key, set()).add(str(path)) + + +def _drain(task_id: str, session_id: str) -> Set[str]: + """Pop the set of test paths tracked during this turn.""" + key = _tracker_key(task_id, session_id) + with _lock: + return _recent_test_tracks.pop(key, set()) + + +def _attempt_track(path_str: str, task_id: str, session_id: str) -> None: + """Best-effort auto-track. Never raises.""" + try: + p = Path(path_str).expanduser() + except Exception: + return + if not p.exists(): + return + category = dg.guess_category(p) + if category is None: + return + newly = dg.track(str(p), category, silent=True) + if newly: + _record_track(task_id, session_id, p, category) + + +def _extract_paths_from_write_file(args: Dict[str, Any]) -> Set[str]: + path = args.get(_WRITE_FILE_PATH_KEY) + return {path} if isinstance(path, str) and path else set() + + +def _extract_paths_from_patch(args: Dict[str, Any]) -> Set[str]: + # The patch tool creates new files via the `mode="patch"` path too, but + # most of its use is editing existing files — we only care about new + # ephemeral creations, so treat patch conservatively and only pick up + # the single-file `path` arg. Track-then-cleanup is idempotent, so + # re-tracking an already-tracked file is a no-op (dedup in track()). + path = args.get("path") + return {path} if isinstance(path, str) and path else set() + + +def _extract_paths_from_terminal(args: Dict[str, Any], result: str) -> Set[str]: + """Best-effort: pull candidate filesystem paths from a terminal command + and its output, then let ``guess_category`` / ``is_safe_path`` filter. + """ + paths: Set[str] = set() + cmd = args.get("command") or "" + if isinstance(cmd, str) and cmd: + # Tokenise the command — catches `touch /tmp/hermes-x/test_foo.py` + try: + for tok in shlex.split(cmd, posix=True): + if tok.startswith(("/", "~")): + paths.add(tok) + except ValueError: + pass + # Only scan the result text if it's a reasonable size (avoid 50KB dumps). 
+    if isinstance(result, str) and len(result) < 4096:
+        for match in _TERMINAL_PATH_REGEX.findall(result):
+            paths.add(match)
+    return paths
+
+
+# ---------------------------------------------------------------------------
+# Hooks
+# ---------------------------------------------------------------------------
+
+def _on_post_tool_call(
+    tool_name: str = "",
+    args: Optional[Dict[str, Any]] = None,
+    result: Any = None,
+    task_id: str = "",
+    session_id: str = "",
+    tool_call_id: str = "",
+    **_: Any,
+) -> None:
+    """Auto-track ephemeral files created by recent tool calls."""
+    if not isinstance(args, dict):
+        return
+
+    candidates: Set[str] = set()
+    if tool_name == "write_file":
+        candidates = _extract_paths_from_write_file(args)
+    elif tool_name == "patch":
+        candidates = _extract_paths_from_patch(args)
+    elif tool_name == "terminal":
+        candidates = _extract_paths_from_terminal(args, result if isinstance(result, str) else "")
+    else:
+        return
+
+    for path_str in candidates:
+        _attempt_track(path_str, task_id, session_id)
+
+
+def _on_session_end(
+    session_id: str = "",
+    completed: bool = True,
+    interrupted: bool = False,
+    **_: Any,
+) -> None:
+    """Run quick cleanup if any test files were tracked during this turn."""
+    # Drain both task-level and session-level buckets. In practice only one
+    # is populated per turn; the other is empty.
+    drained_session = _drain("", session_id)
+    # Also drain any task-scoped buckets that happen to exist. This is a
+    # cheap sweep: if an agent spawned subagents (each with their own
+    # task_id) they'll have recorded into separate buckets; we want to
+    # clean them all up at session end.
+    with _lock:
+        task_buckets = list(_recent_test_tracks.keys())
+        for key in task_buckets:
+            if key and key != session_id:
+                _recent_test_tracks.pop(key, None)
+
+    if not drained_session and not task_buckets:
+        return
+
+    try:
+        summary = dg.quick()
+    except Exception as exc:
+        logger.debug("disk-cleanup quick cleanup failed: %s", exc)
+        return
+
+    if summary["deleted"] or summary["empty_dirs"]:
+        dg._log(
+            f"AUTO_QUICK (session_end): deleted={summary['deleted']} "
+            f"dirs={summary['empty_dirs']} freed={dg.fmt_size(summary['freed'])}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Slash command
+# ---------------------------------------------------------------------------
+
+_HELP_TEXT = """\
+/disk-cleanup — ephemeral-file cleanup
+
+Subcommands:
+  status                   Per-category breakdown + top-10 largest
+  dry-run                  Preview what quick/deep would delete
+  quick                    Run safe cleanup now (no prompts)
+  deep                     Run quick, then list items that need prompts
+  track <path> <category>  Manually add a path to tracking
+  forget <path>            Stop tracking a path (does not delete)
+
+Categories: temp | test | research | download | chrome-profile | cron-output | other
+
+All operations are scoped to HERMES_HOME and /tmp/hermes-*.
+Test files are auto-tracked on write_file / terminal / patch and auto-cleaned at session end.
+"""
+
+
+def _fmt_summary(summary: Dict[str, Any]) -> str:
+    base = (
+        f"[disk-cleanup] Cleaned {summary['deleted']} files + "
+        f"{summary['empty_dirs']} empty dirs, freed {dg.fmt_size(summary['freed'])}."
+    )
+    if summary.get("errors"):
+        base += f"\n  {len(summary['errors'])} error(s); see cleanup.log."
+    return base
+
+
+def _handle_slash(raw_args: str) -> Optional[str]:
+    argv = raw_args.strip().split()
+    if not argv or argv[0] in ("help", "-h", "--help"):
+        return _HELP_TEXT
+
+    sub = argv[0]
+
+    if sub == "status":
+        return dg.format_status(dg.status())
+
+    if sub == "dry-run":
+        auto, prompt = dg.dry_run()
+        auto_size = sum(i["size"] for i in auto)
+        prompt_size = sum(i["size"] for i in prompt)
+        lines = [
+            "Dry-run preview (nothing deleted):",
+            f"  Auto-delete : {len(auto)} files ({dg.fmt_size(auto_size)})",
+        ]
+        for item in auto:
+            lines.append(f"    [{item['category']}] {item['path']}")
+        lines.append(
+            f"  Needs prompt: {len(prompt)} files ({dg.fmt_size(prompt_size)})"
+        )
+        for item in prompt:
+            lines.append(f"    [{item['category']}] {item['path']}")
+        lines.append(
+            f"\n  Total potential: {dg.fmt_size(auto_size + prompt_size)}"
+        )
+        return "\n".join(lines)
+
+    if sub == "quick":
+        return _fmt_summary(dg.quick())
+
+    if sub == "deep":
+        # In-session deep can't prompt the user interactively — show what
+        # quick cleaned plus the items that WOULD need confirmation.
+        quick_summary = dg.quick()
+        _auto, prompt_items = dg.dry_run()
+        lines = [_fmt_summary(quick_summary)]
+        if prompt_items:
+            size = sum(i["size"] for i in prompt_items)
+            lines.append(
+                f"\n{len(prompt_items)} item(s) need confirmation "
+                f"({dg.fmt_size(size)}):"
+            )
+            for item in prompt_items:
+                lines.append(f"  [{item['category']}] {item['path']}")
+            lines.append(
+                "\nRun `/disk-cleanup forget <path>` to skip, or delete "
+                "manually via terminal."
+            )
+        return "\n".join(lines)
+
+    if sub == "track":
+        if len(argv) < 3:
+            return "Usage: /disk-cleanup track <path> <category>"
+        path_arg = argv[1]
+        category = argv[2]
+        if category not in dg.ALLOWED_CATEGORIES:
+            return (
+                f"Unknown category '{category}'. "
+                f"Allowed: {sorted(dg.ALLOWED_CATEGORIES)}"
+            )
+        if dg.track(path_arg, category, silent=True):
+            return f"Tracked {path_arg} as '{category}'."
+        return (
+            f"Not tracked (already present, missing, or outside HERMES_HOME): "
+            f"{path_arg}"
+        )
+
+    if sub == "forget":
+        if len(argv) < 2:
+            return "Usage: /disk-cleanup forget <path>"
+        n = dg.forget(argv[1])
+        return (
+            f"Removed {n} tracking entr{'y' if n == 1 else 'ies'} for {argv[1]}."
+            if n else f"Not found in tracking: {argv[1]}"
+        )
+
+    return f"Unknown subcommand: {sub}\n\n{_HELP_TEXT}"
+
+
+# ---------------------------------------------------------------------------
+# Plugin registration
+# ---------------------------------------------------------------------------
+
+def register(ctx) -> None:
+    ctx.register_hook("post_tool_call", _on_post_tool_call)
+    ctx.register_hook("on_session_end", _on_session_end)
+    ctx.register_command(
+        "disk-cleanup",
+        handler=_handle_slash,
+        description="Track and clean up ephemeral Hermes session files.",
+    )
diff --git a/plugins/disk-cleanup/disk_cleanup.py b/plugins/disk-cleanup/disk_cleanup.py
new file mode 100755
index 0000000000..cef2698316
--- /dev/null
+++ b/plugins/disk-cleanup/disk_cleanup.py
@@ -0,0 +1,496 @@
+"""disk_cleanup — ephemeral file cleanup for Hermes Agent.
+
+Library module wrapping the deterministic cleanup rules written by
+@LVT382009 in PR #12212. The plugin ``__init__.py`` wires these
+functions into ``post_tool_call`` and ``on_session_end`` hooks so
+tracking and cleanup happen automatically — the agent never needs to
+call a tool or remember a skill.
+ +Rules: + - test files → delete immediately at task end (age >= 0) + - temp files → delete after 7 days + - cron-output → delete after 14 days + - empty dirs → always delete (under HERMES_HOME) + - research → keep 10 newest, prompt for older (deep only) + - chrome-profile→ prompt after 14 days (deep only) + - >500 MB files → prompt always (deep only) + +Scope: strictly HERMES_HOME and /tmp/hermes-* +Never touches: ~/.hermes/logs/ or any system directory. +""" + +from __future__ import annotations + +import json +import logging +import shutil +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +try: + from hermes_constants import get_hermes_home +except Exception: # pragma: no cover — plugin may load before constants resolves + import os + + def get_hermes_home() -> Path: # type: ignore[no-redef] + val = (os.environ.get("HERMES_HOME") or "").strip() + return Path(val).resolve() if val else (Path.home() / ".hermes").resolve() + + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +def get_state_dir() -> Path: + """State dir — separate from ``$HERMES_HOME/logs/``.""" + return get_hermes_home() / "disk-cleanup" + + +def get_tracked_file() -> Path: + return get_state_dir() / "tracked.json" + + +def get_log_file() -> Path: + """Audit log — intentionally NOT under ``$HERMES_HOME/logs/``.""" + return get_state_dir() / "cleanup.log" + + +# --------------------------------------------------------------------------- +# Path safety +# --------------------------------------------------------------------------- + +def is_safe_path(path: Path) -> bool: + """Accept only paths under HERMES_HOME or ``/tmp/hermes-*``. + + Rejects Windows mounts (``/mnt/c`` etc.) and any system directory. + """ + hermes_home = get_hermes_home() + try: + path.resolve().relative_to(hermes_home) + return True + except (ValueError, OSError): + pass + # Allow /tmp/hermes-* explicitly + parts = path.parts + if len(parts) >= 3 and parts[1] == "tmp" and parts[2].startswith("hermes-"): + return True + return False + + +# --------------------------------------------------------------------------- +# Audit log +# --------------------------------------------------------------------------- + +def _log(message: str) -> None: + try: + log_file = get_log_file() + log_file.parent.mkdir(parents=True, exist_ok=True) + ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + with open(log_file, "a") as f: + f.write(f"[{ts}] {message}\n") + except OSError: + # Never let the audit log break the agent loop. + pass + + +# --------------------------------------------------------------------------- +# tracked.json — atomic read/write, backup scoped to tracked.json only +# --------------------------------------------------------------------------- + +def load_tracked() -> List[Dict[str, Any]]: + """Load tracked.json. 
Restores from ``.bak`` on corruption.""" + tf = get_tracked_file() + tf.parent.mkdir(parents=True, exist_ok=True) + + if not tf.exists(): + return [] + + try: + return json.loads(tf.read_text()) + except (json.JSONDecodeError, ValueError): + bak = tf.with_suffix(".json.bak") + if bak.exists(): + try: + data = json.loads(bak.read_text()) + _log("WARN: tracked.json corrupted — restored from .bak") + return data + except Exception: + pass + _log("WARN: tracked.json corrupted, no backup — starting fresh") + return [] + + +def save_tracked(tracked: List[Dict[str, Any]]) -> None: + """Atomic write: ``.tmp`` → backup old → rename.""" + tf = get_tracked_file() + tf.parent.mkdir(parents=True, exist_ok=True) + tmp = tf.with_suffix(".json.tmp") + tmp.write_text(json.dumps(tracked, indent=2)) + if tf.exists(): + shutil.copy2(tf, tf.with_suffix(".json.bak")) + tmp.replace(tf) + + +# --------------------------------------------------------------------------- +# Categories +# --------------------------------------------------------------------------- + +ALLOWED_CATEGORIES = { + "temp", "test", "research", "download", + "chrome-profile", "cron-output", "other", +} + + +def fmt_size(n: float) -> str: + for unit in ("B", "KB", "MB", "GB", "TB"): + if n < 1024: + return f"{n:.1f} {unit}" + n /= 1024 + return f"{n:.1f} PB" + + +# --------------------------------------------------------------------------- +# Track / forget +# --------------------------------------------------------------------------- + +def track(path_str: str, category: str, silent: bool = False) -> bool: + """Register a file for tracking. Returns True if newly tracked.""" + if category not in ALLOWED_CATEGORIES: + _log(f"WARN: unknown category '{category}', using 'other'") + category = "other" + + path = Path(path_str).resolve() + + if not path.exists(): + _log(f"SKIP: {path} (does not exist)") + return False + + if not is_safe_path(path): + _log(f"REJECT: {path} (outside HERMES_HOME)") + return False + + size = path.stat().st_size if path.is_file() else 0 + tracked = load_tracked() + + # Deduplicate + if any(item["path"] == str(path) for item in tracked): + return False + + tracked.append({ + "path": str(path), + "timestamp": datetime.now(timezone.utc).isoformat(), + "category": category, + "size": size, + }) + save_tracked(tracked) + _log(f"TRACKED: {path} ({category}, {fmt_size(size)})") + if not silent: + print(f"Tracked: {path} ({category}, {fmt_size(size)})") + return True + + +def forget(path_str: str) -> int: + """Remove a path from tracking without deleting the file.""" + p = Path(path_str).resolve() + tracked = load_tracked() + before = len(tracked) + tracked = [i for i in tracked if Path(i["path"]).resolve() != p] + removed = before - len(tracked) + if removed: + save_tracked(tracked) + _log(f"FORGOT: {p} ({removed} entries)") + return removed + + +# --------------------------------------------------------------------------- +# Dry run +# --------------------------------------------------------------------------- + +def dry_run() -> Tuple[List[Dict], List[Dict]]: + """Return (auto_delete_list, needs_prompt_list) without touching files.""" + tracked = load_tracked() + now = datetime.now(timezone.utc) + + auto: List[Dict] = [] + prompt: List[Dict] = [] + + for item in tracked: + p = Path(item["path"]) + if not p.exists(): + continue + age = (now - datetime.fromisoformat(item["timestamp"])).days + cat = item["category"] + size = item["size"] + + if cat == "test": + auto.append(item) + elif cat == "temp" and age > 7: + 
auto.append(item) + elif cat == "cron-output" and age > 14: + auto.append(item) + elif cat == "research" and age > 30: + prompt.append(item) + elif cat == "chrome-profile" and age > 14: + prompt.append(item) + elif size > 500 * 1024 * 1024: + prompt.append(item) + + return auto, prompt + + +# --------------------------------------------------------------------------- +# Quick cleanup +# --------------------------------------------------------------------------- + +def quick() -> Dict[str, Any]: + """Safe deterministic cleanup — no prompts. + + Returns: ``{"deleted": N, "empty_dirs": N, "freed": bytes, + "errors": [str, ...]}``. + """ + tracked = load_tracked() + now = datetime.now(timezone.utc) + deleted = 0 + freed = 0 + new_tracked: List[Dict] = [] + errors: List[str] = [] + + for item in tracked: + p = Path(item["path"]) + cat = item["category"] + + if not p.exists(): + _log(f"STALE: {p} (removed from tracking)") + continue + + age = (now - datetime.fromisoformat(item["timestamp"])).days + + should_delete = ( + cat == "test" + or (cat == "temp" and age > 7) + or (cat == "cron-output" and age > 14) + ) + + if should_delete: + try: + if p.is_file(): + p.unlink() + elif p.is_dir(): + shutil.rmtree(p) + freed += item["size"] + deleted += 1 + _log(f"DELETED: {p} ({cat}, {fmt_size(item['size'])})") + except OSError as e: + _log(f"ERROR deleting {p}: {e}") + errors.append(f"{p}: {e}") + new_tracked.append(item) + else: + new_tracked.append(item) + + # Remove empty dirs under HERMES_HOME (but leave HERMES_HOME itself and + # a short list of well-known top-level state dirs alone — a fresh install + # has these empty, and deleting them would surprise the user). + hermes_home = get_hermes_home() + _PROTECTED_TOP_LEVEL = { + "logs", "memories", "sessions", "cron", "cronjobs", + "cache", "skills", "plugins", "disk-cleanup", "optional-skills", + "hermes-agent", "backups", "profiles", ".worktrees", + } + empty_removed = 0 + try: + for dirpath in sorted(hermes_home.rglob("*"), reverse=True): + if not dirpath.is_dir() or dirpath == hermes_home: + continue + try: + rel_parts = dirpath.relative_to(hermes_home).parts + except ValueError: + continue + # Skip the well-known top-level state dirs themselves. + if len(rel_parts) == 1 and rel_parts[0] in _PROTECTED_TOP_LEVEL: + continue + try: + if not any(dirpath.iterdir()): + dirpath.rmdir() + empty_removed += 1 + _log(f"DELETED: {dirpath} (empty dir)") + except OSError: + pass + except OSError: + pass + + save_tracked(new_tracked) + _log( + f"QUICK_SUMMARY: {deleted} files, {empty_removed} dirs, " + f"{fmt_size(freed)}" + ) + return { + "deleted": deleted, + "empty_dirs": empty_removed, + "freed": freed, + "errors": errors, + } + + +# --------------------------------------------------------------------------- +# Deep cleanup (interactive — not called from plugin hooks) +# --------------------------------------------------------------------------- + +def deep( + confirm: Optional[callable] = None, +) -> Dict[str, Any]: + """Deep cleanup. + + Runs :func:`quick` first, then asks the *confirm* callable for each + risky item (research > 30d beyond 10 newest, chrome-profile > 14d, + any file > 500 MB). *confirm(item)* must return True to delete. + + Returns: ``{"quick": {...}, "deep_deleted": N, "deep_freed": bytes}``. + """ + quick_result = quick() + + if confirm is None: + # No interactive confirmer — deep stops after the quick pass. 
+ return {"quick": quick_result, "deep_deleted": 0, "deep_freed": 0} + + tracked = load_tracked() + now = datetime.now(timezone.utc) + research, chrome, large = [], [], [] + + for item in tracked: + p = Path(item["path"]) + if not p.exists(): + continue + age = (now - datetime.fromisoformat(item["timestamp"])).days + cat = item["category"] + + if cat == "research" and age > 30: + research.append(item) + elif cat == "chrome-profile" and age > 14: + chrome.append(item) + elif item["size"] > 500 * 1024 * 1024: + large.append(item) + + research.sort(key=lambda x: x["timestamp"], reverse=True) + old_research = research[10:] + + freed, count = 0, 0 + to_remove: List[Dict] = [] + + for group in (old_research, chrome, large): + for item in group: + if confirm(item): + try: + p = Path(item["path"]) + if p.is_file(): + p.unlink() + elif p.is_dir(): + shutil.rmtree(p) + to_remove.append(item) + freed += item["size"] + count += 1 + _log( + f"DELETED: {p} ({item['category']}, " + f"{fmt_size(item['size'])})" + ) + except OSError as e: + _log(f"ERROR deleting {item['path']}: {e}") + + if to_remove: + remove_paths = {i["path"] for i in to_remove} + save_tracked([i for i in tracked if i["path"] not in remove_paths]) + + return {"quick": quick_result, "deep_deleted": count, "deep_freed": freed} + + +# --------------------------------------------------------------------------- +# Status +# --------------------------------------------------------------------------- + +def status() -> Dict[str, Any]: + """Return per-category breakdown and top 10 largest tracked files.""" + tracked = load_tracked() + cats: Dict[str, Dict] = {} + for item in tracked: + c = item["category"] + cats.setdefault(c, {"count": 0, "size": 0}) + cats[c]["count"] += 1 + cats[c]["size"] += item["size"] + + existing = [ + (i["path"], i["size"], i["category"]) + for i in tracked if Path(i["path"]).exists() + ] + existing.sort(key=lambda x: x[1], reverse=True) + + return { + "categories": cats, + "top10": existing[:10], + "total_tracked": len(tracked), + } + + +def format_status(s: Dict[str, Any]) -> str: + """Human-readable status string (for slash command output).""" + lines = [f"{'Category':<20} {'Files':>6} {'Size':>10}", "-" * 40] + cats = s["categories"] + for cat, d in sorted(cats.items(), key=lambda x: x[1]["size"], reverse=True): + lines.append(f"{cat:<20} {d['count']:>6} {fmt_size(d['size']):>10}") + + if not cats: + lines.append("(nothing tracked yet)") + + lines.append("") + lines.append("Top 10 largest tracked files:") + if not s["top10"]: + lines.append(" (none)") + else: + for rank, (path, size, cat) in enumerate(s["top10"], 1): + lines.append(f" {rank:>2}. {fmt_size(size):>8} [{cat}] {path}") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Auto-categorisation from tool-call inspection +# --------------------------------------------------------------------------- + +_TEST_PATTERNS = ("test_", "tmp_") +_TEST_SUFFIXES = (".test.py", ".test.js", ".test.ts", ".test.md") + + +def guess_category(path: Path) -> Optional[str]: + """Return a category label for *path*, or None if we shouldn't track it. + + Used by the ``post_tool_call`` hook to auto-track ephemeral files. + """ + if not is_safe_path(path): + return None + + # Skip the state dir itself, logs, memory files, sessions, config. 
+ hermes_home = get_hermes_home() + try: + rel = path.resolve().relative_to(hermes_home) + top = rel.parts[0] if rel.parts else "" + if top in { + "disk-cleanup", "logs", "memories", "sessions", "config.yaml", + "skills", "plugins", ".env", "USER.md", "MEMORY.md", "SOUL.md", + "auth.json", "hermes-agent", + }: + return None + if top == "cron" or top == "cronjobs": + return "cron-output" + if top == "cache": + return "temp" + except ValueError: + # Path isn't under HERMES_HOME (e.g. /tmp/hermes-*) — fall through. + pass + + name = path.name + if name.startswith(_TEST_PATTERNS): + return "test" + if any(name.endswith(sfx) for sfx in _TEST_SUFFIXES): + return "test" + return None diff --git a/plugins/disk-cleanup/plugin.yaml b/plugins/disk-cleanup/plugin.yaml new file mode 100644 index 0000000000..fe005c8849 --- /dev/null +++ b/plugins/disk-cleanup/plugin.yaml @@ -0,0 +1,7 @@ +name: disk-cleanup +version: 2.0.0 +description: "Auto-track and clean up ephemeral files (test scripts, temp outputs, cron logs) created during Hermes sessions. Runs via plugin hooks — no agent action required." +author: "@LVT382009 (original), NousResearch (plugin port)" +hooks: + - post_tool_call + - on_session_end diff --git a/plugins/image_gen/openai/__init__.py b/plugins/image_gen/openai/__init__.py new file mode 100644 index 0000000000..c1a719f910 --- /dev/null +++ b/plugins/image_gen/openai/__init__.py @@ -0,0 +1,303 @@ +"""OpenAI image generation backend. + +Exposes OpenAI's ``gpt-image-2`` model at three quality tiers as an +:class:`ImageGenProvider` implementation. The tiers are implemented as +three virtual model IDs so the ``hermes tools`` model picker and the +``image_gen.model`` config key behave like any other multi-model backend: + + gpt-image-2-low ~15s fastest, good for iteration + gpt-image-2-medium ~40s default — balanced + gpt-image-2-high ~2min slowest, highest fidelity + +All three hit the same underlying API model (``gpt-image-2``) with a +different ``quality`` parameter. Output is base64 JSON → saved under +``$HERMES_HOME/cache/images/``. + +Selection precedence (first hit wins): + +1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests) +2. ``image_gen.openai.model`` in ``config.yaml`` +3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs) +4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium`` +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict, List, Optional, Tuple + +from agent.image_gen_provider import ( + DEFAULT_ASPECT_RATIO, + ImageGenProvider, + error_response, + resolve_aspect_ratio, + save_b64_image, + success_response, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Model catalog +# --------------------------------------------------------------------------- +# +# All three IDs resolve to the same underlying API model with a different +# ``quality`` setting. ``api_model`` is what gets sent to OpenAI; +# ``quality`` is the knob that changes generation time and output fidelity. 
+ +API_MODEL = "gpt-image-2" + +_MODELS: Dict[str, Dict[str, Any]] = { + "gpt-image-2-low": { + "display": "GPT Image 2 (Low)", + "speed": "~15s", + "strengths": "Fast iteration, lowest cost", + "quality": "low", + }, + "gpt-image-2-medium": { + "display": "GPT Image 2 (Medium)", + "speed": "~40s", + "strengths": "Balanced — default", + "quality": "medium", + }, + "gpt-image-2-high": { + "display": "GPT Image 2 (High)", + "speed": "~2min", + "strengths": "Highest fidelity, strongest prompt adherence", + "quality": "high", + }, +} + +DEFAULT_MODEL = "gpt-image-2-medium" + +_SIZES = { + "landscape": "1536x1024", + "square": "1024x1024", + "portrait": "1024x1536", +} + + +def _load_openai_config() -> Dict[str, Any]: + """Read ``image_gen`` from config.yaml (returns {} on any failure).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + return section if isinstance(section, dict) else {} + except Exception as exc: + logger.debug("Could not load image_gen config: %s", exc) + return {} + + +def _resolve_model() -> Tuple[str, Dict[str, Any]]: + """Decide which tier to use and return ``(model_id, meta)``.""" + env_override = os.environ.get("OPENAI_IMAGE_MODEL") + if env_override and env_override in _MODELS: + return env_override, _MODELS[env_override] + + cfg = _load_openai_config() + openai_cfg = cfg.get("openai") if isinstance(cfg.get("openai"), dict) else {} + candidate: Optional[str] = None + if isinstance(openai_cfg, dict): + value = openai_cfg.get("model") + if isinstance(value, str) and value in _MODELS: + candidate = value + if candidate is None: + top = cfg.get("model") + if isinstance(top, str) and top in _MODELS: + candidate = top + + if candidate is not None: + return candidate, _MODELS[candidate] + + return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL] + + +# --------------------------------------------------------------------------- +# Provider +# --------------------------------------------------------------------------- + + +class OpenAIImageGenProvider(ImageGenProvider): + """OpenAI ``images.generate`` backend — gpt-image-2 at low/medium/high.""" + + @property + def name(self) -> str: + return "openai" + + @property + def display_name(self) -> str: + return "OpenAI" + + def is_available(self) -> bool: + if not os.environ.get("OPENAI_API_KEY"): + return False + try: + import openai # noqa: F401 + except ImportError: + return False + return True + + def list_models(self) -> List[Dict[str, Any]]: + return [ + { + "id": model_id, + "display": meta["display"], + "speed": meta["speed"], + "strengths": meta["strengths"], + "price": "varies", + } + for model_id, meta in _MODELS.items() + ] + + def default_model(self) -> Optional[str]: + return DEFAULT_MODEL + + def get_setup_schema(self) -> Dict[str, Any]: + return { + "name": "OpenAI", + "badge": "paid", + "tag": "gpt-image-2 at low/medium/high quality tiers", + "env_vars": [ + { + "key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys", + }, + ], + } + + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + prompt = (prompt or "").strip() + aspect = resolve_aspect_ratio(aspect_ratio) + + if not prompt: + return error_response( + error="Prompt is required and must be a non-empty string", + error_type="invalid_argument", + provider="openai", + aspect_ratio=aspect, + ) + + if not os.environ.get("OPENAI_API_KEY"): + return error_response( + 
error=( + "OPENAI_API_KEY not set. Run `hermes tools` → Image " + "Generation → OpenAI to configure, or `hermes setup` " + "to add the key." + ), + error_type="auth_required", + provider="openai", + aspect_ratio=aspect, + ) + + try: + import openai + except ImportError: + return error_response( + error="openai Python package not installed (pip install openai)", + error_type="missing_dependency", + provider="openai", + aspect_ratio=aspect, + ) + + tier_id, meta = _resolve_model() + size = _SIZES.get(aspect, _SIZES["square"]) + + # gpt-image-2 returns b64_json unconditionally and REJECTS + # ``response_format`` as an unknown parameter. Don't send it. + payload: Dict[str, Any] = { + "model": API_MODEL, + "prompt": prompt, + "size": size, + "n": 1, + "quality": meta["quality"], + } + + try: + client = openai.OpenAI() + response = client.images.generate(**payload) + except Exception as exc: + logger.debug("OpenAI image generation failed", exc_info=True) + return error_response( + error=f"OpenAI image generation failed: {exc}", + error_type="api_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + data = getattr(response, "data", None) or [] + if not data: + return error_response( + error="OpenAI returned no image data", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + first = data[0] + b64 = getattr(first, "b64_json", None) + url = getattr(first, "url", None) + revised_prompt = getattr(first, "revised_prompt", None) + + if b64: + try: + saved_path = save_b64_image(b64, prefix=f"openai_{tier_id}") + except Exception as exc: + return error_response( + error=f"Could not save image to cache: {exc}", + error_type="io_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + image_ref = str(saved_path) + elif url: + # Defensive — gpt-image-2 returns b64 today, but fall back + # gracefully if the API ever changes. + image_ref = url + else: + return error_response( + error="OpenAI response contained neither b64_json nor URL", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + extra: Dict[str, Any] = {"size": size, "quality": meta["quality"]} + if revised_prompt: + extra["revised_prompt"] = revised_prompt + + return success_response( + image=image_ref, + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + provider="openai", + extra=extra, + ) + + +# --------------------------------------------------------------------------- +# Plugin entry point +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + """Plugin entry point — wire ``OpenAIImageGenProvider`` into the registry.""" + ctx.register_image_gen_provider(OpenAIImageGenProvider()) diff --git a/plugins/image_gen/openai/plugin.yaml b/plugins/image_gen/openai/plugin.yaml new file mode 100644 index 0000000000..18e4d86390 --- /dev/null +++ b/plugins/image_gen/openai/plugin.yaml @@ -0,0 +1,7 @@ +name: openai +version: 1.0.0 +description: "OpenAI image generation backend (gpt-image-2). Saves generated images to $HERMES_HOME/cache/images/." 
+author: NousResearch +kind: backend +requires_env: + - OPENAI_API_KEY diff --git a/plugins/memory/hindsight/README.md b/plugins/memory/hindsight/README.md index 024a993031..3fbdc2aba4 100644 --- a/plugins/memory/hindsight/README.md +++ b/plugins/memory/hindsight/README.md @@ -84,7 +84,10 @@ Config file: `~/.hermes/hindsight/config.json` | `retain_async` | `true` | Process retain asynchronously on the Hindsight server | | `retain_every_n_turns` | `1` | Retain every N turns (1 = every turn) | | `retain_context` | `conversation between Hermes Agent and the User` | Context label for retained memories | -| `tags` | — | Tags applied when storing memories | +| `retain_tags` | — | Default tags applied to retained memories; merged with per-call tool tags | +| `retain_source` | — | Optional `metadata.source` attached to retained memories | +| `retain_user_prefix` | `User` | Label used before user turns in auto-retained transcripts | +| `retain_assistant_prefix` | `Assistant` | Label used before assistant turns in auto-retained transcripts | ### Integration @@ -113,7 +116,7 @@ Available in `hybrid` and `tools` memory modes: | Tool | Description | |------|-------------| -| `hindsight_retain` | Store information with auto entity extraction | +| `hindsight_retain` | Store information with auto entity extraction; supports optional per-call `tags` | | `hindsight_recall` | Multi-strategy search (semantic + entity graph) | | `hindsight_reflect` | Cross-memory synthesis (LLM-powered) | diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index c39679b73c..2b233e265c 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -6,11 +6,15 @@ retrieval. Supports cloud (API key) and local modes. Original PR #1811 by benfrank241, adapted to MemoryProvider ABC. Config via environment variables: - HINDSIGHT_API_KEY — API key for Hindsight Cloud - HINDSIGHT_BANK_ID — memory bank identifier (default: hermes) - HINDSIGHT_BUDGET — recall budget: low/mid/high (default: mid) - HINDSIGHT_API_URL — API endpoint - HINDSIGHT_MODE — cloud or local (default: cloud) + HINDSIGHT_API_KEY — API key for Hindsight Cloud + HINDSIGHT_BANK_ID — memory bank identifier (default: hermes) + HINDSIGHT_BUDGET — recall budget: low/mid/high (default: mid) + HINDSIGHT_API_URL — API endpoint + HINDSIGHT_MODE — cloud or local (default: cloud) + HINDSIGHT_RETAIN_TAGS — comma-separated tags attached to retained memories + HINDSIGHT_RETAIN_SOURCE — metadata source value attached to retained memories + HINDSIGHT_RETAIN_USER_PREFIX — label used before user turns in retained transcripts + HINDSIGHT_RETAIN_ASSISTANT_PREFIX — label used before assistant turns in retained transcripts Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to ~/.hindsight/config.json (legacy, shared) for backward compatibility. @@ -24,7 +28,7 @@ import logging import os import threading -from hermes_constants import get_hermes_home +from datetime import datetime, timezone from typing import Any, Dict, List from agent.memory_provider import MemoryProvider @@ -99,6 +103,11 @@ RETAIN_SCHEMA = { "properties": { "content": {"type": "string", "description": "The information to store."}, "context": {"type": "string", "description": "Short label (e.g. 
'user preference', 'project decision')."}, + "tags": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional per-call tags to merge with configured default retain tags.", + }, }, "required": ["content"], }, @@ -168,6 +177,10 @@ def _load_config() -> dict: return { "mode": os.environ.get("HINDSIGHT_MODE", "cloud"), "apiKey": os.environ.get("HINDSIGHT_API_KEY", ""), + "retain_tags": os.environ.get("HINDSIGHT_RETAIN_TAGS", ""), + "retain_source": os.environ.get("HINDSIGHT_RETAIN_SOURCE", ""), + "retain_user_prefix": os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User"), + "retain_assistant_prefix": os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant"), "banks": { "hermes": { "bankId": os.environ.get("HINDSIGHT_BANK_ID", "hermes"), @@ -178,6 +191,48 @@ def _load_config() -> dict: } +def _normalize_retain_tags(value: Any) -> List[str]: + """Normalize tag config/tool values to a deduplicated list of strings.""" + if value is None: + return [] + + raw_items: list[Any] + if isinstance(value, list): + raw_items = value + elif isinstance(value, str): + text = value.strip() + if not text: + return [] + if text.startswith("["): + try: + parsed = json.loads(text) + except Exception: + parsed = None + if isinstance(parsed, list): + raw_items = parsed + else: + raw_items = text.split(",") + else: + raw_items = text.split(",") + else: + raw_items = [value] + + normalized = [] + seen = set() + for item in raw_items: + tag = str(item).strip() + if not tag or tag in seen: + continue + seen.add(tag) + normalized.append(tag) + return normalized + + +def _utc_timestamp() -> str: + """Return current UTC timestamp in ISO-8601 with milliseconds and Z suffix.""" + return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z") + + # --------------------------------------------------------------------------- # MemoryProvider implementation # --------------------------------------------------------------------------- @@ -195,6 +250,19 @@ class HindsightMemoryProvider(MemoryProvider): self._llm_base_url = "" self._memory_mode = "hybrid" # "context", "tools", or "hybrid" self._prefetch_method = "recall" # "recall" or "reflect" + self._retain_tags: List[str] = [] + self._retain_source = "" + self._retain_user_prefix = "User" + self._retain_assistant_prefix = "Assistant" + self._platform = "" + self._user_id = "" + self._user_name = "" + self._chat_id = "" + self._chat_name = "" + self._chat_type = "" + self._thread_id = "" + self._agent_identity = "" + self._turn_index = 0 self._client = None self._prefetch_result = "" self._prefetch_lock = threading.Lock() @@ -210,6 +278,7 @@ class HindsightMemoryProvider(MemoryProvider): # Retain controls self._auto_retain = True self._retain_every_n_turns = 1 + self._retain_async = True self._retain_context = "conversation between Hermes Agent and the User" self._turn_counter = 0 self._session_turns: list[str] = [] # accumulates ALL turns for the session @@ -224,7 +293,6 @@ class HindsightMemoryProvider(MemoryProvider): # Bank self._bank_mission = "" self._bank_retain_mission: str | None = None - self._retain_async = True @property def name(self) -> str: @@ -423,7 +491,10 @@ class HindsightMemoryProvider(MemoryProvider): {"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]}, {"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]}, {"key": "recall_prefetch_method", "description": 
"Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]}, - {"key": "tags", "description": "Tags applied when storing memories (comma-separated)", "default": ""}, + {"key": "retain_tags", "description": "Default tags applied to retained memories (comma-separated)", "default": ""}, + {"key": "retain_source", "description": "Metadata source value attached to retained memories", "default": ""}, + {"key": "retain_user_prefix", "description": "Label used before user turns in retained transcripts", "default": "User"}, + {"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"}, {"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""}, {"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]}, {"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True}, @@ -467,7 +538,7 @@ class HindsightMemoryProvider(MemoryProvider): return self._client def initialize(self, session_id: str, **kwargs) -> None: - self._session_id = session_id + self._session_id = str(session_id or "").strip() # Check client version and auto-upgrade if needed try: @@ -496,6 +567,16 @@ class HindsightMemoryProvider(MemoryProvider): pass # packaging not available or other issue — proceed anyway self._config = _load_config() + self._platform = str(kwargs.get("platform") or "").strip() + self._user_id = str(kwargs.get("user_id") or "").strip() + self._user_name = str(kwargs.get("user_name") or "").strip() + self._chat_id = str(kwargs.get("chat_id") or "").strip() + self._chat_name = str(kwargs.get("chat_name") or "").strip() + self._chat_type = str(kwargs.get("chat_type") or "").strip() + self._thread_id = str(kwargs.get("thread_id") or "").strip() + self._agent_identity = str(kwargs.get("agent_identity") or "").strip() + self._turn_index = 0 + self._session_turns = [] self._mode = self._config.get("mode", "cloud") # "local" is a legacy alias for "local_embedded" if self._mode == "local": @@ -513,7 +594,7 @@ class HindsightMemoryProvider(MemoryProvider): memory_mode = self._config.get("memory_mode", "hybrid") self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid" - prefetch_method = self._config.get("recall_prefetch_method", "recall") + prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall") self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall" # Bank options @@ -521,9 +602,22 @@ class HindsightMemoryProvider(MemoryProvider): self._bank_retain_mission = self._config.get("bank_retain_mission") or None # Tags - self._tags = self._config.get("tags") or None + self._retain_tags = _normalize_retain_tags( + self._config.get("retain_tags") + or os.environ.get("HINDSIGHT_RETAIN_TAGS", "") + ) + self._tags = self._retain_tags or None self._recall_tags = self._config.get("recall_tags") or None self._recall_tags_match = self._config.get("recall_tags_match", "any") + self._retain_source = str( + self._config.get("retain_source") or os.environ.get("HINDSIGHT_RETAIN_SOURCE", "") + ).strip() + self._retain_user_prefix = str( + self._config.get("retain_user_prefix") or os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User") + ).strip() or "User" + self._retain_assistant_prefix = str( + self._config.get("retain_assistant_prefix") or 
os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant") + ).strip() or "Assistant" # Retain controls self._auto_retain = self._config.get("auto_retain", True) @@ -547,11 +641,9 @@ class HindsightMemoryProvider(MemoryProvider): logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s", self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version) logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, " - "retain_async=%s, retain_context=%s, " - "recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s", + "retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s", self._auto_retain, self._auto_recall, self._retain_every_n_turns, - self._retain_async, self._retain_context, - self._recall_max_tokens, self._recall_max_input_chars, + self._retain_async, self._retain_context, self._recall_max_tokens, self._recall_max_input_chars, self._tags, self._recall_tags) # For local mode, start the embedded daemon in the background so it @@ -712,6 +804,78 @@ class HindsightMemoryProvider(MemoryProvider): self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="hindsight-prefetch") self._prefetch_thread.start() + def _build_turn_messages(self, user_content: str, assistant_content: str) -> List[Dict[str, str]]: + now = datetime.now(timezone.utc).isoformat() + return [ + { + "role": "user", + "content": f"{self._retain_user_prefix}: {user_content}", + "timestamp": now, + }, + { + "role": "assistant", + "content": f"{self._retain_assistant_prefix}: {assistant_content}", + "timestamp": now, + }, + ] + + def _build_metadata(self, *, message_count: int, turn_index: int) -> Dict[str, str]: + metadata: Dict[str, str] = { + "retained_at": _utc_timestamp(), + "message_count": str(message_count), + "turn_index": str(turn_index), + } + if self._retain_source: + metadata["source"] = self._retain_source + if self._session_id: + metadata["session_id"] = self._session_id + if self._platform: + metadata["platform"] = self._platform + if self._user_id: + metadata["user_id"] = self._user_id + if self._user_name: + metadata["user_name"] = self._user_name + if self._chat_id: + metadata["chat_id"] = self._chat_id + if self._chat_name: + metadata["chat_name"] = self._chat_name + if self._chat_type: + metadata["chat_type"] = self._chat_type + if self._thread_id: + metadata["thread_id"] = self._thread_id + if self._agent_identity: + metadata["agent_identity"] = self._agent_identity + return metadata + + def _build_retain_kwargs( + self, + content: str, + *, + context: str | None = None, + document_id: str | None = None, + metadata: Dict[str, str] | None = None, + tags: List[str] | None = None, + retain_async: bool | None = None, + ) -> Dict[str, Any]: + kwargs: Dict[str, Any] = { + "bank_id": self._bank_id, + "content": content, + "metadata": metadata or self._build_metadata(message_count=1, turn_index=self._turn_index), + } + if context is not None: + kwargs["context"] = context + if document_id: + kwargs["document_id"] = document_id + if retain_async is not None: + kwargs["retain_async"] = retain_async + merged_tags = _normalize_retain_tags(self._retain_tags) + for tag in _normalize_retain_tags(tags): + if tag not in merged_tags: + merged_tags.append(tag) + if merged_tags: + kwargs["tags"] = merged_tags + return kwargs + def sync_turn(self, user_content: str, assistant_content: str, *, 
session_id: str = "") -> None: """Retain conversation turn in background (non-blocking). @@ -721,19 +885,14 @@ class HindsightMemoryProvider(MemoryProvider): logger.debug("sync_turn: skipped (auto_retain disabled)") return - from datetime import datetime, timezone - now = datetime.now(timezone.utc).isoformat() + if session_id: + self._session_id = str(session_id).strip() - messages = [ - {"role": "user", "content": user_content, "timestamp": now}, - {"role": "assistant", "content": assistant_content, "timestamp": now}, - ] - - turn = json.dumps(messages) + turn = json.dumps(self._build_turn_messages(user_content, assistant_content)) self._session_turns.append(turn) self._turn_counter += 1 + self._turn_index = self._turn_counter - # Only retain every N turns if self._turn_counter % self._retain_every_n_turns != 0: logger.debug("sync_turn: buffered turn %d (will retain at turn %d)", self._turn_counter, self._turn_counter + (self._retain_every_n_turns - self._turn_counter % self._retain_every_n_turns)) @@ -741,19 +900,21 @@ class HindsightMemoryProvider(MemoryProvider): logger.debug("sync_turn: retaining %d turns, total session content %d chars", len(self._session_turns), sum(len(t) for t in self._session_turns)) - # Send the ENTIRE session as a single JSON array (document_id deduplicates). - # Each element in _session_turns is a JSON string of that turn's messages. content = "[" + ",".join(self._session_turns) + "]" def _sync(): try: client = self._get_client() - item: dict = { - "content": content, - "context": self._retain_context, - } - if self._tags: - item["tags"] = self._tags + item = self._build_retain_kwargs( + content, + context=self._retain_context, + metadata=self._build_metadata( + message_count=len(self._session_turns) * 2, + turn_index=self._turn_index, + ), + ) + item.pop("bank_id", None) + item.pop("retain_async", None) logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d", self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns)) _run_sync(client.aretain_batch( @@ -789,11 +950,11 @@ class HindsightMemoryProvider(MemoryProvider): return tool_error("Missing required parameter: content") context = args.get("context") try: - retain_kwargs: dict = { - "bank_id": self._bank_id, "content": content, "context": context, - } - if self._tags: - retain_kwargs["tags"] = self._tags + retain_kwargs = self._build_retain_kwargs( + content, + context=context, + tags=args.get("tags"), + ) logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s", self._bank_id, len(content), context) _run_sync(client.aretain(**retain_kwargs)) diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index ca44ce6019..6ca32c1dcb 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -19,6 +19,7 @@ import json import logging import re import threading +import time from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider @@ -206,13 +207,19 @@ class HonchoMemoryProvider(MemoryProvider): self._turn_count = 0 self._injection_frequency = "every-turn" # or "first-turn" self._context_cadence = 1 # minimum turns between context API calls - self._dialectic_cadence = 3 # minimum turns between dialectic API calls + self._dialectic_cadence = 1 # backwards-compat fallback; wizard writes 2 on new configs self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3) self._dialectic_depth_levels: list[str] | None = None # per-pass 
reasoning levels - self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "medium", "high" + self._reasoning_heuristic: bool = True # scale base level by query length + self._reasoning_level_cap: str = "high" # ceiling for auto-selected level self._last_context_turn = -999 self._last_dialectic_turn = -999 + # Liveness + observability state + self._prefetch_thread_started_at: float = 0.0 # monotonic ts of current thread + self._prefetch_result_fired_at: int = -999 # turn the pending result was fired at + self._dialectic_empty_streak: int = 0 # consecutive empty returns + # Port #1957: lazy session init for tools-only mode self._session_initialized = False self._lazy_init_kwargs: Optional[dict] = None @@ -286,14 +293,6 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho not configured — plugin inactive") return - # Override peer_name with gateway user_id for per-user memory scoping. - # Only when no explicit peerName was configured — an explicit peerName - # means the user chose their identity; a raw user_id (e.g. Telegram - # chat ID) should not silently replace it. - _gw_user_id = kwargs.get("user_id") - if _gw_user_id and not cfg.peer_name: - cfg.peer_name = _gw_user_id - self._config = cfg # ----- B1: recall_mode from config ----- @@ -305,12 +304,16 @@ class HonchoMemoryProvider(MemoryProvider): raw = cfg.raw or {} self._injection_frequency = raw.get("injectionFrequency", "every-turn") self._context_cadence = int(raw.get("contextCadence", 1)) - self._dialectic_cadence = int(raw.get("dialecticCadence", 3)) + # Backwards-compat: unset dialecticCadence falls back to 1 + # (every turn) so existing honcho.json configs without the key + # behave as they did before. New setups via `hermes honcho setup` + # get dialecticCadence=2 written explicitly by the wizard. + self._dialectic_cadence = int(raw.get("dialecticCadence", 1)) self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3)) self._dialectic_depth_levels = cfg.dialectic_depth_levels - cap = raw.get("reasoningLevelCap") - if cap and cap in ("minimal", "low", "medium", "high"): - self._reasoning_level_cap = cap + self._reasoning_heuristic = cfg.reasoning_heuristic + if cfg.reasoning_level_cap in self._LEVEL_ORDER: + self._reasoning_level_cap = cfg.reasoning_level_cap except Exception as e: logger.debug("Honcho cost-awareness config parse error: %s", e) @@ -352,6 +355,7 @@ class HonchoMemoryProvider(MemoryProvider): honcho=client, config=cfg, context_tokens=cfg.context_tokens, + runtime_user_peer_name=kwargs.get("user_id") or None, ) # ----- B3: resolve_session_name ----- @@ -391,14 +395,45 @@ class HonchoMemoryProvider(MemoryProvider): except Exception as e: logger.debug("Honcho memory file migration skipped: %s", e) - # ----- B7: Pre-warming context at init ----- + # ----- B7: Pre-warming at init ----- + # Context prewarm warms peer.context() (base layer), consumed via + # pop_context_result() in prefetch(). Dialectic prewarm runs the + # full configured depth and writes into _prefetch_result so turn 1 + # consumes the result directly. if self._recall_mode in ("context", "hybrid"): try: self._manager.prefetch_context(self._session_key) - self._manager.prefetch_dialectic(self._session_key, "What should I know about this user?") - logger.debug("Honcho pre-warm threads started for session: %s", self._session_key) except Exception as e: - logger.debug("Honcho pre-warm failed: %s", e) + logger.debug("Honcho context prewarm failed: %s", e) + + _prewarm_query = ( + "Summarize what you know about this user. 
" + "Focus on preferences, current projects, and working style." + ) + + def _prewarm_dialectic() -> None: + try: + r = self._run_dialectic_depth(_prewarm_query) + except Exception as exc: + logger.debug("Honcho dialectic prewarm failed: %s", exc) + self._dialectic_empty_streak += 1 + return + if r and r.strip(): + with self._prefetch_lock: + self._prefetch_result = r + self._prefetch_result_fired_at = 0 + # Treat prewarm as turn 0 so cadence gating starts clean. + self._last_dialectic_turn = 0 + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + + self._prefetch_thread_started_at = time.monotonic() + self._prefetch_thread = threading.Thread( + target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic" + ) + self._prefetch_thread.start() + logger.debug("Honcho pre-warm started for session: %s", self._session_key) def _ensure_session(self) -> bool: """Lazily initialize the Honcho session (for tools-only mode). @@ -487,7 +522,8 @@ class HonchoMemoryProvider(MemoryProvider): "# Honcho Memory\n" "Active (tools-only mode). Use honcho_profile for a quick factual snapshot, " "honcho_search for raw excerpts, honcho_context for raw peer context, " - "honcho_reasoning for synthesized answers, " + "honcho_reasoning for synthesized answers (pass reasoning_level " + "minimal/low/medium/high/max — you pick the depth per call), " "honcho_conclude to save facts about the user. " "No automatic context injection — you must use tools to access memory." ) @@ -497,7 +533,8 @@ class HonchoMemoryProvider(MemoryProvider): "Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. " "Use honcho_profile for a quick factual snapshot, " "honcho_search for raw excerpts, honcho_context for raw peer context, " - "honcho_reasoning for synthesized answers, " + "honcho_reasoning for synthesized answers (pass reasoning_level " + "minimal/low/medium/high/max — you pick the depth per call), " "honcho_conclude to save facts about the user." ) @@ -526,6 +563,10 @@ class HonchoMemoryProvider(MemoryProvider): if self._injection_frequency == "first-turn" and self._turn_count > 1: return "" + # Trivial prompts ("ok", "yes", slash commands) carry no semantic signal. + if self._is_trivial_prompt(query): + return "" + parts = [] # ----- Layer 1: Base context (representation + card) ----- @@ -560,43 +601,72 @@ class HonchoMemoryProvider(MemoryProvider): # On the very first turn, no queue_prefetch() has run yet so the # dialectic result is empty. Run with a bounded timeout so a slow # Honcho connection doesn't block the first response indefinitely. - # On timeout the result is skipped and queue_prefetch() will pick it - # up at the next cadence-allowed turn. + # On timeout we let the thread keep running and write its result into + # _prefetch_result under the lock, so the next turn picks it up. + # + # Skip if the session-start prewarm already filled _prefetch_result — + # firing another .chat() would be duplicate work. 
+ with self._prefetch_lock: + _prewarm_landed = bool(self._prefetch_result) + if _prewarm_landed and self._last_dialectic_turn == -999: + self._last_dialectic_turn = self._turn_count + if self._last_dialectic_turn == -999 and query: _first_turn_timeout = ( self._config.timeout if self._config and self._config.timeout else 8.0 ) - _result_holder: list[str] = [] + _fired_at = self._turn_count def _run_first_turn() -> None: try: - _result_holder.append(self._run_dialectic_depth(query)) + r = self._run_dialectic_depth(query) except Exception as exc: logger.debug("Honcho first-turn dialectic failed: %s", exc) - - _t = threading.Thread(target=_run_first_turn, daemon=True) - _t.start() - _t.join(timeout=_first_turn_timeout) - if not _t.is_alive(): - first_turn_dialectic = _result_holder[0] if _result_holder else "" - if first_turn_dialectic and first_turn_dialectic.strip(): + self._dialectic_empty_streak += 1 + return + if r and r.strip(): with self._prefetch_lock: - self._prefetch_result = first_turn_dialectic - self._last_dialectic_turn = self._turn_count - else: + self._prefetch_result = r + self._prefetch_result_fired_at = _fired_at + # Advance cadence only on a non-empty result so the next + # turn retries when the call returned nothing. + self._last_dialectic_turn = _fired_at + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + + self._prefetch_thread_started_at = time.monotonic() + self._prefetch_thread = threading.Thread( + target=_run_first_turn, daemon=True, name="honcho-prefetch-first" + ) + self._prefetch_thread.start() + self._prefetch_thread.join(timeout=_first_turn_timeout) + if self._prefetch_thread.is_alive(): logger.debug( - "Honcho first-turn dialectic timed out (%.1fs) — " - "will inject at next cadence-allowed turn", + "Honcho first-turn dialectic still running after %.1fs — " + "will surface on next turn", _first_turn_timeout, ) - # Don't update _last_dialectic_turn: queue_prefetch() will - # retry at the next cadence-allowed turn via the async path. if self._prefetch_thread and self._prefetch_thread.is_alive(): self._prefetch_thread.join(timeout=3.0) with self._prefetch_lock: dialectic_result = self._prefetch_result + fired_at = self._prefetch_result_fired_at self._prefetch_result = "" + self._prefetch_result_fired_at = -999 + + # Discard stale pending results: if the fire happened more than + # cadence × multiplier turns ago (e.g. a run of trivial-prompt turns + # passed without consumption), the content likely no longer tracks + # the current conversational pivot. + stale_limit = self._dialectic_cadence * self._STALE_RESULT_MULTIPLIER + if dialectic_result and fired_at >= 0 and (self._turn_count - fired_at) > stale_limit: + logger.debug( + "Honcho pending dialectic discarded as stale: fired_at=%d, " + "turn=%d, limit=%d", fired_at, self._turn_count, stale_limit, + ) + dialectic_result = "" if dialectic_result and dialectic_result.strip(): parts.append(dialectic_result) @@ -641,6 +711,10 @@ class HonchoMemoryProvider(MemoryProvider): if self._recall_mode == "tools": return + # Trivial prompts don't warrant either a context refresh or a dialectic call. 
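        # (Illustrative calls against _is_trivial_prompt, defined further
        # down in this file: "", "  ok ", "lgtm", and "/compact" all return
        # True; a real question such as "what did we decide about the
        # schema?" returns False and proceeds to the cadence gates below.)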
+ if self._is_trivial_prompt(query): + return + # ----- Context refresh (base layer) — independent cadence ----- if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence: self._last_context_turn = self._turn_count @@ -650,24 +724,46 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho context prefetch failed: %s", e) # ----- Dialectic prefetch (supplement layer) ----- - # B5: cadence check — skip if too soon since last dialectic call - if self._dialectic_cadence > 1: - if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: - logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", - self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) - return + # Thread-alive guard with stale-thread recovery: a hung Honcho call + # older than timeout × multiplier is treated as dead so it can't + # block subsequent fires. + if self._thread_is_live(): + logger.debug("Honcho dialectic prefetch skipped: prior thread still running") + return - self._last_dialectic_turn = self._turn_count + # Cadence gate, widened by the empty-streak backoff so a persistently + # silent backend doesn't retry every turn forever. + effective = self._effective_cadence() + if (self._turn_count - self._last_dialectic_turn) < effective: + logger.debug( + "Honcho dialectic prefetch skipped: effective cadence %d " + "(base %d, empty streak %d), turns since last: %d", + effective, self._dialectic_cadence, self._dialectic_empty_streak, + self._turn_count - self._last_dialectic_turn, + ) + return + + # Cadence advances only on a non-empty result so empty returns + # (transient API error, sparse representation) retry next turn. + _fired_at = self._turn_count def _run(): try: result = self._run_dialectic_depth(query) - if result and result.strip(): - with self._prefetch_lock: - self._prefetch_result = result except Exception as e: logger.debug("Honcho prefetch failed: %s", e) + self._dialectic_empty_streak += 1 + return + if result and result.strip(): + with self._prefetch_lock: + self._prefetch_result = result + self._prefetch_result_fired_at = _fired_at + self._last_dialectic_turn = _fired_at + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + self._prefetch_thread_started_at = time.monotonic() self._prefetch_thread = threading.Thread( target=_run, daemon=True, name="honcho-prefetch" ) @@ -692,11 +788,91 @@ class HonchoMemoryProvider(MemoryProvider): _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max") - def _resolve_pass_level(self, pass_idx: int) -> str: + # Char-count thresholds for the query-length reasoning heuristic. + _HEURISTIC_LENGTH_MEDIUM = 120 + _HEURISTIC_LENGTH_HIGH = 400 + + # Liveness constants. A thread older than timeout × multiplier is treated + # as dead so a hung Honcho call can't block future retries indefinitely. + _STALE_THREAD_MULTIPLIER = 2.0 + # Pending result whose fire-turn is older than cadence × multiplier is + # discarded on read so we don't inject context for a stale conversational + # pivot after a gap of trivial-prompt turns. + _STALE_RESULT_MULTIPLIER = 2 + # Cap on the empty-streak backoff so a persistently silent backend + # eventually settles on a ceiling instead of unbounded widening. 
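    # (Worked numbers, illustrative only: with a base cadence of 2,
    # _effective_cadence() returns 2 at streak 0, 3 at streak 1, 5 at
    # streak 3, and clamps at 16, i.e. 2 * _BACKOFF_MAX, from streak 14
    # onward.)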
+ _BACKOFF_MAX = 8 + + def _thread_is_live(self) -> bool: + """Thread-alive guard that treats threads older than the stale + threshold as dead, so a hung Honcho request can't block new fires.""" + if not self._prefetch_thread or not self._prefetch_thread.is_alive(): + return False + timeout = (self._config.timeout if self._config and self._config.timeout else 8.0) + age = time.monotonic() - self._prefetch_thread_started_at + if age > timeout * self._STALE_THREAD_MULTIPLIER: + logger.debug( + "Honcho prefetch thread age %.1fs exceeds stale threshold " + "%.1fs — treating as dead", age, timeout * self._STALE_THREAD_MULTIPLIER, + ) + return False + return True + + def _effective_cadence(self) -> int: + """Cadence plus empty-streak backoff, capped at _BACKOFF_MAX × base.""" + if self._dialectic_empty_streak <= 0: + return self._dialectic_cadence + widened = self._dialectic_cadence + self._dialectic_empty_streak + ceiling = self._dialectic_cadence * self._BACKOFF_MAX + return min(widened, ceiling) + + def liveness_snapshot(self) -> dict: + """In-process snapshot of dialectic liveness state for diagnostics. + + Returns current turn, last successful dialectic turn, pending-result + fire turn, empty streak, effective cadence, and thread status. + """ + thread_age = None + if self._prefetch_thread and self._prefetch_thread.is_alive(): + thread_age = time.monotonic() - self._prefetch_thread_started_at + return { + "turn_count": self._turn_count, + "last_dialectic_turn": self._last_dialectic_turn, + "pending_result_fired_at": self._prefetch_result_fired_at, + "empty_streak": self._dialectic_empty_streak, + "effective_cadence": self._effective_cadence(), + "thread_alive": thread_age is not None, + "thread_age_seconds": thread_age, + } + + def _apply_reasoning_heuristic(self, base: str, query: str) -> str: + """Scale `base` up by query length, clamped at reasoning_level_cap. + + Char-count heuristic: +1 at >=120 chars, +2 at >=400. + """ + if not self._reasoning_heuristic or not query: + return base + if base not in self._LEVEL_ORDER: + return base + n = len(query) + if n < self._HEURISTIC_LENGTH_MEDIUM: + bump = 0 + elif n < self._HEURISTIC_LENGTH_HIGH: + bump = 1 + else: + bump = 2 + base_idx = self._LEVEL_ORDER.index(base) + cap_idx = self._LEVEL_ORDER.index(self._reasoning_level_cap) + return self._LEVEL_ORDER[min(base_idx + bump, cap_idx)] + + def _resolve_pass_level(self, pass_idx: int, query: str = "") -> str: """Resolve reasoning level for a given pass index. - Uses dialecticDepthLevels if configured, otherwise proportional - defaults relative to dialecticReasoningLevel. + Precedence: + 1. dialecticDepthLevels (explicit per-pass) — wins absolutely + 2. _PROPORTIONAL_LEVELS table (depth>1 lighter-early passes) + 3. 
Base level = dialecticReasoningLevel, optionally scaled by the + reasoning heuristic when the mapping falls through to 'base' """ if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels): return self._dialectic_depth_levels[pass_idx] @@ -704,7 +880,7 @@ class HonchoMemoryProvider(MemoryProvider): base = (self._config.dialectic_reasoning_level if self._config else "low") mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx)) if mapping is None or mapping == "base": - return base + return self._apply_reasoning_heuristic(base, query) return mapping def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str: @@ -791,7 +967,7 @@ class HonchoMemoryProvider(MemoryProvider): break prompt = self._build_dialectic_prompt(i, results, is_cold) - level = self._resolve_pass_level(i) + level = self._resolve_pass_level(i, query=query) logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s", self._dialectic_depth, i, level, is_cold) @@ -808,6 +984,29 @@ class HonchoMemoryProvider(MemoryProvider): return r return "" + # Prompts that carry no semantic signal — trivial acknowledgements, slash + # commands, empty input. Skipping injection here saves tokens and prevents + # stale user-model context from derailing one-word replies. + _TRIVIAL_PROMPT_RE = re.compile( + r'^(yes|no|ok|okay|sure|thanks|thank you|y|n|yep|nope|yeah|nah|' + r'continue|go ahead|do it|proceed|got it|cool|nice|great|done|next|lgtm|k)$', + re.IGNORECASE, + ) + + @classmethod + def _is_trivial_prompt(cls, text: str) -> bool: + """Return True if the prompt is too trivial to warrant context injection.""" + if not text: + return True + stripped = text.strip() + if not stripped: + return True + if stripped.startswith("/"): + return True + if cls._TRIVIAL_PROMPT_RE.match(stripped): + return True + return False + def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None: """Track turn count for cadence and injection_frequency logic.""" self._turn_count = turn_number diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index 536d34002d..5c829a4c98 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -460,17 +460,37 @@ def cmd_setup(args) -> None: pass # keep current # --- 7b. Dialectic cadence --- - current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "3") + current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2") print("\n Dialectic cadence:") print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).") - print(" 1 = every turn (aggressive), 3 = every 3 turns (recommended), 5+ = sparse.") + print(" 1 = every turn, 2 = every other turn, 3+ = sparser.") + print(" Recommended: 1-5.") new_dialectic = _prompt("Dialectic cadence", default=current_dialectic) try: val = int(new_dialectic) if val >= 1: hermes_host["dialecticCadence"] = val except (ValueError, TypeError): - hermes_host["dialecticCadence"] = 3 + hermes_host["dialecticCadence"] = 2 + + # --- 7c. 
Dialectic reasoning level --- + current_reasoning = ( + hermes_host.get("dialecticReasoningLevel") + or cfg.get("dialecticReasoningLevel") + or "low" + ) + print("\n Dialectic reasoning level:") + print(" Depth Honcho uses when synthesizing user context on auto-injected calls.") + print(" minimal -- quick factual lookups") + print(" low -- straightforward questions (default)") + print(" medium -- multi-aspect synthesis") + print(" high -- complex behavioral patterns") + print(" max -- thorough audit-level analysis") + new_reasoning = _prompt("Reasoning level", default=current_reasoning) + if new_reasoning in ("minimal", "low", "medium", "high", "max"): + hermes_host["dialecticReasoningLevel"] = new_reasoning + else: + hermes_host["dialecticReasoningLevel"] = "low" # --- 8. Session strategy --- current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") @@ -636,8 +656,11 @@ def cmd_status(args) -> None: print(f" Recall mode: {hcfg.recall_mode}") print(f" Context budget: {hcfg.context_tokens or '(uncapped)'} tokens") raw = getattr(hcfg, "raw", None) or {} - dialectic_cadence = raw.get("dialecticCadence") or 3 + dialectic_cadence = raw.get("dialecticCadence") or 1 print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}") + reasoning_cap = raw.get("reasoningLevelCap") or hcfg.reasoning_level_cap + heuristic_on = "on" if hcfg.reasoning_heuristic else "off" + print(f" Reasoning: base={hcfg.dialectic_reasoning_level}, cap={reasoning_cap}, heuristic={heuristic_on}") print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})") print(f" Write freq: {hcfg.write_frequency}") diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py index 2474d3a2b6..fef2e2d58f 100644 --- a/plugins/memory/honcho/client.py +++ b/plugins/memory/honcho/client.py @@ -251,6 +251,11 @@ class HonchoClientConfig: # matching dialectic_depth length. When None, uses proportional defaults # derived from dialectic_reasoning_level. dialectic_depth_levels: list[str] | None = None + # When true, the auto-injected dialectic scales reasoning level up on + # longer queries. See HonchoMemoryProvider for thresholds. + reasoning_heuristic: bool = True + # Ceiling for the heuristic-selected reasoning level. + reasoning_level_cap: str = "high" # Honcho API limits — configurable for self-hosted instances # Max chars per message sent via add_messages() (Honcho cloud: 25000) message_max_chars: int = 25000 @@ -446,6 +451,16 @@ class HonchoClientConfig: raw.get("dialecticDepthLevels"), depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")), ), + reasoning_heuristic=_resolve_bool( + host_block.get("reasoningHeuristic"), + raw.get("reasoningHeuristic"), + default=True, + ), + reasoning_level_cap=( + host_block.get("reasoningLevelCap") + or raw.get("reasoningLevelCap") + or "high" + ), message_max_chars=int( host_block.get("messageMaxChars") or raw.get("messageMaxChars") diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py index fd91ee3b3b..79625b5cd5 100644 --- a/plugins/memory/honcho/session.py +++ b/plugins/memory/honcho/session.py @@ -78,6 +78,7 @@ class HonchoSessionManager: honcho: Honcho | None = None, context_tokens: int | None = None, config: Any | None = None, + runtime_user_peer_name: str | None = None, ): """ Initialize the session manager. 
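A hedged sketch of the peer-name precedence that `runtime_user_peer_name` introduces; the resolution order is taken from the get-session hunk below, `_sanitize_id` is the existing helper, and the session-key fallback shape is an assumption:

    def _resolve_user_peer_id(manager: HonchoSessionManager, session_key: str) -> str:
        # Runtime gateway identity wins over a configured peerName; the
        # session-key derivation is only a last resort.
        if manager._runtime_user_peer_name:
            return manager._sanitize_id(manager._runtime_user_peer_name)
        if manager._config and manager._config.peer_name:
            return manager._sanitize_id(manager._config.peer_name)
        return manager._sanitize_id(session_key)  # fallback shape assumed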
@@ -87,10 +88,12 @@ class HonchoSessionManager: context_tokens: Max tokens for context() calls (None = Honcho default). config: HonchoClientConfig from global config (provides peer_name, ai_peer, write_frequency, observation, etc.). + runtime_user_peer_name: Gateway user identity for per-user memory scoping. """ self._honcho = honcho self._context_tokens = context_tokens self._config = config + self._runtime_user_peer_name = runtime_user_peer_name self._cache: dict[str, HonchoSession] = {} self._peers_cache: dict[str, Any] = {} self._sessions_cache: dict[str, Any] = {} @@ -100,9 +103,11 @@ class HonchoSessionManager: self._write_frequency = write_frequency self._turn_counter: int = 0 - # Prefetch caches: session_key → last result (consumed once per turn) + # Prefetch cache: session_key → last context result (consumed once per turn). + # Dialectic results are cached on the plugin side (HonchoMemoryProvider + # ._prefetch_result) so session-start prewarm and turn-driven fires share + # one source of truth; see __init__.py _do_session_init for the prewarm. self._context_cache: dict[str, dict] = {} - self._dialectic_cache: dict[str, str] = {} self._prefetch_cache_lock = threading.Lock() self._dialectic_reasoning_level: str = ( config.dialectic_reasoning_level if config else "low" @@ -272,8 +277,10 @@ class HonchoSessionManager: logger.debug("Local session cache hit: %s", key) return self._cache[key] - # Use peer names from global config when available - if self._config and self._config.peer_name: + # Gateway sessions should use the runtime user identity when available. + if self._runtime_user_peer_name: + user_peer_id = self._sanitize_id(self._runtime_user_peer_name) + elif self._config and self._config.peer_name: user_peer_id = self._sanitize_id(self._config.peer_name) else: # Fallback: derive from session key @@ -499,8 +506,8 @@ class HonchoSessionManager: Query Honcho's dialectic endpoint about a peer. Runs an LLM on Honcho's backend against the target peer's full - representation. Higher latency than context() — call async via - prefetch_dialectic() to avoid blocking the response. + representation. Higher latency than context() — callers run this in + a background thread (see HonchoMemoryProvider) to avoid blocking. Args: session_key: The session key to query against. @@ -555,42 +562,6 @@ class HonchoSessionManager: logger.warning("Honcho dialectic query failed: %s", e) return "" - def prefetch_dialectic(self, session_key: str, query: str) -> None: - """ - Fire a dialectic_query in a background thread, caching the result. - - Non-blocking. The result is available via pop_dialectic_result() - on the next call (typically the following turn). Reasoning level - is selected dynamically based on query complexity. - - Args: - session_key: The session key to query against. - query: The user's current message, used as the query. - """ - def _run(): - result = self.dialectic_query(session_key, query) - if result: - self.set_dialectic_result(session_key, result) - - t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True) - t.start() - - def set_dialectic_result(self, session_key: str, result: str) -> None: - """Store a prefetched dialectic result in a thread-safe way.""" - if not result: - return - with self._prefetch_cache_lock: - self._dialectic_cache[session_key] = result - - def pop_dialectic_result(self, session_key: str) -> str: - """ - Return and clear the cached dialectic result for this session. - - Returns empty string if no result is ready yet. 
- """ - with self._prefetch_cache_lock: - return self._dialectic_cache.pop(session_key, "") - def prefetch_context(self, session_key: str, user_message: str | None = None) -> None: """ Fire get_prefetch_context in a background thread, caching the result. diff --git a/pyproject.toml b/pyproject.toml index bd83673651..992e548f9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,7 +126,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector hermes_cli = ["web_dist/**/*"] [tool.setuptools.packages.find] -include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"] +include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 96f48e77f5..0000000000 --- a/requirements.txt +++ /dev/null @@ -1,36 +0,0 @@ -# NOTE: This file is maintained for convenience only. -# The canonical dependency list is in pyproject.toml. -# Preferred install: pip install -e ".[all]" - -# Core dependencies -openai -python-dotenv -fire -httpx -rich -tenacity -prompt_toolkit -pyyaml -requests -jinja2 -pydantic>=2.0 -PyJWT[crypto] -debugpy - -# Web tools -firecrawl-py -parallel-web>=0.4.2 - -# Image generation -fal-client - -# Text-to-speech (Edge TTS is free, no API key needed) -edge-tts - -# Optional: For cron expression parsing (cronjob scheduling) -croniter - -# Optional: For messaging platform integrations (gateway) -python-telegram-bot[webhooks]>=22.6 -discord.py>=2.0 -aiohttp>=3.9.0 diff --git a/run_agent.py b/run_agent.py index c87bd35152..ec5e86d786 100644 --- a/run_agent.py +++ b/run_agent.py @@ -48,6 +48,10 @@ from hermes_constants import get_hermes_home # Load .env from ~/.hermes/.env first, then project root as dev fallback. # User-managed env files should override stale shell exports on restart. 
from hermes_cli.env_loader import load_hermes_dotenv +from hermes_cli.timeouts import ( + get_provider_request_timeout, + get_provider_stale_timeout, +) _hermes_home = get_hermes_home() _project_env = Path(__file__).parent / '.env' @@ -96,6 +100,20 @@ from agent.subdirectory_hints import SubdirectoryHintTracker from agent.prompt_caching import apply_anthropic_cache_control from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE from agent.usage_pricing import estimate_usage_cost, normalize_usage +from agent.codex_responses_adapter import ( + _chat_content_to_responses_parts, + _chat_messages_to_responses_input as _codex_chat_messages_to_responses_input, + _derive_responses_function_call_id as _codex_derive_responses_function_call_id, + _deterministic_call_id as _codex_deterministic_call_id, + _extract_responses_message_text as _codex_extract_responses_message_text, + _extract_responses_reasoning_text as _codex_extract_responses_reasoning_text, + _normalize_codex_response as _codex_normalize_codex_response, + _preflight_codex_api_kwargs as _codex_preflight_codex_api_kwargs, + _preflight_codex_input_items as _codex_preflight_codex_input_items, + _responses_tools as _codex_responses_tools, + _split_responses_tool_id as _codex_split_responses_tool_id, + _summarize_user_message_for_log, +) from agent.display import ( KawaiiSpinner, build_tool_preview as _build_tool_preview, get_cute_tool_message as _get_cute_tool_message_impl, @@ -106,7 +124,7 @@ from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, save_trajectory as _save_trajectory_to_file, ) -from utils import atomic_json_write, env_var_enabled +from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url @@ -159,6 +177,20 @@ class _SafeWriter: return getattr(self._inner, name) +def _get_proxy_from_env() -> Optional[str]: + """Read proxy URL from environment variables. + + Checks HTTPS_PROXY, HTTP_PROXY, ALL_PROXY (and lowercase variants) in order. + Returns the first valid proxy URL found, or None if no proxy is configured. + """ + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy"): + value = os.environ.get(key, "").strip() + if value: + return normalize_proxy_url(value) + return None + + def _install_safe_stdio() -> None: """Wrap stdout/stderr so best-effort console output cannot crash the agent.""" for stream_name in ("stdout", "stderr"): @@ -353,6 +385,11 @@ def _sanitize_surrogates(text: str) -> str: return text +# _chat_content_to_responses_parts and _summarize_user_message_for_log are +# imported from agent.codex_responses_adapter (see import block above). +# They remain importable from run_agent for backward compatibility. + + def _sanitize_structure_surrogates(payload: Any) -> bool: """Replace surrogate code points in nested dict/list payloads in-place. @@ -454,6 +491,71 @@ def _sanitize_messages_surrogates(messages: list) -> bool: return found +def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str: + """Attempt to repair malformed tool_call argument JSON. + + Models like GLM-5.1 via Ollama can produce truncated JSON, trailing + commas, Python ``None``, etc. The API proxy rejects these with HTTP 400 + "invalid tool call arguments". 
This function applies common repairs; + if all fail it returns ``"{}"`` so the request succeeds (better than + crashing the session). All repairs are logged at WARNING level. + """ + raw_stripped = raw_args.strip() if isinstance(raw_args, str) else "" + + # Fast-path: empty / whitespace-only -> empty object + if not raw_stripped: + logger.warning("Sanitized empty tool_call arguments for %s", tool_name) + return "{}" + + # Python-literal None -> normalise to {} + if raw_stripped == "None": + logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name) + return "{}" + + # Attempt common JSON repairs + fixed = raw_stripped + # 1. Strip trailing commas before } or ] + fixed = re.sub(r',\s*([}\]])', r'\1', fixed) + # 2. Close unclosed structures + open_curly = fixed.count('{') - fixed.count('}') + open_bracket = fixed.count('[') - fixed.count(']') + if open_curly > 0: + fixed += '}' * open_curly + if open_bracket > 0: + fixed += ']' * open_bracket + # 3. Remove excess closing braces/brackets (bounded to 50 iterations) + for _ in range(50): + try: + json.loads(fixed) + break + except json.JSONDecodeError: + if fixed.endswith('}') and fixed.count('}') > fixed.count('{'): + fixed = fixed[:-1] + elif fixed.endswith(']') and fixed.count(']') > fixed.count('['): + fixed = fixed[:-1] + else: + break + + try: + json.loads(fixed) + logger.warning( + "Repaired malformed tool_call arguments for %s: %s → %s", + tool_name, raw_stripped[:80], fixed[:80], + ) + return fixed + except json.JSONDecodeError: + pass + + # Last resort: replace with empty object so the API request doesn't + # crash the entire session. + logger.warning( + "Unrepairable tool_call arguments for %s — " + "replaced with empty object (was: %s)", + tool_name, raw_stripped[:80], + ) + return "{}" + + def _strip_non_ascii(text: str) -> str: """Remove non-ASCII characters, replacing with closest ASCII equivalent or removing. @@ -601,6 +703,7 @@ class AIAgent: def base_url(self, value: str) -> None: self._base_url = value self._base_url_lower = value.lower() if value else "" + self._base_url_hostname = base_url_hostname(value) def __init__( self, @@ -648,6 +751,11 @@ class AIAgent: prefill_messages: List[Dict[str, Any]] = None, platform: str = None, user_id: str = None, + user_name: str = None, + chat_id: str = None, + chat_name: str = None, + chat_type: str = None, + thread_id: str = None, gateway_session_key: str = None, skip_context_files: bool = False, skip_memory: bool = False, @@ -717,6 +825,11 @@ class AIAgent: self.ephemeral_system_prompt = ephemeral_system_prompt self.platform = platform # "cli", "telegram", "discord", "whatsapp", etc. self._user_id = user_id # Platform user identifier (gateway sessions) + self._user_name = user_name + self._chat_id = chat_id + self._chat_name = chat_name + self._chat_type = chat_type + self._thread_id = thread_id self._gateway_session_key = gateway_session_key # Stable per-chat key (e.g. 
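Illustrative before/after pairs for the `_repair_tool_call_arguments` helper above (inputs hypothetical; results follow the repair steps shown in its body):

    assert _repair_tool_call_arguments('{"path": "a.txt",}') == '{"path": "a.txt"}'  # trailing comma
    assert _repair_tool_call_arguments('{"n": 1') == '{"n": 1}'                      # unclosed brace
    assert _repair_tool_call_arguments('None') == '{}'                               # Python literal
    assert _repair_tool_call_arguments('') == '{}'                                   # empty fast-path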
agent:main:telegram:dm:123) # Pluggable print function — CLI replaces this with _cprint so that # raw ANSI status lines are routed through prompt_toolkit's renderer @@ -742,13 +855,16 @@ class AIAgent: self.api_mode = "codex_responses" elif self.provider == "xai": self.api_mode = "codex_responses" - elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower: + elif (provider_name is None) and ( + self._base_url_hostname == "chatgpt.com" + and "/backend-api/codex" in self._base_url_lower + ): self.api_mode = "codex_responses" self.provider = "openai-codex" - elif (provider_name is None) and "api.x.ai" in self._base_url_lower: + elif (provider_name is None) and self._base_url_hostname == "api.x.ai": self.api_mode = "codex_responses" self.provider = "xai" - elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower): + elif self.provider == "anthropic" or (provider_name is None and self._base_url_hostname == "api.anthropic.com"): self.api_mode = "anthropic_messages" self.provider = "anthropic" elif self._base_url_lower.rstrip("/").endswith("/anthropic"): @@ -756,8 +872,12 @@ class AIAgent: # use a URL convention ending in /anthropic. Auto-detect these so the # Anthropic Messages API adapter is used instead of chat completions. self.api_mode = "anthropic_messages" - elif self.provider == "bedrock" or "bedrock-runtime" in self._base_url_lower: - # AWS Bedrock — auto-detect from provider name or base URL. + elif self.provider == "bedrock" or ( + self._base_url_hostname.startswith("bedrock-runtime.") + and base_url_host_matches(self._base_url_lower, "amazonaws.com") + ): + # AWS Bedrock — auto-detect from provider name or base URL + # (bedrock-runtime..amazonaws.com). self.api_mode = "bedrock_converse" else: self.api_mode = "chat_completions" @@ -877,13 +997,15 @@ class AIAgent: self.prefill_messages = prefill_messages or [] # Prefilled conversation turns self._force_ascii_payload = False - # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter. - # Reduces input costs by ~75% on multi-turn conversations by caching the - # conversation prefix. Uses system_and_3 strategy (4 breakpoints). - is_openrouter = self._is_openrouter_url() - is_claude = "claude" in self.model.lower() - is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic" - self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic + # Anthropic prompt caching: auto-enabled for Claude models on native + # Anthropic, OpenRouter, and third-party gateways that speak the + # Anthropic protocol (``api_mode == 'anthropic_messages'``). Reduces + # input costs by ~75% on multi-turn conversations. Uses system_and_3 + # strategy (4 breakpoints). See ``_anthropic_prompt_cache_policy`` + # for the layout-vs-transport decision. + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy() + ) self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) # Iteration budget: the LLM is only notified when it actually exhausts @@ -963,6 +1085,12 @@ class AIAgent: self._anthropic_client = None self._is_anthropic_oauth = False + # Resolve per-provider / per-model request timeout once up front so + # every client construction path below (Anthropic native, OpenAI-wire, + # router-based implicit auth) can apply it consistently. Bedrock + # Claude uses its own timeout path and is not covered here. 
+ _provider_timeout = get_provider_request_timeout(self.provider, self.model) + if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token # Bedrock + Claude → use AnthropicBedrock SDK for full feature parity @@ -970,8 +1098,7 @@ class AIAgent: _is_bedrock_anthropic = self.provider == "bedrock" if _is_bedrock_anthropic: from agent.anthropic_adapter import build_anthropic_bedrock_client - import re as _re - _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _region_match = re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") _br_region = _region_match.group(1) if _region_match else "us-east-1" self._bedrock_region = _br_region self._anthropic_client = build_anthropic_bedrock_client(_br_region) @@ -992,9 +1119,16 @@ class AIAgent: self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = base_url + # Only mark the session as OAuth-authenticated when the token + # genuinely belongs to native Anthropic. Third-party providers + # (MiniMax, Kimi, GLM, LiteLLM proxies) that accept the + # Anthropic protocol must never trip OAuth code paths — doing + # so injects Claude-Code identity headers and system prompts + # that cause 401/403 on their endpoints. Guards #1739 and + # the third-party identity-injection bug. from agent.anthropic_adapter import _is_oauth_token as _is_oat - self._is_anthropic_oauth = _is_oat(effective_key) - self._anthropic_client = build_anthropic_client(effective_key, base_url) + self._is_anthropic_oauth = _is_oat(effective_key) if _is_native_anthropic else False + self._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout) # No OpenAI client needed for Anthropic mode self.client = None self._client_kwargs = {} @@ -1005,8 +1139,7 @@ class AIAgent: elif self.api_mode == "bedrock_converse": # AWS Bedrock — uses boto3 directly, no OpenAI client needed. # Region is extracted from the base_url or defaults to us-east-1. - import re as _re - _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _region_match = re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") self._bedrock_region = _region_match.group(1) if _region_match else "us-east-1" # Guardrail config — read from config.yaml at init time. self._bedrock_guardrail_config = None @@ -1034,26 +1167,31 @@ class AIAgent: # Explicit credentials from CLI/gateway — construct directly. # The runtime provider resolver already handled auth for us. 
client_kwargs = {"api_key": api_key, "base_url": base_url} + if _provider_timeout is not None: + client_kwargs["timeout"] = _provider_timeout if self.provider == "copilot-acp": client_kwargs["command"] = self.acp_command client_kwargs["args"] = self.acp_args effective_base = base_url - if "openrouter" in effective_base.lower(): + if base_url_host_matches(effective_base, "openrouter.ai"): client_kwargs["default_headers"] = { "HTTP-Referer": "https://hermes-agent.nousresearch.com", "X-OpenRouter-Title": "Hermes Agent", "X-OpenRouter-Categories": "productivity,cli-agent", } - elif "api.githubcopilot.com" in effective_base.lower(): + elif base_url_host_matches(effective_base, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers client_kwargs["default_headers"] = copilot_default_headers() - elif "api.kimi.com" in effective_base.lower(): + elif base_url_host_matches(effective_base, "api.kimi.com"): client_kwargs["default_headers"] = { - "User-Agent": "KimiCLI/1.30.0", + "User-Agent": "claude-code/0.1.0", } - elif "portal.qwen.ai" in effective_base.lower(): + elif base_url_host_matches(effective_base, "portal.qwen.ai"): client_kwargs["default_headers"] = _qwen_portal_headers() + elif base_url_host_matches(effective_base, "chatgpt.com"): + from agent.auxiliary_client import _codex_cloudflare_headers + client_kwargs["default_headers"] = _codex_cloudflare_headers(api_key) else: # No explicit creds — use the centralized provider router from agent.auxiliary_client import resolve_provider_client @@ -1064,6 +1202,8 @@ class AIAgent: "api_key": _routed_client.api_key, "base_url": str(_routed_client.base_url), } + if _provider_timeout is not None: + client_kwargs["timeout"] = _provider_timeout # Preserve any default_headers the router set if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers: client_kwargs["default_headers"] = dict(_routed_client._default_headers) @@ -1105,7 +1245,7 @@ class AIAgent: # stream tool call arguments token-by-token, keeping the # connection alive. _effective_base = str(client_kwargs.get("base_url", "")).lower() - if "openrouter" in _effective_base and "claude" in (self.model or "").lower(): + if base_url_host_matches(_effective_base, "openrouter.ai") and "claude" in (self.model or "").lower(): headers = client_kwargs.get("default_headers") or {} existing_beta = headers.get("x-anthropic-beta", "") _FINE_GRAINED = "fine-grained-tool-streaming-2025-05-14" @@ -1199,7 +1339,12 @@ class AIAgent: # Show prompt caching status if self._use_prompt_caching and not self.quiet_mode: - source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter" + if self._use_native_cache_layout and self.provider == "anthropic": + source = "native Anthropic" + elif self._use_native_cache_layout: + source = "Anthropic-compatible endpoint" + else: + source = "Claude via OpenRouter" print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)") # Session logging setup - auto-save conversation trajectories for debugging @@ -1271,6 +1416,10 @@ class AIAgent: _agent_cfg = _load_agent_config() except Exception: _agent_cfg = {} + # Cache only the derived auxiliary compression context override that is + # needed later by the startup feasibility check. Avoid exposing a + # broad pseudo-public config object on the agent instance. 
+ self._aux_compression_context_length_config = None # Persistent memory (MEMORY.md + USER.md) -- loaded from disk self._memory_store = None @@ -1306,31 +1455,6 @@ class AIAgent: try: _mem_provider_name = mem_config.get("provider", "") if mem_config else "" - # Auto-migrate: if Honcho was actively configured (enabled + - # credentials) but memory.provider is not set, activate the - # honcho plugin automatically. Just having the config file - # is not enough — the user may have disabled Honcho or the - # file may be from a different tool. - if not _mem_provider_name: - try: - from plugins.memory.honcho.client import HonchoClientConfig as _HCC - _hcfg = _HCC.from_global_config() - if _hcfg.enabled and (_hcfg.api_key or _hcfg.base_url): - _mem_provider_name = "honcho" - # Persist so this only auto-migrates once - try: - from hermes_cli.config import load_config as _lc, save_config as _sc - _cfg = _lc() - _cfg.setdefault("memory", {})["provider"] = "honcho" - _sc(_cfg) - except Exception: - pass - if not self.quiet_mode: - print(" ✓ Auto-migrated Honcho to memory provider plugin.") - print(" Your config and data are preserved.\n") - except Exception: - pass - if _mem_provider_name: from agent.memory_manager import MemoryManager as _MemoryManager from plugins.memory import load_memory_provider as _load_mem @@ -1339,11 +1463,10 @@ class AIAgent: if _mp and _mp.is_available(): self._memory_manager.add_provider(_mp) if self._memory_manager.providers: - from hermes_constants import get_hermes_home as _ghh _init_kwargs = { "session_id": self.session_id, "platform": platform or "cli", - "hermes_home": str(_ghh()), + "hermes_home": str(get_hermes_home()), "agent_context": "primary", } # Thread session title for memory provider scoping @@ -1358,6 +1481,16 @@ class AIAgent: # Thread gateway user identity for per-user memory scoping if self._user_id: _init_kwargs["user_id"] = self._user_id + if self._user_name: + _init_kwargs["user_name"] = self._user_name + if self._chat_id: + _init_kwargs["chat_id"] = self._chat_id + if self._chat_name: + _init_kwargs["chat_name"] = self._chat_name + if self._chat_type: + _init_kwargs["chat_type"] = self._chat_type + if self._thread_id: + _init_kwargs["thread_id"] = self._thread_id # Thread gateway session key for stable per-chat Honcho session isolation if self._gateway_session_key: _init_kwargs["gateway_session_key"] = self._gateway_session_key @@ -1426,6 +1559,24 @@ class AIAgent: compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20)) compression_protect_last = int(_compression_cfg.get("protect_last_n", 20)) + # Read optional explicit context_length override for the auxiliary + # compression model. Custom endpoints often cannot report this via + # /models, so the startup feasibility check needs the config hint. 
+ try: + _aux_cfg = _agent_cfg.get("auxiliary", {}).get("compression", {}) + except Exception: + _aux_cfg = {} + if isinstance(_aux_cfg, dict): + _aux_context_config = _aux_cfg.get("context_length") + else: + _aux_context_config = None + if _aux_context_config is not None: + try: + _aux_context_config = int(_aux_context_config) + except (TypeError, ValueError): + _aux_context_config = None + self._aux_compression_context_length_config = _aux_context_config + # Read explicit context_length override from model config _model_cfg = _agent_cfg.get("model", {}) if isinstance(_model_cfg, dict): @@ -1442,7 +1593,6 @@ class AIAgent: "Falling back to auto-detection.", _config_context_length, ) - import sys print( f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n" f" Must be a plain integer (e.g. 256000, not '256K').\n" @@ -1484,7 +1634,6 @@ class AIAgent: "Falling back to auto-detection.", self.model, _cp_ctx, ) - import sys print( f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n" f" Must be a plain integer (e.g. 256000, not '256K').\n" @@ -1640,7 +1789,7 @@ class AIAgent: logger.debug("Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override) if self._ollama_num_ctx is None and self.base_url and is_local_endpoint(self.base_url): try: - _detected = query_ollama_num_ctx(self.model, self.base_url) + _detected = query_ollama_num_ctx(self.model, self.base_url, api_key=self.api_key or "") if _detected and _detected > 0: self._ollama_num_ctx = _detected except Exception as exc: @@ -1676,6 +1825,7 @@ class AIAgent: "api_key": getattr(self, "api_key", ""), "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, + "use_native_cache_layout": self._use_native_cache_layout, # Context engine state that _try_activate_fallback() overwrites. # Use getattr for model/base_url/api_key/provider since plugin # engines may not have these (they're ContextCompressor-specific). @@ -1746,8 +1896,6 @@ class AIAgent: change persists across turns (unlike fallback which is turn-scoped). 
""" - import logging - import re as _re from hermes_cli.providers import determine_api_mode # ── Determine api_mode if not provided ── @@ -1765,7 +1913,7 @@ class AIAgent: and isinstance(base_url, str) and base_url ): - base_url = _re.sub(r"/v1/?$", "", base_url) + base_url = re.sub(r"/v1/?$", "", base_url) old_model = self.model old_provider = self.provider @@ -1795,8 +1943,9 @@ class AIAgent: self._anthropic_base_url = base_url or getattr(self, "_anthropic_base_url", None) self._anthropic_client = build_anthropic_client( effective_key, self._anthropic_base_url, + timeout=get_provider_request_timeout(self.provider, self.model), ) - self._is_anthropic_oauth = _is_oauth_token(effective_key) + self._is_anthropic_oauth = _is_oauth_token(effective_key) if _is_native_anthropic else False self.client = None self._client_kwargs = {} else: @@ -1806,6 +1955,9 @@ class AIAgent: "api_key": effective_key, "base_url": effective_base, } + _sm_timeout = get_provider_request_timeout(self.provider, self.model) + if _sm_timeout is not None: + self._client_kwargs["timeout"] = _sm_timeout self.client = self._create_openai_client( dict(self._client_kwargs), reason="switch_model", @@ -1813,10 +1965,13 @@ class AIAgent: ) # ── Re-evaluate prompt caching ── - is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower()) - or is_native_anthropic + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy( + provider=new_provider, + base_url=self.base_url, + api_mode=api_mode, + model=new_model, + ) ) # ── Update context compressor ── @@ -1851,6 +2006,7 @@ class AIAgent: "api_key": getattr(self, "api_key", ""), "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, + "use_native_cache_layout": self._use_native_cache_layout, "compressor_model": getattr(_cc, "model", self.model) if _cc else self.model, "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url, "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "", @@ -1869,6 +2025,22 @@ class AIAgent: self._fallback_activated = False self._fallback_index = 0 + # When the user deliberately swaps primary providers (e.g. openrouter + # → anthropic), drop any fallback entries that target the OLD primary + # or the NEW one. The chain was seeded from config at agent init for + # the original provider — without pruning, a failed turn on the new + # primary silently re-activates the provider the user just rejected, + # which is exactly what was reported during TUI v2 blitz testing + # ("switched to anthropic, tui keeps trying openrouter"). + old_norm = (old_provider or "").strip().lower() + new_norm = (new_provider or "").strip().lower() + if old_norm and new_norm and old_norm != new_norm: + self._fallback_chain = [ + entry for entry in self._fallback_chain + if (entry.get("provider") or "").strip().lower() not in {old_norm, new_norm} + ] + self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None + logging.info( "Model switched in-place: %s (%s) -> %s (%s)", old_model, old_provider, new_model, new_provider, @@ -1941,13 +2113,16 @@ class AIAgent: def _should_emit_quiet_tool_messages(self) -> bool: """Return True when quiet-mode tool summaries should print directly. 
- When the caller provides ``tool_progress_callback`` (for example the CLI - TUI or a gateway progress renderer), that callback owns progress display. - Emitting quiet-mode summary lines here duplicates progress and leaks tool - previews into flows that are expected to stay silent, such as - ``hermes chat -q``. + Quiet mode is used by both the interactive CLI and embedded/library + callers. The CLI may still want compact progress hints when no callback + owns rendering. Embedded/library callers, on the other hand, expect + quiet mode to be truly silent. """ - return self.quiet_mode and not self.tool_progress_callback + return ( + self.quiet_mode + and not self.tool_progress_callback + and getattr(self, "platform", "") == "cli" + ) def _emit_status(self, message: str) -> None: """Emit a lifecycle status message to both CLI and gateway channels. @@ -1997,7 +2172,10 @@ class AIAgent: return try: from agent.auxiliary_client import get_text_auxiliary_client - from agent.model_metadata import get_model_context_length + from agent.model_metadata import ( + MINIMUM_CONTEXT_LENGTH, + get_model_context_length, + ) client, aux_model = get_text_auxiliary_client( "compression", @@ -2020,45 +2198,61 @@ class AIAgent: aux_base_url = str(getattr(client, "base_url", "")) aux_api_key = str(getattr(client, "api_key", "")) - # Read user-configured context_length for the compression model. - # Custom endpoints often don't support /models API queries so - # get_model_context_length() falls through to the 128K default, - # ignoring the explicit config value. Pass it as the highest- - # priority hint so the configured value is always respected. - _aux_cfg = (self.config or {}).get("auxiliary", {}).get("compression", {}) - _aux_context_config = _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None - if _aux_context_config is not None: - try: - _aux_context_config = int(_aux_context_config) - except (TypeError, ValueError): - _aux_context_config = None - aux_context = get_model_context_length( aux_model, base_url=aux_base_url, api_key=aux_api_key, - config_context_length=_aux_context_config, + config_context_length=getattr(self, "_aux_compression_context_length_config", None), ) + # Hard floor: the auxiliary compression model must have at least + # MINIMUM_CONTEXT_LENGTH (64K) tokens of context. The main model + # is already required to meet this floor (checked earlier in + # __init__), so the compression model must too — otherwise it + # cannot summarise a full threshold-sized window of main-model + # content. Mirrors the main-model rejection pattern. + if aux_context and aux_context < MINIMUM_CONTEXT_LENGTH: + raise ValueError( + f"Auxiliary compression model {aux_model} has a context " + f"window of {aux_context:,} tokens, which is below the " + f"minimum {MINIMUM_CONTEXT_LENGTH:,} required by Hermes " + f"Agent. Choose a compression model with at least " + f"{MINIMUM_CONTEXT_LENGTH // 1000}K context (set " + f"auxiliary.compression.model in config.yaml), or set " + f"auxiliary.compression.context_length to override the " + f"detected value if it is wrong." + ) + threshold = self.context_compressor.threshold_tokens if aux_context < threshold: - # Suggest a threshold that would fit the aux model, - # rounded down to a clean percentage. - safe_pct = int((aux_context / self.context_compressor.context_length) * 100) + # Auto-correct: lower the live session threshold so + # compression actually works this session. 
The hard floor
+                # above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
+                # so the new threshold is always >= 64K.
+                old_threshold = threshold
+                new_threshold = aux_context
+                self.context_compressor.threshold_tokens = new_threshold
+                # Keep threshold_percent in sync so future main-model
+                # context_length changes (update_model) re-derive from a
+                # sensible number rather than the original too-high value.
+                main_ctx = self.context_compressor.context_length
+                if main_ctx:
+                    self.context_compressor.threshold_percent = (
+                        new_threshold / main_ctx
+                    )
+                safe_pct = int((aux_context / main_ctx) * 100) if main_ctx else 50
                 msg = (
-                    f"⚠ Compression model ({aux_model}) context "
-                    f"is {aux_context:,} tokens, but the main model's "
-                    f"compression threshold is {threshold:,} tokens. "
-                    f"Context compression will not be possible — the "
-                    f"content to summarise will exceed the auxiliary "
-                    f"model's context window.\n"
-                    f"   Fix options (config.yaml):\n"
+                    f"⚠ Compression model ({aux_model}) context is "
+                    f"{aux_context:,} tokens, but the main model's "
+                    f"compression threshold was {old_threshold:,} tokens. "
+                    f"Auto-lowered this session's threshold to "
+                    f"{new_threshold:,} tokens so compression can run.\n"
+                    f"   To make this permanent, edit config.yaml — either:\n"
                     f"     1. Use a larger compression model:\n"
                     f"        auxiliary:\n"
                     f"          compression:\n"
-                    f"            model: <model>\n"
-                    f"     2. Lower the compression threshold to fit "
-                    f"the current model:\n"
+                    f"            model: <model-name>\n"
+                    f"     2. Lower the compression threshold:\n"
                     f"        compression:\n"
                     f"          threshold: 0.{safe_pct:02d}"
                 )
@@ -2067,12 +2261,17 @@ class AIAgent:
                 logger.warning(
                     "Auxiliary compression model %s has %d token context, "
                     "below the main model's compression threshold of %d "
-                    "tokens — compression summaries will fail or be "
-                    "severely truncated.",
+                    "tokens — auto-lowered session threshold to %d to "
+                    "keep compression working.",
                     aux_model,
                     aux_context,
-                    threshold,
+                    old_threshold,
+                    new_threshold,
                 )
+        except ValueError:
+            # Hard rejections (aux below minimum context) must propagate
+            # so the session refuses to start.
+            raise
         except Exception as exc:
             logger.debug(
                 "Compression feasibility check failed (non-fatal): %s", exc
             )
@@ -2097,12 +2296,149 @@ class AIAgent:

     def _is_direct_openai_url(self, base_url: str = None) -> bool:
         """Return True when a base URL targets OpenAI's native API."""
-        url = (base_url or self._base_url_lower).lower()
-        return "api.openai.com" in url and "openrouter" not in url
+        if base_url is not None:
+            hostname = base_url_hostname(base_url)
+        else:
+            hostname = getattr(self, "_base_url_hostname", "") or base_url_hostname(
+                getattr(self, "_base_url_lower", "")
+            )
+        return hostname == "api.openai.com"
+
+    def _resolved_api_call_timeout(self) -> float:
+        """Resolve the effective per-call request timeout in seconds.
+
+        Priority:
+        1. ``providers.<provider>.models.<model>.timeout_seconds`` (per-model override)
+        2. ``providers.<provider>.request_timeout_seconds`` (provider-wide)
+        3. ``HERMES_API_TIMEOUT`` env var (legacy escape hatch)
+        4. 1800.0s default
+
+        Used by OpenAI-wire chat completions (streaming and non-streaming) so
+        the per-provider config knob wins over the 1800s default. Without this
+        helper, the hardcoded ``HERMES_API_TIMEOUT`` fallback would always be
+        passed as a per-call ``timeout=`` kwarg, overriding the client-level
+        timeout the AIAgent.__init__ path configured.
+ """ + cfg = get_provider_request_timeout(self.provider, self.model) + if cfg is not None: + return cfg + return float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) + + def _resolved_api_call_stale_timeout_base(self) -> tuple[float, bool]: + """Resolve the base non-stream stale timeout and whether it is implicit. + + Priority: + 1. ``providers..models..stale_timeout_seconds`` + 2. ``providers..stale_timeout_seconds`` + 3. ``HERMES_API_CALL_STALE_TIMEOUT`` env var + 4. 300.0s default + + Returns ``(timeout_seconds, uses_implicit_default)`` so the caller can + preserve legacy behaviors that only apply when the user has *not* + explicitly configured a stale timeout, such as auto-disabling the + detector for local endpoints. + """ + cfg = get_provider_stale_timeout(self.provider, self.model) + if cfg is not None: + return cfg, False + + env_timeout = os.getenv("HERMES_API_CALL_STALE_TIMEOUT") + if env_timeout is not None: + return float(env_timeout), False + + return 300.0, True + + def _compute_non_stream_stale_timeout(self, messages: list[dict[str, Any]]) -> float: + """Compute the effective non-stream stale timeout for this request.""" + stale_base, uses_implicit_default = self._resolved_api_call_stale_timeout_base() + base_url = getattr(self, "_base_url", None) or self.base_url or "" + if uses_implicit_default and base_url and is_local_endpoint(base_url): + return float("inf") + + est_tokens = sum(len(str(v)) for v in messages) // 4 + if est_tokens > 100_000: + return max(stale_base, 600.0) + if est_tokens > 50_000: + return max(stale_base, 450.0) + return stale_base def _is_openrouter_url(self) -> bool: """Return True when the base URL targets OpenRouter.""" - return "openrouter" in self._base_url_lower + return base_url_host_matches(self._base_url_lower, "openrouter.ai") + + def _anthropic_prompt_cache_policy( + self, + *, + provider: Optional[str] = None, + base_url: Optional[str] = None, + api_mode: Optional[str] = None, + model: Optional[str] = None, + ) -> tuple[bool, bool]: + """Decide whether to apply Anthropic prompt caching and which layout to use. + + Returns ``(should_cache, use_native_layout)``: + * ``should_cache`` — inject ``cache_control`` breakpoints for this + request (applies to OpenRouter Claude, native Anthropic, and + third-party gateways that speak the native Anthropic protocol). + * ``use_native_layout`` — place markers on the *inner* content + blocks (native Anthropic accepts and requires this layout); + when False markers go on the message envelope (OpenRouter and + OpenAI-wire proxies expect the looser layout). + + Third-party providers using the native Anthropic transport + (``api_mode == 'anthropic_messages'`` + Claude-named model) get + caching with the native layout so they benefit from the same + cost reduction as direct Anthropic callers, provided their + gateway implements the Anthropic cache_control contract + (MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do). + + Qwen / Alibaba-family models on OpenCode, OpenCode Go, and direct + Alibaba (DashScope) also honour Anthropic-style ``cache_control`` + markers on OpenAI-wire chat completions. Upstream pi-mono #3392 / + pi #3393 documented this for opencode-go Qwen. Without markers + these providers serve zero cache hits, re-billing the full prompt + on every turn. 
+ """ + eff_provider = (provider if provider is not None else self.provider) or "" + eff_base_url = base_url if base_url is not None else (self.base_url or "") + eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "") + eff_model = (model if model is not None else self.model) or "" + + base_lower = eff_base_url.lower() + model_lower = eff_model.lower() + provider_lower = eff_provider.lower() + is_claude = "claude" in model_lower + is_openrouter = base_url_host_matches(eff_base_url, "openrouter.ai") + is_anthropic_wire = eff_api_mode == "anthropic_messages" + is_native_anthropic = ( + is_anthropic_wire + and (eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com") + ) + + if is_native_anthropic: + return True, True + if is_openrouter and is_claude: + return True, False + if is_anthropic_wire and is_claude: + # Third-party Anthropic-compatible gateway. + return True, True + + # Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire + # transport that accepts Anthropic-style cache_control markers and + # rewards them with real cache hits. Without this branch + # qwen3.6-plus on opencode-go reports 0% cached tokens and burns + # through the subscription on every turn. + model_is_qwen = "qwen" in model_lower + provider_is_alibaba_family = provider_lower in { + "opencode", "opencode-zen", "opencode-go", "alibaba", + } + if provider_is_alibaba_family and model_is_qwen: + # Envelope layout (native_anthropic=False): markers on inner + # content parts, not top-level tool messages. Matches + # pi-mono's "alibaba" cacheControlFormat. + return True, False + + return False, False @staticmethod def _model_requires_responses_api(model: str) -> bool: @@ -2501,10 +2837,10 @@ class AIAgent: prompt = self._SKILL_REVIEW_PROMPT def _run_review(): - import contextlib, os as _os + import contextlib review_agent = None try: - with open(_os.devnull, "w") as _devnull, \ + with open(os.devnull, "w") as _devnull, \ contextlib.redirect_stdout(_devnull), \ contextlib.redirect_stderr(_devnull): review_agent = AIAgent( @@ -2634,7 +2970,7 @@ class AIAgent: role = msg.get("role", "unknown") content = msg.get("content") tool_calls_data = None - if hasattr(msg, "tool_calls") and msg.tool_calls: + if hasattr(msg, "tool_calls") and isinstance(msg.tool_calls, list) and msg.tool_calls: tool_calls_data = [ {"name": tc.function.name, "arguments": tc.function.arguments} for tc in msg.tool_calls @@ -2650,6 +2986,7 @@ class AIAgent: tool_call_id=msg.get("tool_call_id"), finish_reason=msg.get("finish_reason"), reasoning=msg.get("reasoning") if role == "assistant" else None, + reasoning_content=msg.get("reasoning_content") if role == "assistant" else None, reasoning_details=msg.get("reasoning_details") if role == "assistant" else None, codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None, ) @@ -2900,15 +3237,14 @@ class AIAgent: tag instead of dumping raw HTML. Falls back to a truncated str(error) for everything else. 
""" - import re as _re raw = str(error) # Cloudflare / proxy HTML pages: grab the <title> for a clean summary if "<!DOCTYPE" in raw or "<html" in raw: - m = _re.search(r"<title[^>]*>([^<]+)", raw, _re.IGNORECASE) + m = re.search(r"]*>([^<]+)", raw, re.IGNORECASE) title = m.group(1).strip() if m else "HTML error page (title not found)" # Also grab Cloudflare Ray ID if present - ray = _re.search(r"Cloudflare Ray ID:\s*]*>([^<]+)", raw) + ray = re.search(r"Cloudflare Ray ID:\s*]*>([^<]+)", raw) ray_id = ray.group(1).strip() if ray else None status_code = getattr(error, "status_code", None) parts = [] @@ -3401,7 +3737,7 @@ class AIAgent: existing = getattr(self, "_pending_steer", None) self._pending_steer = (existing + "\n" + steer_text) if existing else steer_text return - marker = f"\n\n[USER STEER (injected mid-run, not tool output): {steer_text}]" + marker = f"\n\nUser guidance: {steer_text}" existing_content = messages[target_idx].get("content", "") if not isinstance(existing_content, str): # Anthropic multimodal content blocks — preserve them and append @@ -3577,14 +3913,12 @@ class AIAgent: # 2. Clean terminal sandbox environments try: - from tools.terminal_tool import cleanup_vm cleanup_vm(task_id) except Exception: pass # 3. Clean browser daemon sessions try: - from tools.browser_tool import cleanup_browser cleanup_browser(task_id) except Exception: pass @@ -3995,27 +4329,6 @@ class AIAgent: if self._memory_store: self._memory_store.load_from_disk() - def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: - """Convert chat-completions tool schemas to Responses function-tool schemas.""" - source_tools = tools if tools is not None else self.tools - if not source_tools: - return None - - converted: List[Dict[str, Any]] = [] - for item in source_tools: - fn = item.get("function", {}) if isinstance(item, dict) else {} - name = fn.get("name") - if not isinstance(name, str) or not name.strip(): - continue - converted.append({ - "type": "function", - "name": name, - "description": fn.get("description", ""), - "strict": False, - "parameters": fn.get("parameters", {"type": "object", "properties": {}}), - }) - return converted or None - @staticmethod def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str: """Generate a deterministic call_id from tool call content. @@ -4024,27 +4337,12 @@ class AIAgent: Deterministic IDs prevent cache invalidation — random UUIDs would make every API call's prefix unique, breaking OpenAI's prompt cache. 
""" - import hashlib - seed = f"{fn_name}:{arguments}:{index}" - digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12] - return f"call_{digest}" + return _codex_deterministic_call_id(fn_name, arguments, index) @staticmethod def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]: """Split a stored tool id into (call_id, response_item_id).""" - if not isinstance(raw_id, str): - return None, None - value = raw_id.strip() - if not value: - return None, None - if "|" in value: - call_id, response_item_id = value.split("|", 1) - call_id = call_id.strip() or None - response_item_id = response_item_id.strip() or None - return call_id, response_item_id - if value.startswith("fc_"): - return None, value - return value, None + return _codex_split_responses_tool_id(raw_id) def _derive_responses_function_call_id( self, @@ -4052,569 +4350,7 @@ class AIAgent: response_item_id: Optional[str] = None, ) -> str: """Build a valid Responses `function_call.id` (must start with `fc_`).""" - if isinstance(response_item_id, str): - candidate = response_item_id.strip() - if candidate.startswith("fc_"): - return candidate - - source = (call_id or "").strip() - if source.startswith("fc_"): - return source - if source.startswith("call_") and len(source) > len("call_"): - return f"fc_{source[len('call_'):]}" - - sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source) - if sanitized.startswith("fc_"): - return sanitized - if sanitized.startswith("call_") and len(sanitized) > len("call_"): - return f"fc_{sanitized[len('call_'):]}" - if sanitized: - return f"fc_{sanitized[:48]}" - - seed = source or str(response_item_id or "") or uuid.uuid4().hex - digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24] - return f"fc_{digest}" - - def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Convert internal chat-style messages to Responses input items.""" - items: List[Dict[str, Any]] = [] - seen_item_ids: set = set() - - for msg in messages: - if not isinstance(msg, dict): - continue - role = msg.get("role") - if role == "system": - continue - - if role in {"user", "assistant"}: - content = msg.get("content", "") - content_text = str(content) if content is not None else "" - - if role == "assistant": - # Replay encrypted reasoning items from previous turns - # so the API can maintain coherent reasoning chains. - codex_reasoning = msg.get("codex_reasoning_items") - has_codex_reasoning = False - if isinstance(codex_reasoning, list): - for ri in codex_reasoning: - if isinstance(ri, dict) and ri.get("encrypted_content"): - item_id = ri.get("id") - if item_id and item_id in seen_item_ids: - continue - # Strip the "id" field — with store=False the - # Responses API cannot look up items by ID and - # returns 404. The encrypted_content blob is - # self-contained for reasoning chain continuity. - replay_item = {k: v for k, v in ri.items() if k != "id"} - items.append(replay_item) - if item_id: - seen_item_ids.add(item_id) - has_codex_reasoning = True - - if content_text.strip(): - items.append({"role": "assistant", "content": content_text}) - elif has_codex_reasoning: - # The Responses API requires a following item after each - # reasoning item (otherwise: missing_following_item error). - # When the assistant produced only reasoning with no visible - # content, emit an empty assistant message as the required - # following item. 
- items.append({"role": "assistant", "content": ""}) - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tc in tool_calls: - if not isinstance(tc, dict): - continue - fn = tc.get("function", {}) - fn_name = fn.get("name") - if not isinstance(fn_name, str) or not fn_name.strip(): - continue - - embedded_call_id, embedded_response_item_id = self._split_responses_tool_id( - tc.get("id") - ) - call_id = tc.get("call_id") - if not isinstance(call_id, str) or not call_id.strip(): - call_id = embedded_call_id - if not isinstance(call_id, str) or not call_id.strip(): - if ( - isinstance(embedded_response_item_id, str) - and embedded_response_item_id.startswith("fc_") - and len(embedded_response_item_id) > len("fc_") - ): - call_id = f"call_{embedded_response_item_id[len('fc_'):]}" - else: - _raw_args = str(fn.get("arguments", "{}")) - call_id = self._deterministic_call_id(fn_name, _raw_args, len(items)) - call_id = call_id.strip() - - arguments = fn.get("arguments", "{}") - if isinstance(arguments, dict): - arguments = json.dumps(arguments, ensure_ascii=False) - elif not isinstance(arguments, str): - arguments = str(arguments) - arguments = arguments.strip() or "{}" - - items.append({ - "type": "function_call", - "call_id": call_id, - "name": fn_name, - "arguments": arguments, - }) - continue - - items.append({"role": role, "content": content_text}) - continue - - if role == "tool": - raw_tool_call_id = msg.get("tool_call_id") - call_id, _ = self._split_responses_tool_id(raw_tool_call_id) - if not isinstance(call_id, str) or not call_id.strip(): - if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip(): - call_id = raw_tool_call_id.strip() - if not isinstance(call_id, str) or not call_id.strip(): - continue - items.append({ - "type": "function_call_output", - "call_id": call_id, - "output": str(msg.get("content", "") or ""), - }) - - return items - - def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]: - if not isinstance(raw_items, list): - raise ValueError("Codex Responses input must be a list of input items.") - - normalized: List[Dict[str, Any]] = [] - seen_ids: set = set() - for idx, item in enumerate(raw_items): - if not isinstance(item, dict): - raise ValueError(f"Codex Responses input[{idx}] must be an object.") - - item_type = item.get("type") - if item_type == "function_call": - call_id = item.get("call_id") - name = item.get("name") - if not isinstance(call_id, str) or not call_id.strip(): - raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.") - if not isinstance(name, str) or not name.strip(): - raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.") - - arguments = item.get("arguments", "{}") - if isinstance(arguments, dict): - arguments = json.dumps(arguments, ensure_ascii=False) - elif not isinstance(arguments, str): - arguments = str(arguments) - arguments = arguments.strip() or "{}" - - normalized.append( - { - "type": "function_call", - "call_id": call_id.strip(), - "name": name.strip(), - "arguments": arguments, - } - ) - continue - - if item_type == "function_call_output": - call_id = item.get("call_id") - if not isinstance(call_id, str) or not call_id.strip(): - raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.") - output = item.get("output", "") - if output is None: - output = "" - if not isinstance(output, str): - output = str(output) - - normalized.append( - { - "type": "function_call_output", - "call_id": 
call_id.strip(), - "output": output, - } - ) - continue - - if item_type == "reasoning": - encrypted = item.get("encrypted_content") - if isinstance(encrypted, str) and encrypted: - item_id = item.get("id") - if isinstance(item_id, str) and item_id: - if item_id in seen_ids: - continue - seen_ids.add(item_id) - reasoning_item = {"type": "reasoning", "encrypted_content": encrypted} - # Do NOT include the "id" in the outgoing item — with - # store=False (our default) the API tries to resolve the - # id server-side and returns 404. The id is still used - # above for local deduplication via seen_ids. - summary = item.get("summary") - if isinstance(summary, list): - reasoning_item["summary"] = summary - else: - reasoning_item["summary"] = [] - normalized.append(reasoning_item) - continue - - role = item.get("role") - if role in {"user", "assistant"}: - content = item.get("content", "") - if content is None: - content = "" - if not isinstance(content, str): - content = str(content) - - normalized.append({"role": role, "content": content}) - continue - - raise ValueError( - f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})." - ) - - return normalized - - def _preflight_codex_api_kwargs( - self, - api_kwargs: Any, - *, - allow_stream: bool = False, - ) -> Dict[str, Any]: - if not isinstance(api_kwargs, dict): - raise ValueError("Codex Responses request must be a dict.") - - required = {"model", "instructions", "input"} - missing = [key for key in required if key not in api_kwargs] - if missing: - raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.") - - model = api_kwargs.get("model") - if not isinstance(model, str) or not model.strip(): - raise ValueError("Codex Responses request 'model' must be a non-empty string.") - model = model.strip() - - instructions = api_kwargs.get("instructions") - if instructions is None: - instructions = "" - if not isinstance(instructions, str): - instructions = str(instructions) - instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY - - normalized_input = self._preflight_codex_input_items(api_kwargs.get("input")) - - tools = api_kwargs.get("tools") - normalized_tools = None - if tools is not None: - if not isinstance(tools, list): - raise ValueError("Codex Responses request 'tools' must be a list when provided.") - normalized_tools = [] - for idx, tool in enumerate(tools): - if not isinstance(tool, dict): - raise ValueError(f"Codex Responses tools[{idx}] must be an object.") - if tool.get("type") != "function": - raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.") - - name = tool.get("name") - parameters = tool.get("parameters") - if not isinstance(name, str) or not name.strip(): - raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.") - if not isinstance(parameters, dict): - raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.") - - description = tool.get("description", "") - if description is None: - description = "" - if not isinstance(description, str): - description = str(description) - - strict = tool.get("strict", False) - if not isinstance(strict, bool): - strict = bool(strict) - - normalized_tools.append( - { - "type": "function", - "name": name.strip(), - "description": description, - "strict": strict, - "parameters": parameters, - } - ) - - store = api_kwargs.get("store", False) - if store is not False: - raise ValueError("Codex Responses contract requires 'store' to be 
false.") - - allowed_keys = { - "model", "instructions", "input", "tools", "store", - "reasoning", "include", "max_output_tokens", "temperature", - "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier", - "extra_headers", - } - normalized: Dict[str, Any] = { - "model": model, - "instructions": instructions, - "input": normalized_input, - "store": False, - } - if normalized_tools is not None: - normalized["tools"] = normalized_tools - - # Pass through reasoning config - reasoning = api_kwargs.get("reasoning") - if isinstance(reasoning, dict): - normalized["reasoning"] = reasoning - include = api_kwargs.get("include") - if isinstance(include, list): - normalized["include"] = include - service_tier = api_kwargs.get("service_tier") - if isinstance(service_tier, str) and service_tier.strip(): - normalized["service_tier"] = service_tier.strip() - - # Pass through max_output_tokens and temperature - max_output_tokens = api_kwargs.get("max_output_tokens") - if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0: - normalized["max_output_tokens"] = int(max_output_tokens) - temperature = api_kwargs.get("temperature") - if isinstance(temperature, (int, float)): - normalized["temperature"] = float(temperature) - - # Pass through tool_choice, parallel_tool_calls, prompt_cache_key - for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"): - val = api_kwargs.get(passthrough_key) - if val is not None: - normalized[passthrough_key] = val - - extra_headers = api_kwargs.get("extra_headers") - if extra_headers is not None: - if not isinstance(extra_headers, dict): - raise ValueError("Codex Responses request 'extra_headers' must be an object.") - normalized_headers: Dict[str, str] = {} - for key, value in extra_headers.items(): - if not isinstance(key, str) or not key.strip(): - raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.") - if value is None: - continue - normalized_headers[key.strip()] = str(value) - if normalized_headers: - normalized["extra_headers"] = normalized_headers - - if allow_stream: - stream = api_kwargs.get("stream") - if stream is not None and stream is not True: - raise ValueError("Codex Responses 'stream' must be true when set.") - if stream is True: - normalized["stream"] = True - allowed_keys.add("stream") - elif "stream" in api_kwargs: - raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.") - - unexpected = sorted(key for key in api_kwargs if key not in allowed_keys) - if unexpected: - raise ValueError( - f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}." 
- ) - - return normalized - - def _extract_responses_message_text(self, item: Any) -> str: - """Extract assistant text from a Responses message output item.""" - content = getattr(item, "content", None) - if not isinstance(content, list): - return "" - - chunks: List[str] = [] - for part in content: - ptype = getattr(part, "type", None) - if ptype not in {"output_text", "text"}: - continue - text = getattr(part, "text", None) - if isinstance(text, str) and text: - chunks.append(text) - return "".join(chunks).strip() - - def _extract_responses_reasoning_text(self, item: Any) -> str: - """Extract a compact reasoning text from a Responses reasoning item.""" - summary = getattr(item, "summary", None) - if isinstance(summary, list): - chunks: List[str] = [] - for part in summary: - text = getattr(part, "text", None) - if isinstance(text, str) and text: - chunks.append(text) - if chunks: - return "\n".join(chunks).strip() - text = getattr(item, "text", None) - if isinstance(text, str) and text: - return text.strip() - return "" - - def _normalize_codex_response(self, response: Any) -> tuple[Any, str]: - """Normalize a Responses API object to an assistant_message-like object.""" - output = getattr(response, "output", None) - if not isinstance(output, list) or not output: - # The Codex backend can return empty output when the answer was - # delivered entirely via stream events. Check output_text as a - # last-resort fallback before raising. - out_text = getattr(response, "output_text", None) - if isinstance(out_text, str) and out_text.strip(): - logger.debug( - "Codex response has empty output but output_text is present (%d chars); " - "synthesizing output item.", len(out_text.strip()), - ) - output = [SimpleNamespace( - type="message", role="assistant", status="completed", - content=[SimpleNamespace(type="output_text", text=out_text.strip())], - )] - response.output = output - else: - raise RuntimeError("Responses API returned no output items") - - response_status = getattr(response, "status", None) - if isinstance(response_status, str): - response_status = response_status.strip().lower() - else: - response_status = None - - if response_status in {"failed", "cancelled"}: - error_obj = getattr(response, "error", None) - if isinstance(error_obj, dict): - error_msg = error_obj.get("message") or str(error_obj) - else: - error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'" - raise RuntimeError(error_msg) - - content_parts: List[str] = [] - reasoning_parts: List[str] = [] - reasoning_items_raw: List[Dict[str, Any]] = [] - tool_calls: List[Any] = [] - has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} - saw_commentary_phase = False - saw_final_answer_phase = False - - for item in output: - item_type = getattr(item, "type", None) - item_status = getattr(item, "status", None) - if isinstance(item_status, str): - item_status = item_status.strip().lower() - else: - item_status = None - - if item_status in {"queued", "in_progress", "incomplete"}: - has_incomplete_items = True - - if item_type == "message": - item_phase = getattr(item, "phase", None) - if isinstance(item_phase, str): - normalized_phase = item_phase.strip().lower() - if normalized_phase in {"commentary", "analysis"}: - saw_commentary_phase = True - elif normalized_phase in {"final_answer", "final"}: - saw_final_answer_phase = True - message_text = self._extract_responses_message_text(item) - if message_text: - content_parts.append(message_text) - elif item_type == 
"reasoning": - reasoning_text = self._extract_responses_reasoning_text(item) - if reasoning_text: - reasoning_parts.append(reasoning_text) - # Capture the full reasoning item for multi-turn continuity. - # encrypted_content is an opaque blob the API needs back on - # subsequent turns to maintain coherent reasoning chains. - encrypted = getattr(item, "encrypted_content", None) - if isinstance(encrypted, str) and encrypted: - raw_item = {"type": "reasoning", "encrypted_content": encrypted} - item_id = getattr(item, "id", None) - if isinstance(item_id, str) and item_id: - raw_item["id"] = item_id - # Capture summary — required by the API when replaying reasoning items - summary = getattr(item, "summary", None) - if isinstance(summary, list): - raw_summary = [] - for part in summary: - text = getattr(part, "text", None) - if isinstance(text, str): - raw_summary.append({"type": "summary_text", "text": text}) - raw_item["summary"] = raw_summary - reasoning_items_raw.append(raw_item) - elif item_type == "function_call": - if item_status in {"queued", "in_progress", "incomplete"}: - continue - fn_name = getattr(item, "name", "") or "" - arguments = getattr(item, "arguments", "{}") - if not isinstance(arguments, str): - arguments = json.dumps(arguments, ensure_ascii=False) - raw_call_id = getattr(item, "call_id", None) - raw_item_id = getattr(item, "id", None) - embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) - call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id - if not isinstance(call_id, str) or not call_id.strip(): - call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls)) - call_id = call_id.strip() - response_item_id = raw_item_id if isinstance(raw_item_id, str) else None - response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) - tool_calls.append(SimpleNamespace( - id=call_id, - call_id=call_id, - response_item_id=response_item_id, - type="function", - function=SimpleNamespace(name=fn_name, arguments=arguments), - )) - elif item_type == "custom_tool_call": - fn_name = getattr(item, "name", "") or "" - arguments = getattr(item, "input", "{}") - if not isinstance(arguments, str): - arguments = json.dumps(arguments, ensure_ascii=False) - raw_call_id = getattr(item, "call_id", None) - raw_item_id = getattr(item, "id", None) - embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) - call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id - if not isinstance(call_id, str) or not call_id.strip(): - call_id = self._deterministic_call_id(fn_name, arguments, len(tool_calls)) - call_id = call_id.strip() - response_item_id = raw_item_id if isinstance(raw_item_id, str) else None - response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) - tool_calls.append(SimpleNamespace( - id=call_id, - call_id=call_id, - response_item_id=response_item_id, - type="function", - function=SimpleNamespace(name=fn_name, arguments=arguments), - )) - - final_text = "\n".join([p for p in content_parts if p]).strip() - if not final_text and hasattr(response, "output_text"): - out_text = getattr(response, "output_text", "") - if isinstance(out_text, str): - final_text = out_text.strip() - - assistant_message = SimpleNamespace( - content=final_text, - tool_calls=tool_calls, - reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None, - reasoning_content=None, - reasoning_details=None, - 
codex_reasoning_items=reasoning_items_raw or None, - ) - - if tool_calls: - finish_reason = "tool_calls" - elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): - finish_reason = "incomplete" - elif reasoning_items_raw and not final_text: - # Response contains only reasoning (encrypted thinking state) with - # no visible content or tool calls. The model is still thinking and - # needs another turn to produce the actual answer. Marking this as - # "stop" would send it into the empty-content retry loop which burns - # 3 retries then fails — treat it as incomplete instead so the Codex - # continuation path handles it correctly. - finish_reason = "incomplete" - else: - finish_reason = "stop" - return assistant_message, finish_reason + return _codex_derive_responses_function_call_id(call_id, response_item_id) def _thread_identity(self) -> str: thread = threading.current_thread() @@ -4666,6 +4402,30 @@ class AIAgent: return bool(getattr(http_client, "is_closed", False)) return False + @staticmethod + def _build_keepalive_http_client() -> Any: + try: + import httpx as _httpx + import socket as _socket + + _sock_opts = [(_socket.SOL_SOCKET, _socket.SO_KEEPALIVE, 1)] + if hasattr(_socket, "TCP_KEEPIDLE"): + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPIDLE, 30)) + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPINTVL, 10)) + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPCNT, 3)) + elif hasattr(_socket, "TCP_KEEPALIVE"): + _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPALIVE, 30)) + # When a custom transport is provided, httpx won't auto-read proxy + # from env vars (allow_env_proxies = trust_env and transport is None). + # Explicitly read proxy settings to ensure HTTP_PROXY/HTTPS_PROXY work. + _proxy = _get_proxy_from_env() + return _httpx.Client( + transport=_httpx.HTTPTransport(socket_options=_sock_opts), + proxy=_proxy, + ) + except Exception: + return None + def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any: from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls # Treat client_kwargs as read-only. Callers pass self._client_kwargs (or shallow @@ -4706,6 +4466,27 @@ class AIAgent: self._client_log_context(), ) return client + if self.provider == "gemini": + from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url + + base_url = str(client_kwargs.get("base_url", "") or "") + if is_native_gemini_base_url(base_url): + safe_kwargs = { + k: v for k, v in client_kwargs.items() + if k in {"api_key", "base_url", "default_headers", "timeout", "http_client"} + } + if "http_client" not in safe_kwargs: + keepalive_http = self._build_keepalive_http_client() + if keepalive_http is not None: + safe_kwargs["http_client"] = keepalive_http + client = GeminiNativeClient(**safe_kwargs) + logger.info( + "Gemini native client created (%s, shared=%s) %s", + reason, + shared, + self._client_log_context(), + ) + return client # Inject TCP keepalives so the kernel detects dead provider connections # instead of letting them sit silently in CLOSE-WAIT (#10324). Without # this, a peer that drops mid-stream leaves the socket in a state where @@ -4724,23 +4505,9 @@ class AIAgent: # Tests in ``tests/run_agent/test_create_openai_client_reuse.py`` and # ``tests/run_agent/test_sequential_chats_live.py`` pin this invariant. 
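For reference, the knobs `_build_keepalive_http_client` sets above mean: first probe after 30 s of idle (`TCP_KEEPIDLE`), probes every 10 s (`TCP_KEEPINTVL`), and the connection is reset after 3 missed probes (`TCP_KEEPCNT`), so a silently dropped peer is detected in roughly 30 + 3 x 10 = 60 s instead of hanging until the 1800 s request timeout. A usage sketch, assuming the official `openai` SDK (its `http_client` kwarg accepts any `httpx.Client`):

```python
# Sketch: wiring the keepalive transport into an OpenAI-wire client.
# _build_keepalive_http_client returns None when httpx is unavailable or
# the socket options fail, so fall back to the SDK default transport.
from openai import OpenAI

def make_client(api_key: str, base_url: str) -> OpenAI:
    kwargs = {"api_key": api_key, "base_url": base_url}
    http_client = AIAgent._build_keepalive_http_client()
    if http_client is not None:
        kwargs["http_client"] = http_client
    return OpenAI(**kwargs)
```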
if "http_client" not in client_kwargs: - try: - import httpx as _httpx - import socket as _socket - _sock_opts = [(_socket.SOL_SOCKET, _socket.SO_KEEPALIVE, 1)] - if hasattr(_socket, "TCP_KEEPIDLE"): - # Linux - _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPIDLE, 30)) - _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPINTVL, 10)) - _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPCNT, 3)) - elif hasattr(_socket, "TCP_KEEPALIVE"): - # macOS (uses TCP_KEEPALIVE instead of TCP_KEEPIDLE) - _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPALIVE, 30)) - client_kwargs["http_client"] = _httpx.Client( - transport=_httpx.HTTPTransport(socket_options=_sock_opts), - ) - except Exception: - pass # Fall through to default transport if socket opts fail + keepalive_http = self._build_keepalive_http_client() + if keepalive_http is not None: + client_kwargs["http_client"] = keepalive_http client = OpenAI(**client_kwargs) logger.info( "OpenAI client created (%s, shared=%s) %s", @@ -5077,7 +4844,7 @@ class AIAgent: active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback") fallback_kwargs = dict(api_kwargs) fallback_kwargs["stream"] = True - fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True) + fallback_kwargs = self._get_codex_transport().preflight_kwargs(fallback_kwargs, allow_stream=True) stream_or_response = active_client.responses.create(**fallback_kwargs) # Compatibility shim for mocks or providers that still return a concrete response. @@ -5242,31 +5009,44 @@ class AIAgent: pass try: - self._anthropic_client = build_anthropic_client(new_token, getattr(self, "_anthropic_base_url", None)) + self._anthropic_client = build_anthropic_client( + new_token, + getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), + ) except Exception as exc: logger.warning("Failed to rebuild Anthropic client after credential refresh: %s", exc) return False self._anthropic_api_key = new_token - # Update OAuth flag — token type may have changed (API key ↔ OAuth) + # Update OAuth flag — token type may have changed (API key ↔ OAuth). + # Only treat as OAuth on native Anthropic; third-party endpoints using + # the Anthropic protocol must not trip OAuth paths (#1739 & third-party + # identity-injection guard). 
from agent.anthropic_adapter import _is_oauth_token - self._is_anthropic_oauth = _is_oauth_token(new_token) + self._is_anthropic_oauth = _is_oauth_token(new_token) if self.provider == "anthropic" else False return True def _apply_client_headers_for_base_url(self, base_url: str) -> None: - from agent.auxiliary_client import _OR_HEADERS + from agent.auxiliary_client import _AI_GATEWAY_HEADERS, _OR_HEADERS - normalized = (base_url or "").lower() - if "openrouter" in normalized: + if base_url_host_matches(base_url, "openrouter.ai"): self._client_kwargs["default_headers"] = dict(_OR_HEADERS) - elif "api.githubcopilot.com" in normalized: + elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"): + self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS) + elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers self._client_kwargs["default_headers"] = copilot_default_headers() - elif "api.kimi.com" in normalized: - self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} - elif "portal.qwen.ai" in normalized: + elif base_url_host_matches(base_url, "api.kimi.com"): + self._client_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(base_url, "portal.qwen.ai"): self._client_kwargs["default_headers"] = _qwen_portal_headers() + elif base_url_host_matches(base_url, "chatgpt.com"): + from agent.auxiliary_client import _codex_cloudflare_headers + self._client_kwargs["default_headers"] = _codex_cloudflare_headers( + self._client_kwargs.get("api_key", "") + ) else: self._client_kwargs.pop("default_headers", None) @@ -5284,8 +5064,11 @@ class AIAgent: self._anthropic_api_key = runtime_key self._anthropic_base_url = runtime_base - self._anthropic_client = build_anthropic_client(runtime_key, runtime_base) - self._is_anthropic_oauth = _is_oauth_token(runtime_key) + self._anthropic_client = build_anthropic_client( + runtime_key, runtime_base, + timeout=get_provider_request_timeout(self.provider, self.model), + ) + self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False self.api_key = runtime_key self.base_url = runtime_base return @@ -5441,18 +5224,9 @@ class AIAgent: # httpx timeout (default 1800s) with zero feedback. The stale # detector kills the connection early so the main retry loop can # apply richer recovery (credential rotation, provider fallback). 
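A worked example of the tiering inside `_compute_non_stream_stale_timeout`, which replaces the inline logic removed below (the divide-by-4 character heuristic comes from the code above; the message size is illustrative):

```python
# ~600K characters of message content is ~150K estimated tokens, which
# crosses the 100K tier, so an implicit 300s base is raised to 600s.
messages = [{"role": "user", "content": "x" * 600_000}]
est_tokens = sum(len(str(v)) for v in messages) // 4
assert est_tokens > 100_000           # large-context tier
assert max(300.0, 600.0) == 600.0     # effective stale timeout
```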
- _stale_base = float(os.getenv("HERMES_API_CALL_STALE_TIMEOUT", 300.0)) - _base_url = getattr(self, "_base_url", None) or "" - if _stale_base == 300.0 and _base_url and is_local_endpoint(_base_url): - _stale_timeout = float("inf") - else: - _est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4 - if _est_tokens > 100_000: - _stale_timeout = max(_stale_base, 600.0) - elif _est_tokens > 50_000: - _stale_timeout = max(_stale_base, 450.0) - else: - _stale_timeout = _stale_base + _stale_timeout = self._compute_non_stream_stale_timeout( + api_kwargs.get("messages", []) + ) _call_start = time.time() self._touch_activity("waiting for non-streaming API response") @@ -5496,6 +5270,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), ) else: rc = request_client_holder.get("client") @@ -5527,6 +5302,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), ) else: request_client = request_client_holder.get("client") @@ -5743,18 +5519,30 @@ class AIAgent: def _call_chat_completions(): """Stream a chat completions response.""" import httpx as _httpx - _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) - _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) - # Local providers (Ollama, llama.cpp, vLLM) can take minutes for - # prefill on large contexts before producing the first token. - # Auto-increase the httpx read timeout unless the user explicitly - # overrode HERMES_STREAM_READ_TIMEOUT. - if _stream_read_timeout == 120.0 and self.base_url and is_local_endpoint(self.base_url): - _stream_read_timeout = _base_timeout - logger.debug( - "Local provider detected (%s) — stream read timeout raised to %.0fs", - self.base_url, _stream_read_timeout, - ) + # Per-provider / per-model request_timeout_seconds (from config.yaml) + # wins over the HERMES_API_TIMEOUT env default if the user set it. + _provider_timeout_cfg = get_provider_request_timeout(self.provider, self.model) + _base_timeout = ( + _provider_timeout_cfg + if _provider_timeout_cfg is not None + else float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) + ) + # Read timeout: config wins here too. Otherwise use + # HERMES_STREAM_READ_TIMEOUT (default 120s) for cloud providers. + if _provider_timeout_cfg is not None: + _stream_read_timeout = _provider_timeout_cfg + else: + _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 120.0)) + # Local providers (Ollama, llama.cpp, vLLM) can take minutes for + # prefill on large contexts before producing the first token. + # Auto-increase the httpx read timeout unless the user explicitly + # overrode HERMES_STREAM_READ_TIMEOUT. 
+ if _stream_read_timeout == 120.0 and self.base_url and is_local_endpoint(self.base_url): + _stream_read_timeout = _base_timeout + logger.debug( + "Local provider detected (%s) — stream read timeout raised to %.0fs", + self.base_url, _stream_read_timeout, + ) stream_kwargs = { **api_kwargs, "stream": True, @@ -6255,6 +6043,7 @@ class AIAgent: self._anthropic_client = build_anthropic_client( self._anthropic_api_key, getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), ) else: request_client = request_client_holder.get("client") @@ -6365,8 +6154,9 @@ class AIAgent: fb_base_url_hint = (fb.get("base_url") or "").strip() or None fb_api_key_hint = (fb.get("api_key") or "").strip() or None # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env - # when no explicit key is in the fallback config. - if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint: + # when no explicit key is in the fallback config. Host match + # (not substring) — see GHSA-76xc-57q6-vm5m. + if fb_base_url_hint and base_url_host_matches(fb_base_url_hint, "ollama.com") and not fb_api_key_hint: fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None fb_client, _resolved_fb_model = resolve_provider_client( fb_provider, model=fb_model, raw_codex=True, @@ -6401,7 +6191,10 @@ class AIAgent: # provider-specific exceptions like Copilot gpt-5-mini on # chat completions. fb_api_mode = "codex_responses" - elif fb_provider == "bedrock" or "bedrock-runtime" in fb_base_url.lower(): + elif fb_provider == "bedrock" or ( + base_url_hostname(fb_base_url).startswith("bedrock-runtime.") + and base_url_host_matches(fb_base_url, "amazonaws.com") + ): fb_api_mode = "bedrock_converse" old_model = self.model @@ -6411,6 +6204,11 @@ class AIAgent: self.api_mode = fb_api_mode self._fallback_activated = True + # Honor per-provider / per-model request_timeout_seconds for the + # fallback target (same knob the primary client uses). None = use + # SDK default. + _fb_timeout = get_provider_request_timeout(fb_provider, fb_model) + if fb_api_mode == "anthropic_messages": # Build native Anthropic client instead of using OpenAI client from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token @@ -6418,8 +6216,10 @@ class AIAgent: self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = fb_base_url - self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url) - self._is_anthropic_oauth = _is_oauth_token(effective_key) + self._anthropic_client = build_anthropic_client( + effective_key, self._anthropic_base_url, timeout=_fb_timeout, + ) + self._is_anthropic_oauth = _is_oauth_token(effective_key) if fb_provider == "anthropic" else False self.client = None self._client_kwargs = {} else: @@ -6442,12 +6242,21 @@ class AIAgent: "base_url": fb_base_url, **({"default_headers": dict(fb_headers)} if fb_headers else {}), } + if _fb_timeout is not None: + self._client_kwargs["timeout"] = _fb_timeout + # Rebuild the shared OpenAI client so the configured + # timeout takes effect on the very next fallback request, + # not only after a later credential-rotation rebuild. 
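The streaming-timeout precedence implemented just above in `_call_chat_completions`, restated as a tiny standalone function (the name and signature are illustrative, not part of the diff):

```python
def resolve_stream_timeouts(cfg_timeout, env_api, env_read, is_local):
    """Mirror of the precedence above: config > env > default, with the
    local-endpoint read-timeout bump only when the env default is in play."""
    base = cfg_timeout if cfg_timeout is not None else float(env_api or 1800.0)
    read = cfg_timeout if cfg_timeout is not None else float(env_read or 120.0)
    if cfg_timeout is None and read == 120.0 and is_local:
        read = base  # long prefill on Ollama/llama.cpp/vLLM
    return base, read

assert resolve_stream_timeouts(240.0, None, None, False) == (240.0, 240.0)
assert resolve_stream_timeouts(None, None, None, True) == (1800.0, 1800.0)
```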
+ self._replace_primary_openai_client(reason="fallback_timeout_apply") # Re-evaluate prompt caching for the new provider/model - is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic" - self._use_prompt_caching = ( - ("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower()) - or is_native_anthropic + self._use_prompt_caching, self._use_native_cache_layout = ( + self._anthropic_prompt_cache_policy( + provider=fb_provider, + base_url=fb_base_url, + api_mode=fb_api_mode, + model=fb_model, + ) ) # Update context compressor limits for the fallback model. @@ -6507,6 +6316,12 @@ class AIAgent: self.api_key = rt["api_key"] self._client_kwargs = dict(rt["client_kwargs"]) self._use_prompt_caching = rt["use_prompt_caching"] + # Default to native layout when the restored snapshot predates the + # native-vs-proxy split (older sessions saved before this PR). + self._use_native_cache_layout = rt.get( + "use_native_cache_layout", + self.api_mode == "anthropic_messages" and self.provider == "anthropic", + ) # ── Rebuild client for the primary provider ── if self.api_mode == "anthropic_messages": @@ -6515,6 +6330,7 @@ class AIAgent: self._anthropic_base_url = rt["anthropic_base_url"] self._anthropic_client = build_anthropic_client( rt["anthropic_api_key"], rt["anthropic_base_url"], + timeout=get_provider_request_timeout(self.provider, self.model), ) self._is_anthropic_oauth = rt["is_anthropic_oauth"] self.client = None @@ -6611,6 +6427,7 @@ class AIAgent: self._anthropic_base_url = rt["anthropic_base_url"] self._anthropic_client = build_anthropic_client( rt["anthropic_api_key"], rt["anthropic_base_url"], + timeout=get_provider_request_timeout(self.provider, self.model), ) self._is_anthropic_oauth = rt["is_anthropic_oauth"] self.client = None @@ -6760,6 +6577,42 @@ class AIAgent: return suffix return "[A multimodal message was converted to text for Anthropic compatibility.]" + def _get_anthropic_transport(self): + """Return the cached AnthropicTransport instance (lazy singleton).""" + t = getattr(self, "_anthropic_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("anthropic_messages") + self._anthropic_transport = t + return t + + def _get_codex_transport(self): + """Return the cached ResponsesApiTransport instance (lazy singleton).""" + t = getattr(self, "_codex_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("codex_responses") + self._codex_transport = t + return t + + def _get_chat_completions_transport(self): + """Return the cached ChatCompletionsTransport instance (lazy singleton).""" + t = getattr(self, "_chat_completions_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("chat_completions") + self._chat_completions_transport = t + return t + + def _get_bedrock_transport(self): + """Return the cached BedrockTransport instance (lazy singleton).""" + t = getattr(self, "_bedrock_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("bedrock_converse") + self._bedrock_transport = t + return t + def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list: if not any( isinstance(msg, dict) and self._content_has_image_parts(msg.get("content")) @@ -6782,15 +6635,35 @@ class AIAgent: Alibaba/DashScope keeps dots (e.g. qwen3.5-plus). MiniMax keeps dots (e.g. MiniMax-M2.7). OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free). 
- ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1).""" - if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai"}: + ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1). + AWS Bedrock uses dotted inference-profile IDs + (e.g. ``global.anthropic.claude-opus-4-7``, + ``us.anthropic.claude-sonnet-4-5-20250929-v1:0``) and rejects + the hyphenated form with + ``HTTP 400 The provided model identifier is invalid``. + Regression for #11976; mirrors the opencode-go fix for #5211 + (commit f77be22c), which extended this same allowlist.""" + if (getattr(self, "provider", "") or "").lower() in { + "alibaba", "minimax", "minimax-cn", + "opencode-go", "opencode-zen", + "zai", "bedrock", + }: return True base = (getattr(self, "base_url", "") or "").lower() - return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base + return ( + "dashscope" in base + or "aliyuncs" in base + or "minimax" in base + or "opencode.ai/zen/" in base + or "bigmodel.cn" in base + # AWS Bedrock runtime endpoints — defense-in-depth when + # ``provider`` is unset but ``base_url`` still names Bedrock. + or "bedrock-runtime." in base + ) def _is_qwen_portal(self) -> bool: """Return True when the base URL targets Qwen Portal.""" - return "portal.qwen.ai" in self._base_url_lower + return base_url_host_matches(self._base_url_lower, "portal.qwen.ai") def _qwen_prepare_chat_messages(self, api_messages: list) -> list: prepared = copy.deepcopy(api_messages) @@ -6856,20 +6729,14 @@ class AIAgent: def _build_api_kwargs(self, api_messages: list) -> dict: """Build the keyword arguments dict for the active API mode.""" if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs + _transport = self._get_anthropic_transport() anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages) - # Pass context_length (total input+output window) so the adapter can - # clamp max_tokens (output cap) when the user configured a smaller - # context window than the model's native output limit. ctx_len = getattr(self, "context_compressor", None) ctx_len = ctx_len.context_length if ctx_len else None - # _ephemeral_max_output_tokens is set for one call when the API - # returns "max_tokens too large given prompt" — it caps output to - # the available window space without touching context_length. ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) if ephemeral_out is not None: self._ephemeral_max_output_tokens = None # consume immediately - return build_anthropic_kwargs( + return _transport.build_kwargs( model=self.model, messages=anthropic_messages, tools=self.tools, @@ -6885,305 +6752,144 @@ class AIAgent: # AWS Bedrock native Converse API — bypasses the OpenAI client entirely. # The adapter handles message/tool conversion and boto3 calls directly. 
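+        # (Illustrative sketch: build_kwargs on the Bedrock transport is
+        # expected to return the same dispatch-tagged payload that the inline
+        # code removed below used to assemble, roughly:
+        #
+        #     {
+        #         "__bedrock_converse__": True,   # dispatch marker
+        #         "__bedrock_region__": region,
+        #         **converse_payload,             # Converse messages/tools/config
+        #     }
+        #
+        # The inner Converse field names are the adapter's concern, not this
+        # hunk's; treat the payload details here as assumptions.)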
if self.api_mode == "bedrock_converse": - from agent.bedrock_adapter import build_converse_kwargs + _bt = self._get_bedrock_transport() region = getattr(self, "_bedrock_region", None) or "us-east-1" guardrail = getattr(self, "_bedrock_guardrail_config", None) - return { - "__bedrock_converse__": True, - "__bedrock_region__": region, - **build_converse_kwargs( - model=self.model, - messages=api_messages, - tools=self.tools, - max_tokens=self.max_tokens or 4096, - temperature=None, # Let the model use its default - guardrail_config=guardrail, - ), - } + return _bt.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + max_tokens=self.max_tokens or 4096, + region=region, + guardrail_config=guardrail, + ) if self.api_mode == "codex_responses": - instructions = "" - payload_messages = api_messages - if api_messages and api_messages[0].get("role") == "system": - instructions = str(api_messages[0].get("content") or "").strip() - payload_messages = api_messages[1:] - if not instructions: - instructions = DEFAULT_AGENT_IDENTITY - + _ct = self._get_codex_transport() is_github_responses = ( - "models.github.ai" in self.base_url.lower() - or "api.githubcopilot.com" in self.base_url.lower() + base_url_host_matches(self.base_url, "models.github.ai") + or base_url_host_matches(self.base_url, "api.githubcopilot.com") ) is_codex_backend = ( self.provider == "openai-codex" - or "chatgpt.com/backend-api/codex" in self.base_url.lower() + or ( + self._base_url_hostname == "chatgpt.com" + and "/backend-api/codex" in self._base_url_lower + ) + ) + is_xai_responses = self.provider == "xai" or self._base_url_hostname == "api.x.ai" + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + reasoning_config=self.reasoning_config, + session_id=getattr(self, "session_id", None), + max_tokens=self.max_tokens, + request_overrides=self.request_overrides, + is_github_responses=is_github_responses, + is_codex_backend=is_codex_backend, + is_xai_responses=is_xai_responses, + github_reasoning_extra=self._github_models_reasoning_extra_body() if is_github_responses else None, ) - # Resolve reasoning effort: config > default (medium) - reasoning_effort = "medium" - reasoning_enabled = True - if self.reasoning_config and isinstance(self.reasoning_config, dict): - if self.reasoning_config.get("enabled") is False: - reasoning_enabled = False - elif self.reasoning_config.get("effort"): - reasoning_effort = self.reasoning_config["effort"] + # ── chat_completions (default) ───────────────────────────────────── + _ct = self._get_chat_completions_transport() - # Clamp effort levels not supported by the Responses API model. - # GPT-5.4 supports none/low/medium/high/xhigh but not "minimal". - # "minimal" is valid on OpenRouter and GPT-5 but fails on 5.2/5.4. 
- _effort_clamp = {"minimal": "low"} - reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) + # Provider detection flags + _is_qwen = self._is_qwen_portal() + _is_or = self._is_openrouter_url() + _is_gh = ( + base_url_host_matches(self._base_url_lower, "models.github.ai") + or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com") + ) + _is_nous = "nousresearch" in self._base_url_lower + _is_nvidia = "integrate.api.nvidia.com" in self._base_url_lower + _is_kimi = ( + base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ) - kwargs = { - "model": self.model, - "instructions": instructions, - "input": self._chat_messages_to_responses_input(payload_messages), - "tools": self._responses_tools(), - "tool_choice": "auto", - "parallel_tool_calls": True, - "store": False, - } - - if not is_github_responses: - kwargs["prompt_cache_key"] = self.session_id - - is_xai_responses = self.provider == "xai" or "api.x.ai" in (self.base_url or "").lower() - - if reasoning_enabled and is_xai_responses: - # xAI reasons automatically — no effort param, just include encrypted content - kwargs["include"] = ["reasoning.encrypted_content"] - elif reasoning_enabled: - if is_github_responses: - # Copilot's Responses route advertises reasoning-effort support, - # but not OpenAI-specific prompt cache or encrypted reasoning - # fields. Keep the payload to the documented subset. - github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - kwargs["reasoning"] = github_reasoning - else: - kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} - kwargs["include"] = ["reasoning.encrypted_content"] - elif not is_github_responses and not is_xai_responses: - kwargs["include"] = [] - - if self.request_overrides: - kwargs.update(self.request_overrides) - - if self.max_tokens is not None and not is_codex_backend: - kwargs["max_output_tokens"] = self.max_tokens - - if is_xai_responses and getattr(self, "session_id", None): - kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id} - - return kwargs - - sanitized_messages = api_messages - needs_sanitization = False - for msg in api_messages: - if not isinstance(msg, dict): - continue - if "codex_reasoning_items" in msg: - needs_sanitization = True - break - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if not isinstance(tool_call, dict): - continue - if "call_id" in tool_call or "response_item_id" in tool_call: - needs_sanitization = True - break - if needs_sanitization: - break - - if needs_sanitization: - sanitized_messages = copy.deepcopy(api_messages) - for msg in sanitized_messages: - if not isinstance(msg, dict): - continue - - # Codex-only replay state must not leak into strict chat-completions APIs. - msg.pop("codex_reasoning_items", None) - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict): - tool_call.pop("call_id", None) - tool_call.pop("response_item_id", None) - - # Qwen portal: normalize content to list-of-dicts, inject cache_control. - # Must run AFTER codex sanitization so we transform the final messages. - # If sanitization already deepcopied, reuse that copy (in-place). - if self._is_qwen_portal(): - if sanitized_messages is api_messages: - # No sanitization was done — we need our own copy. 
- sanitized_messages = self._qwen_prepare_chat_messages(sanitized_messages) - else: - # Already a deepcopy — transform in place to avoid a second deepcopy. - self._qwen_prepare_chat_messages_inplace(sanitized_messages) - - # GPT-5 and Codex models respond better to 'developer' than 'system' - # for instruction-following. Swap the role at the API boundary so - # internal message representation stays uniform ("system"). - _model_lower = (self.model or "").lower() - if ( - sanitized_messages - and sanitized_messages[0].get("role") == "system" - and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS) - ): - # Shallow-copy the list + first message only — rest stays shared. - sanitized_messages = list(sanitized_messages) - sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"} - - provider_preferences = {} - if self.providers_allowed: - provider_preferences["only"] = self.providers_allowed - if self.providers_ignored: - provider_preferences["ignore"] = self.providers_ignored - if self.providers_order: - provider_preferences["order"] = self.providers_order - if self.provider_sort: - provider_preferences["sort"] = self.provider_sort - if self.provider_require_parameters: - provider_preferences["require_parameters"] = True - if self.provider_data_collection: - provider_preferences["data_collection"] = self.provider_data_collection - - api_kwargs = { - "model": self.model, - "messages": sanitized_messages, - "timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)), - } + # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE + # sentinel (temperature omitted entirely), a numeric override, or None. try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE + _ft = _fixed_temperature_for_model(self.model, self.base_url) + _omit_temp = _ft is OMIT_TEMPERATURE + _fixed_temp = _ft if not _omit_temp else None except Exception: - _fixed_temperature_for_model = None - if _fixed_temperature_for_model is not None: - fixed_temperature = _fixed_temperature_for_model(self.model) - if fixed_temperature is not None: - api_kwargs["temperature"] = fixed_temperature - if self._is_qwen_portal(): - api_kwargs["metadata"] = { - "sessionId": self.session_id or "hermes", - "promptId": str(uuid.uuid4()), - } - if self.tools: - api_kwargs["tools"] = self.tools + _omit_temp = False + _fixed_temp = None - # ── max_tokens for chat_completions ────────────────────────────── - # Priority: ephemeral override (error recovery / length-continuation - # boost) > user-configured max_tokens > provider-specific defaults. - _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) - if _ephemeral_out is not None: - self._ephemeral_max_output_tokens = None # consume immediately - api_kwargs.update(self._max_tokens_param(_ephemeral_out)) - elif self.max_tokens is not None: - api_kwargs.update(self._max_tokens_param(self.max_tokens)) - elif "integrate.api.nvidia.com" in self._base_url_lower: - # NVIDIA NIM defaults to a very low max_tokens when omitted, - # causing models like GLM-4.7 to truncate immediately (thinking - # tokens alone exhaust the budget). 16384 provides adequate room. - api_kwargs.update(self._max_tokens_param(16384)) - elif self._is_qwen_portal(): - # Qwen Portal defaults to a very low max_tokens when omitted. 
- # Reasoning models (qwen3-coder-plus) exhaust that budget on - # thinking tokens alone, causing the portal to return - # finish_reason="stop" with truncated output — the agent sees - # this as an intentional stop and exits the loop. Send 65536 - # (the documented max output for qwen3-coder models) so the - # model has adequate output budget for tool calls. - api_kwargs.update(self._max_tokens_param(65536)) - elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower(): - # OpenRouter and Nous Portal translate requests to Anthropic's - # Messages API, which requires max_tokens as a mandatory field. - # When we omit it, the proxy picks a default that can be too - # low — the model spends its output budget on thinking and has - # almost nothing left for the actual response (especially large - # tool calls like write_file). Sending the model's real output - # limit ensures full capacity. + # Provider preferences (OpenRouter-specific) + _prefs: Dict[str, Any] = {} + if self.providers_allowed: + _prefs["only"] = self.providers_allowed + if self.providers_ignored: + _prefs["ignore"] = self.providers_ignored + if self.providers_order: + _prefs["order"] = self.providers_order + if self.provider_sort: + _prefs["sort"] = self.provider_sort + if self.provider_require_parameters: + _prefs["require_parameters"] = True + if self.provider_data_collection: + _prefs["data_collection"] = self.provider_data_collection + + # Anthropic max output for Claude on OpenRouter/Nous + _ant_max = None + if (_is_or or _is_nous) and "claude" in (self.model or "").lower(): try: from agent.anthropic_adapter import _get_anthropic_max_output - _model_output_limit = _get_anthropic_max_output(self.model) - api_kwargs["max_tokens"] = _model_output_limit + _ant_max = _get_anthropic_max_output(self.model) except Exception: pass # fail open — let the proxy pick its default - extra_body = {} + # Qwen session metadata precomputed here (promptId is per-call random) + _qwen_meta = None + if _is_qwen: + _qwen_meta = { + "sessionId": self.session_id or "hermes", + "promptId": str(uuid.uuid4()), + } - _is_openrouter = self._is_openrouter_url() - _is_github_models = ( - "models.github.ai" in self._base_url_lower - or "api.githubcopilot.com" in self._base_url_lower + # Ephemeral max output override — consume immediately so the next + # turn doesn't inherit it. 
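+        # (Note: the transport is expected to preserve the priority chain the
+        # removed inline code above implemented:
+        #
+        #     ephemeral override > self.max_tokens > per-endpoint default
+        #     (NVIDIA NIM: 16384, Qwen Portal: 65536, Claude via
+        #      OpenRouter/Nous: the model's real output limit)
+        #
+        # with anthropic_max_output carrying that last case.)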
+ _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) + if _ephemeral_out is not None: + self._ephemeral_max_output_tokens = None + + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + timeout=self._resolved_api_call_timeout(), + max_tokens=self.max_tokens, + ephemeral_max_output_tokens=_ephemeral_out, + max_tokens_param_fn=self._max_tokens_param, + reasoning_config=self.reasoning_config, + request_overrides=self.request_overrides, + session_id=getattr(self, "session_id", None), + model_lower=(self.model or "").lower(), + is_openrouter=_is_or, + is_nous=_is_nous, + is_qwen_portal=_is_qwen, + is_github_models=_is_gh, + is_nvidia_nim=_is_nvidia, + is_kimi=_is_kimi, + is_custom_provider=self.provider == "custom", + ollama_num_ctx=self._ollama_num_ctx, + provider_preferences=_prefs or None, + qwen_prepare_fn=self._qwen_prepare_chat_messages if _is_qwen else None, + qwen_prepare_inplace_fn=self._qwen_prepare_chat_messages_inplace if _is_qwen else None, + qwen_session_metadata=_qwen_meta, + fixed_temperature=_fixed_temp, + omit_temperature=_omit_temp, + supports_reasoning=self._supports_reasoning_extra_body(), + github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None, + anthropic_max_output=_ant_max, + ) - # Provider preferences (only, ignore, order, sort) are OpenRouter- - # specific. Only send to OpenRouter-compatible endpoints. - # TODO: Nous Portal will add transparent proxy support — re-enable - # for _is_nous when their backend is updated. - if provider_preferences and _is_openrouter: - extra_body["provider"] = provider_preferences - _is_nous = "nousresearch" in self._base_url_lower - - if self._supports_reasoning_extra_body(): - if _is_github_models: - github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - extra_body["reasoning"] = github_reasoning - else: - if self.reasoning_config is not None: - rc = dict(self.reasoning_config) - # Nous Portal requires reasoning enabled — don't send - # enabled=false to it (would cause 400). - if _is_nous and rc.get("enabled") is False: - pass # omit reasoning entirely for Nous when disabled - else: - extra_body["reasoning"] = rc - else: - extra_body["reasoning"] = { - "enabled": True, - "effort": "medium" - } - - # Nous Portal product attribution - if _is_nous: - extra_body["tags"] = ["product=hermes-agent"] - - # Ollama num_ctx: override the 2048 default so the model actually - # uses the context window it was trained for. Passed via the OpenAI - # SDK's extra_body → options.num_ctx, which Ollama's OpenAI-compat - # endpoint forwards to the runner as --ctx-size. - if self._ollama_num_ctx: - options = extra_body.get("options", {}) - options["num_ctx"] = self._ollama_num_ctx - extra_body["options"] = options - - # Ollama / custom provider: pass think=false when reasoning is disabled. - # Ollama does not recognise the OpenRouter-style `reasoning` extra_body - # field, so we use its native `think` parameter instead. - # This prevents thinking-capable models (Qwen3, etc.) from generating - # <think> blocks and producing empty-response errors when the user has - # set reasoning_effort: none.
- if self.provider == "custom" and self.reasoning_config and isinstance(self.reasoning_config, dict): - _effort = (self.reasoning_config.get("effort") or "").strip().lower() - _enabled = self.reasoning_config.get("enabled", True) - if _effort == "none" or _enabled is False: - extra_body["think"] = False - - if self._is_qwen_portal(): - extra_body["vl_high_resolution_images"] = True - - if extra_body: - api_kwargs["extra_body"] = extra_body - - # Priority Processing / generic request overrides (e.g. service_tier). - # Applied last so overrides win over any defaults set above. - if self.request_overrides: - api_kwargs.update(self.request_overrides) - - return api_kwargs - def _supports_reasoning_extra_body(self) -> bool: """Return True when reasoning extra_body is safe to send for this route/model. @@ -7191,11 +6897,14 @@ class AIAgent: Some providers/routes reject `reasoning` with 400s, so gate it to known reasoning-capable model families and direct Nous Portal. """ - if "nousresearch" in self._base_url_lower: + if base_url_host_matches(self._base_url_lower, "nousresearch.com"): return True - if "ai-gateway.vercel.sh" in self._base_url_lower: + if base_url_host_matches(self._base_url_lower, "ai-gateway.vercel.sh"): return True - if "models.github.ai" in self._base_url_lower or "api.githubcopilot.com" in self._base_url_lower: + if ( + base_url_host_matches(self._base_url_lower, "models.github.ai") + or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com") + ): try: from hermes_cli.models import github_model_reasoning_efforts @@ -7315,6 +7024,11 @@ class AIAgent: "finish_reason": finish_reason, } + if hasattr(assistant_message, "reasoning_content"): + raw_reasoning_content = getattr(assistant_message, "reasoning_content", None) + if raw_reasoning_content is not None: + msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content) + if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: # Pass reasoning_details back unmodified so providers (OpenRouter, # Anthropic, OpenAI) can maintain reasoning continuity across turns. @@ -7389,6 +7103,30 @@ class AIAgent: return msg + def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None: + """Copy provider-facing reasoning fields onto an API replay message.""" + if source_msg.get("role") != "assistant": + return + + explicit_reasoning = source_msg.get("reasoning_content") + if isinstance(explicit_reasoning, str): + api_msg["reasoning_content"] = explicit_reasoning + return + + normalized_reasoning = source_msg.get("reasoning") + if isinstance(normalized_reasoning, str) and normalized_reasoning: + api_msg["reasoning_content"] = normalized_reasoning + return + + kimi_requires_reasoning = ( + self.provider in {"kimi-coding", "kimi-coding-cn"} + or base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ) + if kimi_requires_reasoning and source_msg.get("tool_calls"): + api_msg["reasoning_content"] = "" + @staticmethod def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict: """Strip Codex Responses API fields from tool_calls for strict providers. 
@@ -7472,10 +7210,7 @@ class AIAgent: api_messages = [] for msg in messages: api_msg = msg.copy() - if msg.get("role") == "assistant": - reasoning = msg.get("reasoning") - if reasoning: - api_msg["reasoning_content"] = reasoning + self._copy_reasoning_content_for_api(msg, api_msg) api_msg.pop("reasoning", None) api_msg.pop("finish_reason", None) api_msg.pop("_flush_sentinel", None) @@ -7503,12 +7238,19 @@ class AIAgent: from agent.auxiliary_client import ( call_llm as _call_llm, _fixed_temperature_for_model, + OMIT_TEMPERATURE, ) _aux_available = True - # Use the fixed-temperature override (e.g. kimi-for-coding → 0.6) if - # the model has a strict contract; otherwise the historical 0.3 default. - _flush_temperature = _fixed_temperature_for_model(self.model) - if _flush_temperature is None: + # Kimi models manage temperature server-side — omit it entirely. + # Other models with a fixed contract get that value; everyone else + # gets the historical 0.3 default. + _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url) + _omit_temperature = _fixed_temp is OMIT_TEMPERATURE + if _omit_temperature: + _flush_temperature = None + elif _fixed_temp is not None: + _flush_temperature = _fixed_temp + else: _flush_temperature = 0.3 try: response = _call_llm( @@ -7526,15 +7268,18 @@ class AIAgent: if not _aux_available and self.api_mode == "codex_responses": # No auxiliary client -- use the Codex Responses path directly codex_kwargs = self._build_api_kwargs(api_messages) - codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) - codex_kwargs["temperature"] = _flush_temperature + codex_kwargs["tools"] = self._get_codex_transport().convert_tools([memory_tool_def]) + if _flush_temperature is not None: + codex_kwargs["temperature"] = _flush_temperature + else: + codex_kwargs.pop("temperature", None) if "max_output_tokens" in codex_kwargs: codex_kwargs["max_output_tokens"] = 5120 response = self._run_codex_stream(codex_kwargs) elif not _aux_available and self.api_mode == "anthropic_messages": - # Native Anthropic — use the Anthropic client directly - from agent.anthropic_adapter import build_anthropic_kwargs as _build_ant_kwargs - ant_kwargs = _build_ant_kwargs( + # Native Anthropic — use the transport for kwargs + _tflush = self._get_anthropic_transport() + ant_kwargs = _tflush.build_kwargs( model=self.model, messages=api_messages, tools=[memory_tool_def], max_tokens=5120, reasoning_config=None, @@ -7546,9 +7291,10 @@ class AIAgent: "model": self.model, "messages": api_messages, "tools": [memory_tool_def], - "temperature": _flush_temperature, **self._max_tokens_param(5120), } + if _flush_temperature is not None: + api_kwargs["temperature"] = _flush_temperature from agent.auxiliary_client import _get_task_timeout response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create( **api_kwargs, timeout=_get_task_timeout("flush_memories") @@ -7557,14 +7303,25 @@ class AIAgent: # Extract tool calls from the response, handling all API formats tool_calls = [] if self.api_mode == "codex_responses" and not _aux_available: - assistant_msg, _ = self._normalize_codex_response(response) - if assistant_msg and assistant_msg.tool_calls: - tool_calls = assistant_msg.tool_calls + _ct_flush = self._get_codex_transport() + _cnr_flush = _ct_flush.normalize_response(response) + if _cnr_flush and _cnr_flush.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in 
_cnr_flush.tool_calls + ] elif self.api_mode == "anthropic_messages" and not _aux_available: - from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush - _flush_msg, _ = _nar_flush(response, strip_tool_prefix=self._is_anthropic_oauth) - if _flush_msg and _flush_msg.tool_calls: - tool_calls = _flush_msg.tool_calls + _tfn = self._get_anthropic_transport() + _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth) + if _flush_nr and _flush_nr.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in _flush_nr.tool_calls + ] elif hasattr(response, "choices") and response.choices: assistant_message = response.choices[0].message if assistant_message.tool_calls: @@ -7724,8 +7481,27 @@ class AIAgent: finally: self._executing_tools = False + def _dispatch_delegate_task(self, function_args: dict) -> str: + """Single call site for delegate_task dispatch. + + New DELEGATE_TASK_SCHEMA fields only need to be added here to reach all + invocation paths (concurrent, sequential, inline). + """ + from tools.delegate_tool import delegate_task as _delegate_task + return _delegate_task( + goal=function_args.get("goal"), + context=function_args.get("context"), + toolsets=function_args.get("toolsets"), + tasks=function_args.get("tasks"), + max_iterations=function_args.get("max_iterations"), + acp_command=function_args.get("acp_command"), + acp_args=function_args.get("acp_args"), + role=function_args.get("role"), + parent_agent=self, + ) + def _invoke_tool(self, function_name: str, function_args: dict, effective_task_id: str, - tool_call_id: Optional[str] = None) -> str: + tool_call_id: Optional[str] = None, messages: list = None) -> str: """Invoke a single tool and return the result string. No display logic. Handles both agent-level tools (todo, memory, etc.) and registry-dispatched @@ -7793,15 +7569,7 @@ class AIAgent: callback=self.clarify_callback, ) elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task - return _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=function_args.get("tasks"), - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + return self._dispatch_delegate_task(function_args) else: return handle_function_call( function_name, function_args, effective_task_id, @@ -7949,8 +7717,7 @@ class AIAgent: # the tool returns True on the next poll. 
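+                # (Note: _set_interrupt presumably replaces the two inline
+                # `from tools.interrupt import set_interrupt as _sif` imports
+                # this hunk deletes with a single module-level alias. It is
+                # armed per worker thread on entry and cleared in the finally
+                # block below:
+                #
+                #     _set_interrupt(True, _worker_tid)    # arm for this worker
+                #     ...                                  # run the tool
+                #     _set_interrupt(False, _worker_tid)   # disarm
+                # )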
if self._interrupt_requested: try: - from tools.interrupt import set_interrupt as _sif - _sif(True, _worker_tid) + _set_interrupt(True, _worker_tid) except Exception: pass # Set the activity callback on THIS worker thread so @@ -7964,7 +7731,7 @@ class AIAgent: pass start = time.time() try: - result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id) + result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id, messages=messages) except Exception as tool_error: result = f"Error executing tool '{function_name}': {tool_error}" logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True) @@ -7981,8 +7748,7 @@ class AIAgent: with self._tool_worker_threads_lock: self._tool_worker_threads.discard(_worker_tid) try: - from tools.interrupt import set_interrupt as _sif - _sif(False, _worker_tid) + _set_interrupt(False, _worker_tid) except Exception: pass @@ -8123,6 +7889,11 @@ class AIAgent: } messages.append(tool_msg) + # ── Per-tool /steer drain ─────────────────────────────────── + # Same as the sequential path: drain between each collected + # result so the steer lands as early as possible. + self._apply_pending_steer_to_tool_results(messages, 1) + # ── Per-turn aggregate budget enforcement ───────────────────────── num_tools = len(parsed_calls) if num_tools > 0: @@ -8312,7 +8083,6 @@ class AIAgent: if self._should_emit_quiet_tool_messages(): self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}") elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task tasks_arg = function_args.get("tasks") if tasks_arg and isinstance(tasks_arg, list): spinner_label = f"🔀 delegating {len(tasks_arg)} tasks" @@ -8327,14 +8097,7 @@ class AIAgent: self._delegate_spinner = spinner _delegate_result = None try: - function_result = _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=tasks_arg, - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + function_result = self._dispatch_delegate_task(function_args) _delegate_result = function_result finally: self._delegate_spinner = None @@ -8347,7 +8110,7 @@ class AIAgent: elif self._context_engine_tool_names and function_name in self._context_engine_tool_names: # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.) spinner = None - if self.quiet_mode and not self.tool_progress_callback: + if self._should_emit_quiet_tool_messages(): face = random.choice(KawaiiSpinner.get_waiting_faces()) emoji = _get_tool_emoji(function_name) preview = _build_tool_preview(function_name, function_args) or function_name @@ -8365,7 +8128,7 @@ class AIAgent: cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result) if spinner: spinner.stop(cute_msg) - elif self.quiet_mode: + elif self._should_emit_quiet_tool_messages(): self._vprint(f" {cute_msg}") elif self._memory_manager and self._memory_manager.has_tool(function_name): # Memory provider tools (hindsight_retain, honcho_search, etc.) @@ -8486,6 +8249,12 @@ class AIAgent: } messages.append(tool_msg) + # ── Per-tool /steer drain ─────────────────────────────────── + # Drain pending steer BETWEEN individual tool calls so the + # injection lands as soon as a tool finishes — not after the + # entire batch. The model sees it on the next API iteration. 
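+                # (Note, grounded in the pre-API drain later in this file: the
+                # injection appends
+                #
+                #     "\n\nUser guidance: <steer text>"
+                #
+                # to the last tool result's content, so the model reads the
+                # guidance alongside the tool output on its next API
+                # iteration.)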
+ self._apply_pending_steer_to_tool_results(messages, 1) + if not self.quiet_mode: if self.verbose_logging: print(f" ✅ Tool {i} completed in {tool_duration:.2f}s") @@ -8559,14 +8328,17 @@ class AIAgent: summary_extra_body = {} try: - from agent.auxiliary_client import _fixed_temperature_for_model + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE as _OMIT_TEMP except Exception: _fixed_temperature_for_model = None - _summary_temperature = ( - _fixed_temperature_for_model(self.model) + _OMIT_TEMP = None + _raw_summary_temp = ( + _fixed_temperature_for_model(self.model, self.base_url) if _fixed_temperature_for_model is not None else None ) + _omit_summary_temperature = _raw_summary_temp is _OMIT_TEMP + _summary_temperature = None if _omit_summary_temperature else _raw_summary_temp _is_nous = "nousresearch" in self._base_url_lower if self._supports_reasoning_extra_body(): if self.reasoning_config is not None: @@ -8583,8 +8355,9 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) summary_response = self._run_codex_stream(codex_kwargs) - assistant_message, _ = self._normalize_codex_response(summary_response) - final_response = (assistant_message.content or "").strip() if assistant_message else "" + _ct_sum = self._get_codex_transport() + _cnr_sum = _ct_sum.normalize_response(summary_response) + final_response = (_cnr_sum.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -8612,14 +8385,14 @@ class AIAgent: summary_kwargs["extra_body"] = summary_extra_body if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar - _ant_kw = _bak(model=self.model, messages=api_messages, tools=None, + _tsum = self._get_anthropic_transport() + _ant_kw = _tsum.build_kwargs(model=self.model, messages=api_messages, tools=None, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, is_oauth=self._is_anthropic_oauth, preserve_dots=self._anthropic_preserve_dots()) summary_response = self._anthropic_messages_create(_ant_kw) - _msg, _ = _nar(summary_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_msg.content or "").strip() + _sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_sum_nr.content or "").strip() else: summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs) @@ -8641,17 +8414,18 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) retry_response = self._run_codex_stream(codex_kwargs) - retry_msg, _ = self._normalize_codex_response(retry_response) - final_response = (retry_msg.content or "").strip() if retry_msg else "" + _ct_retry = self._get_codex_transport() + _cnr_retry = _ct_retry.normalize_response(retry_response) + final_response = (_cnr_retry.content or "").strip() elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2 - _ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None, + _tretry = self._get_anthropic_transport() + _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None, is_oauth=self._is_anthropic_oauth, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, preserve_dots=self._anthropic_preserve_dots()) retry_response = 
self._anthropic_messages_create(_ant_kw2) - _retry_msg, _ = _nar2(retry_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_retry_msg.content or "").strip() + _retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_retry_nr.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -8753,6 +8527,11 @@ class AIAgent: self._persist_user_message_override = persist_user_message # Generate unique task_id if not provided to isolate VMs between concurrent tasks effective_task_id = task_id or str(uuid.uuid4()) + # Expose the active task_id so tools running mid-turn (e.g. delegate_task + # in delegate_tool.py) can identify this agent for the cross-agent file + # state registry. Set BEFORE any tool dispatch so snapshots taken at + # child-launch time see the parent's real id, not None. + self._current_task_id = effective_task_id # Reset retry counters and iteration budget at the start of each turn # so subagent usage from a previous turn doesn't eat into the next one. @@ -8793,7 +8572,8 @@ class AIAgent: self.iteration_budget = IterationBudget(self.max_iterations) # Log conversation turn start for debugging/observability - _msg_preview = (user_message[:80] + "...") if len(user_message) > 80 else user_message + _preview_text = _summarize_user_message_for_log(user_message) + _msg_preview = (_preview_text[:80] + "...") if len(_preview_text) > 80 else _preview_text _msg_preview = _msg_preview.replace("\n", " ") logger.info( "conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r", @@ -8841,7 +8621,8 @@ class AIAgent: self._persist_user_message_idx = current_turn_user_idx if not self.quiet_mode: - self._safe_print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'") + _print_preview = _summarize_user_message_for_log(user_message) + self._safe_print(f"💬 Starting conversation: '{_print_preview[:60]}{'...' if len(_print_preview) > 60 else ''}'") # ── System prompt (cached per session for prefix caching) ── # Built once on first call, reused for all subsequent calls. @@ -9110,6 +8891,56 @@ class AIAgent: and "skill_manage" in self.valid_tool_names): self._iters_since_skill += 1 + # ── Pre-API-call /steer drain ────────────────────────────────── + # If a /steer arrived during the previous API call (while the model + # was thinking), drain it now — before we build api_messages — so + # the model sees the steer text on THIS iteration. Without this, + # steers sent during an API call only land after the NEXT tool batch, + # which may never come if the model returns a final response. + # + # We scan backwards for the last tool-role message in the messages + # list. If found, the steer is appended there. If not (first + # iteration, no tools yet), the steer stays pending for the next + # tool batch — injecting into a user message would break role + # alternation, and there's no tool output to piggyback on. 
+ _pre_api_steer = self._drain_pending_steer() + if _pre_api_steer: + _injected = False + for _si in range(len(messages) - 1, -1, -1): + _sm = messages[_si] + if isinstance(_sm, dict) and _sm.get("role") == "tool": + marker = f"\n\nUser guidance: {_pre_api_steer}" + existing = _sm.get("content", "") + if isinstance(existing, str): + _sm["content"] = existing + marker + else: + # Multimodal content blocks — append text block + try: + blocks = list(existing) if existing else [] + blocks.append({"type": "text", "text": marker}) + _sm["content"] = blocks + except Exception: + pass + _injected = True + logger.debug( + "Pre-API-call steer drain: injected into tool msg at index %d", + _si, + ) + break + if not _injected: + # No tool message to inject into — put it back so + # the post-tool-execution drain picks it up later. + _lock = getattr(self, "_pending_steer_lock", None) + if _lock is not None: + with _lock: + if self._pending_steer: + self._pending_steer = self._pending_steer + "\n" + _pre_api_steer + else: + self._pending_steer = _pre_api_steer + else: + existing = getattr(self, "_pending_steer", None) + self._pending_steer = (existing + "\n" + _pre_api_steer) if existing else _pre_api_steer # Prepare messages for API call # If we have an ephemeral system prompt, prepend it to the messages # Note: Reasoning is embedded in content via <think> tags for trajectory storage. @@ -9139,11 +8970,7 @@ class AIAgent: # For ALL assistant messages, pass reasoning back to the API # This ensures multi-turn reasoning context is preserved - if msg.get("role") == "assistant": - reasoning_text = msg.get("reasoning") - if reasoning_text: - # Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter) - api_msg["reasoning_content"] = reasoning_text + self._copy_reasoning_content_for_api(msg, api_msg) # Remove 'reasoning' field - it's for trajectory storage only # We've copied it to 'reasoning_content' for the API above @@ -9185,12 +9012,19 @@ class AIAgent: for idx, pfm in enumerate(self.prefill_messages): api_messages.insert(sys_offset + idx, pfm.copy()) - # Apply Anthropic prompt caching for Claude models via OpenRouter. - # Auto-detected: if model name contains "claude" and base_url is OpenRouter, - # inject cache_control breakpoints (system + last 3 messages) to reduce - # input token costs by ~75% on multi-turn conversations. + # Apply Anthropic prompt caching for Claude models on native + # Anthropic, OpenRouter, and third-party Anthropic-compatible + # gateways. Auto-detected: if ``_use_prompt_caching`` is set, + # inject cache_control breakpoints (system + last 3 messages) + # to reduce input token costs by ~75% on multi-turn + # conversations. Layout is chosen per endpoint by + # ``_anthropic_prompt_cache_policy``. if self._use_prompt_caching: - api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages')) + api_messages = apply_anthropic_cache_control( + api_messages, + cache_ttl=self._cache_ttl, + native_anthropic=self._use_native_cache_layout, + ) # Safety net: strip orphaned tool results / add stubs for missing # results before sending to the API.
Runs unconditionally — not @@ -9224,7 +9058,10 @@ class AIAgent: ), }} except Exception: - pass + tc["function"]["arguments"] = _repair_tool_call_arguments( + tc["function"]["arguments"], + tc["function"].get("name", "?"), + ) new_tcs.append(tc) am["tool_calls"] = new_tcs @@ -9337,7 +9174,7 @@ class AIAgent: if self._force_ascii_payload: _sanitize_structure_non_ascii(api_kwargs) if self.api_mode == "codex_responses": - api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) + api_kwargs = self._get_codex_transport().preflight_kwargs(api_kwargs, allow_stream=False) try: from hermes_cli.plugins import invoke_hook as _invoke_hook @@ -9425,51 +9262,53 @@ class AIAgent: response_invalid = False error_details = [] if self.api_mode == "codex_responses": - output_items = getattr(response, "output", None) if response is not None else None - if response is None: - response_invalid = True - error_details.append("response is None") - elif not isinstance(output_items, list): - response_invalid = True - error_details.append("response.output is not a list") - elif not output_items: - # Stream backfill may have failed, but - # _normalize_codex_response can still recover - # from response.output_text. Only mark invalid - # when that fallback is also absent. - _out_text = getattr(response, "output_text", None) - _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" - if _out_text_stripped: - logger.debug( - "Codex response.output is empty but output_text is present " - "(%d chars); deferring to normalization.", - len(_out_text_stripped), - ) - else: - _resp_status = getattr(response, "status", None) - _resp_incomplete = getattr(response, "incomplete_details", None) - logger.warning( - "Codex response.output is empty after stream backfill " - "(status=%s, incomplete_details=%s, model=%s). %s", - _resp_status, _resp_incomplete, - getattr(response, "model", None), - f"api_mode={self.api_mode} provider={self.provider}", - ) + _ct_v = self._get_codex_transport() + if not _ct_v.validate_response(response): + if response is None: response_invalid = True - error_details.append("response.output is empty") + error_details.append("response is None") + else: + # output_text fallback: stream backfill may have failed + # but normalize can still recover from output_text + _out_text = getattr(response, "output_text", None) + _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" + if _out_text_stripped: + logger.debug( + "Codex response.output is empty but output_text is present " + "(%d chars); deferring to normalization.", + len(_out_text_stripped), + ) + else: + _resp_status = getattr(response, "status", None) + _resp_incomplete = getattr(response, "incomplete_details", None) + logger.warning( + "Codex response.output is empty after stream backfill " + "(status=%s, incomplete_details=%s, model=%s). 
%s", + _resp_status, _resp_incomplete, + getattr(response, "model", None), + f"api_mode={self.api_mode} provider={self.provider}", + ) + response_invalid = True + error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": - content_blocks = getattr(response, "content", None) if response is not None else None - if response is None: + _tv = self._get_anthropic_transport() + if not _tv.validate_response(response): response_invalid = True - error_details.append("response is None") - elif not isinstance(content_blocks, list): + if response is None: + error_details.append("response is None") + else: + error_details.append("response.content invalid (not a non-empty list)") + elif self.api_mode == "bedrock_converse": + _btv = self._get_bedrock_transport() + if not _btv.validate_response(response): response_invalid = True - error_details.append("response.content is not a list") - elif not content_blocks: - response_invalid = True - error_details.append("response.content is empty") + if response is None: + error_details.append("response is None") + else: + error_details.append("Bedrock response invalid (no output or choices)") else: - if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices: + _ctv = self._get_chat_completions_transport() + if not _ctv.validate_response(response): response_invalid = True if response is None: error_details.append("response is None") @@ -9628,8 +9467,12 @@ class AIAgent: else: finish_reason = "stop" elif self.api_mode == "anthropic_messages": - stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"} - finish_reason = stop_reason_map.get(response.stop_reason, "stop") + _tfr = self._get_anthropic_transport() + finish_reason = _tfr.map_finish_reason(response.stop_reason) + elif self.api_mode == "bedrock_converse": + # Bedrock response is already normalized at dispatch — finish_reason + # is already in OpenAI format via normalize_converse_response() + finish_reason = response.choices[0].finish_reason if hasattr(response, "choices") and response.choices else "stop" else: finish_reason = response.choices[0].finish_reason assistant_message = response.choices[0].message @@ -9647,25 +9490,44 @@ class AIAgent: if finish_reason == "length": self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True) + # Normalize the truncated response to a single OpenAI-style + # message shape so text-continuation and tool-call retry + # work uniformly across chat_completions, bedrock_converse, + # and anthropic_messages. For Anthropic we use the same + # adapter the agent loop already relies on so the rebuilt + # interim assistant message is byte-identical to what + # would have been appended in the non-truncated path. 
+ _trunc_msg = None + if self.api_mode in ("chat_completions", "bedrock_converse"): + _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None + elif self.api_mode == "anthropic_messages": + _trunc_nr = self._get_anthropic_transport().normalize_response( + response, strip_tool_prefix=self._is_anthropic_oauth + ) + _trunc_msg = SimpleNamespace( + content=_trunc_nr.content, + tool_calls=[ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in (_trunc_nr.tool_calls or []) + ] or None, + reasoning=_trunc_nr.reasoning, + reasoning_content=None, + reasoning_details=( + _trunc_nr.provider_data.get("reasoning_details") + if _trunc_nr.provider_data else None + ), + ) + + _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None + _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False + # ── Detect thinking-budget exhaustion ────────────── # When the model spends ALL output tokens on reasoning # and has none left for the response, continuation # retries are pointless. Detect this early and give a # targeted error instead of wasting 3 API calls. - _trunc_content = None - _trunc_has_tool_calls = False - if self.api_mode in ("chat_completions", "bedrock_converse"): - _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None - _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None - _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False - elif self.api_mode == "anthropic_messages": - # Anthropic response.content is a list of blocks - _text_parts = [] - for _blk in getattr(response, "content", []): - if getattr(_blk, "type", None) == "text": - _text_parts.append(getattr(_blk, "text", "")) - _trunc_content = "\n".join(_text_parts) if _text_parts else None - # A response is "thinking exhausted" only when the model # actually produced reasoning blocks but no visible text after # them. Models that do not use <think> tags (e.g.
GLM-4.7 on @@ -9722,9 +9584,9 @@ class AIAgent: "error": _exhaust_error, } - if self.api_mode in ("chat_completions", "bedrock_converse"): - assistant_message = response.choices[0].message - if not assistant_message.tool_calls: + if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"): + assistant_message = _trunc_msg + if assistant_message is not None and not _trunc_has_tool_calls: length_continue_retries += 1 interim_msg = self._build_assistant_message(assistant_message, finish_reason) messages.append(interim_msg) @@ -9762,9 +9624,9 @@ class AIAgent: "error": "Response remained truncated after 3 continuation attempts", } - if self.api_mode in ("chat_completions", "bedrock_converse"): - assistant_message = response.choices[0].message - if assistant_message.tool_calls: + if self.api_mode in ("chat_completions", "bedrock_converse", "anthropic_messages"): + assistant_message = _trunc_msg + if assistant_message is not None and _trunc_has_tool_calls: if truncated_tool_call_retries < 1: truncated_tool_call_retries += 1 self._vprint( @@ -9905,6 +9767,7 @@ class AIAgent: billing_mode="subscription_included" if cost_result.status == "included" else None, model=self.model, + api_call_count=1, ) except Exception: pass # never block the agent loop @@ -9912,21 +9775,27 @@ class AIAgent: if self.verbose_logging: logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") - # Log cache hit stats when prompt caching is active - if self._use_prompt_caching: - if self.api_mode == "anthropic_messages": - # Anthropic uses cache_read_input_tokens / cache_creation_input_tokens - cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0 - written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0 - else: - # OpenRouter uses prompt_tokens_details.cached_tokens - details = getattr(response.usage, 'prompt_tokens_details', None) - cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0 - written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0 - prompt = usage_dict["prompt_tokens"] + # Surface cache hit stats for any provider that reports + # them — not just those where we inject cache_control + # markers. OpenAI/Kimi/DeepSeek/Qwen all do automatic + # server-side prefix caching and return + # ``prompt_tokens_details.cached_tokens``; users + # previously could not see their cache % because this + # line was gated on ``_use_prompt_caching``, which is + # only True for Anthropic-style marker injection. + # ``canonical_usage`` is already normalised from all + # three API shapes (Anthropic / Codex / OpenAI-chat) + # so we can rely on its values directly. + cached = canonical_usage.cache_read_tokens + written = canonical_usage.cache_write_tokens + prompt = usage_dict["prompt_tokens"] + if (cached or written) and not self.quiet_mode: hit_pct = (cached / prompt * 100) if prompt > 0 else 0 - if not self.quiet_mode: - self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") + self._vprint( + f"{self.log_prefix} 💾 Cache: " + f"{cached:,}/{prompt:,} tokens " + f"({hit_pct:.0f}% hit, {written:,} written)" + ) has_retried_429 = False # Reset on success # Clear Nous rate limit state on successful request — @@ -10175,6 +10044,27 @@ class AIAgent: if self._try_refresh_nous_client_credentials(force=True): print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. 
Retrying request...") continue + # Credential refresh didn't help — show diagnostic info. + # Most common causes: Portal OAuth expired/revoked, + # account out of credits, or agent key blocked. + from hermes_constants import display_hermes_home as _dhh_fn + _dhh = _dhh_fn() + _body_text = "" + try: + _body = getattr(api_error, "body", None) or getattr(api_error, "response", None) + if _body is not None: + _body_text = str(_body)[:200] + except Exception: + pass + print(f"{self.log_prefix}🔐 Nous 401 — Portal authentication failed.") + if _body_text: + print(f"{self.log_prefix} Response: {_body_text}") + print(f"{self.log_prefix} Most likely: Portal OAuth expired, account out of credits, or agent key revoked.") + print(f"{self.log_prefix} Troubleshooting:") + print(f"{self.log_prefix} • Re-authenticate: hermes login --provider nous") + print(f"{self.log_prefix} • Check credits / billing: https://portal.nousresearch.com") + print(f"{self.log_prefix} • Verify stored credentials: {_dhh}/auth.json") + print(f"{self.log_prefix} • Switch providers temporarily: /model --provider openrouter") if ( self.api_mode == "anthropic_messages" and status_code == 401 @@ -10665,7 +10555,7 @@ class AIAgent: self._vprint(f"{self.log_prefix} 💡 Your API key was rejected by the provider. Check:", force=True) self._vprint(f"{self.log_prefix} • Is the key valid? Run: hermes setup", force=True) self._vprint(f"{self.log_prefix} • Does your account have access to {_model}?", force=True) - if "openrouter" in str(_base).lower(): + if base_url_host_matches(str(_base), "openrouter.ai"): self._vprint(f"{self.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True) else: self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True) @@ -10860,12 +10750,66 @@ class AIAgent: try: if self.api_mode == "codex_responses": - assistant_message, finish_reason = self._normalize_codex_response(response) + _ct = self._get_codex_transport() + _cnr = _ct.normalize_response(response) + # Back-compat shim: downstream expects SimpleNamespace with + # codex-specific fields (.codex_reasoning_items, .reasoning_details, + # and .call_id/.response_item_id on tool calls). 
+ _tc_list = None + if _cnr.tool_calls: + _tc_list = [] + for tc in _cnr.tool_calls: + _tc_ns = SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) + if tc.provider_data: + if tc.provider_data.get("call_id"): + _tc_ns.call_id = tc.provider_data["call_id"] + if tc.provider_data.get("response_item_id"): + _tc_ns.response_item_id = tc.provider_data["response_item_id"] + _tc_list.append(_tc_ns) + assistant_message = SimpleNamespace( + content=_cnr.content, + tool_calls=_tc_list or None, + reasoning=_cnr.reasoning, + reasoning_content=None, + codex_reasoning_items=( + _cnr.provider_data.get("codex_reasoning_items") + if _cnr.provider_data else None + ), + reasoning_details=( + _cnr.provider_data.get("reasoning_details") + if _cnr.provider_data else None + ), + ) + finish_reason = _cnr.finish_reason elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import normalize_anthropic_response - assistant_message, finish_reason = normalize_anthropic_response( + _transport = self._get_anthropic_transport() + _nr = _transport.normalize_response( response, strip_tool_prefix=self._is_anthropic_oauth ) + # Back-compat shim: downstream code expects SimpleNamespace with + # .content, .tool_calls, .reasoning, .reasoning_content, + # .reasoning_details attributes. + assistant_message = SimpleNamespace( + content=_nr.content, + tool_calls=[ + SimpleNamespace( + id=tc.id, + type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) + for tc in (_nr.tool_calls or []) + ] or None, + reasoning=_nr.reasoning, + reasoning_content=None, + reasoning_details=( + _nr.provider_data.get("reasoning_details") + if _nr.provider_data else None + ), + ) + finish_reason = _nr.finish_reason else: assistant_message = response.choices[0].message @@ -11209,17 +11153,10 @@ class AIAgent: self._last_content_tools_all_housekeeping = _all_housekeeping if _all_housekeeping and self._has_stream_consumers(): self._mute_post_response = True - elif self.quiet_mode: + elif self._should_emit_quiet_tool_messages(): clean = self._strip_think_blocks(turn_content).strip() if clean: - relayed = False - if ( - self.tool_progress_callback - and getattr(self, "platform", "") == "tui" - ): - relayed = True - if not relayed: - self._vprint(f" ┊ 💬 {clean}") + self._vprint(f" ┊ 💬 {clean}") # Pop thinking-only prefill message(s) before appending # (tool-call path — same rationale as the final-response path). @@ -11300,10 +11237,12 @@ class AIAgent: # should_compress(0) never fires. (#2153) _compressor = self.context_compressor if _compressor.last_prompt_tokens > 0: - _real_tokens = ( - _compressor.last_prompt_tokens - + _compressor.last_completion_tokens - ) + # Only use prompt_tokens — completion/reasoning + # tokens don't consume context window space. + # Thinking models (GLM-5.1, QwQ, DeepSeek R1) + # inflate completion_tokens with reasoning, + # causing premature compression. (#12026) + _real_tokens = _compressor.last_prompt_tokens else: _real_tokens = estimate_messages_tokens_rough(messages) @@ -11702,8 +11641,9 @@ class AIAgent: # Determine if conversation completed successfully completed = final_response is not None and api_call_count < self.max_iterations - # Save trajectory if enabled - self._save_trajectory(messages, user_message, completed) + # Save trajectory if enabled. ``user_message`` may be a multimodal + # list of parts; the trajectory format wants a plain string. 
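+        # (Illustrative sketch: _summarize_user_message_for_log is not shown
+        # in this diff. Assuming OpenAI-style multimodal part lists, a minimal
+        # version would be:
+        #
+        #     def _summarize_user_message_for_log(msg):
+        #         if isinstance(msg, str):
+        #             return msg
+        #         parts = []
+        #         for p in msg or []:
+        #             if isinstance(p, dict) and p.get("type") == "text":
+        #                 parts.append(p.get("text", ""))
+        #             elif isinstance(p, dict):
+        #                 parts.append(f"[{p.get('type', 'part')}]")
+        #         return " ".join(x for x in parts if x)
+        #
+        # Text parts are joined; non-text parts reduce to a type tag.)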
+ self._save_trajectory(messages, _summarize_user_message_for_log(user_message), completed) # Clean up VM and browser for this task after conversation completes self._cleanup_task_resources(effective_task_id) @@ -11936,7 +11876,7 @@ def main( # Handle tool listing if list_tools: - from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets + from model_tools import get_all_tool_names, get_available_toolsets from toolsets import get_all_toolsets, get_toolset_info print("📋 Available Tools & Toolsets:") diff --git a/scripts/install.ps1 b/scripts/install.ps1 index 80ed53cce8..144113d5a0 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -630,7 +630,7 @@ function Copy-ConfigTemplates { New-Item -ItemType Directory -Force -Path "$HermesHome\audio_cache" | Out-Null New-Item -ItemType Directory -Force -Path "$HermesHome\memories" | Out-Null New-Item -ItemType Directory -Force -Path "$HermesHome\skills" | Out-Null - New-Item -ItemType Directory -Force -Path "$HermesHome\whatsapp\session" | Out-Null + # Create .env $envPath = "$HermesHome\.env" @@ -735,19 +735,7 @@ function Install-NodeDeps { Pop-Location } - # Install WhatsApp bridge dependencies - $bridgeDir = "$InstallDir\scripts\whatsapp-bridge" - if (Test-Path "$bridgeDir\package.json") { - Write-Info "Installing WhatsApp bridge dependencies..." - Push-Location $bridgeDir - try { - npm install --silent 2>&1 | Out-Null - Write-Success "WhatsApp bridge dependencies installed" - } catch { - Write-Warn "WhatsApp bridge npm install failed (WhatsApp may not work)" - } - Pop-Location - } + Pop-Location } diff --git a/scripts/install.sh b/scripts/install.sh index c6524cefcb..166d984fac 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -297,7 +297,7 @@ check_python() { if command -v python >/dev/null 2>&1; then PYTHON_PATH="$(command -v python)" if "$PYTHON_PATH" -c 'import sys; raise SystemExit(0 if sys.version_info >= (3, 11) else 1)' 2>/dev/null; then - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python found: $PYTHON_FOUND_VERSION" return 0 fi @@ -306,7 +306,7 @@ check_python() { log_info "Installing Python via pkg..." pkg install -y python >/dev/null PYTHON_PATH="$(command -v python)" - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python installed: $PYTHON_FOUND_VERSION" return 0 fi @@ -315,18 +315,17 @@ check_python() { # Let uv handle Python — it can download and manage Python versions # First check if a suitable Python is already available - if $UV_CMD python find "$PYTHON_VERSION" &> /dev/null; then - PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION") - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + if PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null)"; then + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python found: $PYTHON_FOUND_VERSION" return 0 fi # Python not found — use uv to install it (no sudo needed!) log_info "Python $PYTHON_VERSION not found, installing via uv..." 
- if $UV_CMD python install "$PYTHON_VERSION"; then - PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION") - PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null) + if "$UV_CMD" python install "$PYTHON_VERSION"; then + PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION")" + PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)" log_success "Python installed: $PYTHON_FOUND_VERSION" else log_error "Failed to install Python $PYTHON_VERSION" @@ -1052,7 +1051,7 @@ copy_config_templates() { log_info "Setting up configuration files..." # Create ~/.hermes directory structure (config at top level, code in subdir) - mkdir -p "$HERMES_HOME"/{cron,sessions,logs,pairing,hooks,image_cache,audio_cache,memories,skills,whatsapp/session} + mkdir -p "$HERMES_HOME"/{cron,sessions,logs,pairing,hooks,image_cache,audio_cache,memories,skills} # Create .env at ~/.hermes/.env (top level, easy to find) if [ ! -f "$HERMES_HOME/.env" ]; then @@ -1122,7 +1121,7 @@ install_node_deps() { if [ "$DISTRO" = "termux" ]; then log_info "Skipping automatic Node/browser dependency setup on Termux" - log_info "Browser automation and WhatsApp bridge are not part of the tested Termux install path yet." + log_info "Browser automation is not part of the tested Termux install path yet." log_info "If you want to experiment manually later, run: cd $INSTALL_DIR && npm install" return 0 fi @@ -1204,15 +1203,7 @@ install_node_deps() { log_success "TUI dependencies installed" fi - # Install WhatsApp bridge dependencies - if [ -f "$INSTALL_DIR/scripts/whatsapp-bridge/package.json" ]; then - log_info "Installing WhatsApp bridge dependencies..." - cd "$INSTALL_DIR/scripts/whatsapp-bridge" - npm install --silent 2>/dev/null || { - log_warn "WhatsApp bridge npm install failed (WhatsApp may not work)" - } - log_success "WhatsApp bridge dependencies installed" - fi + } run_setup_wizard() { diff --git a/scripts/release.py b/scripts/release.py index 90c2a13d0b..8d213ea070 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -44,15 +44,24 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1", # contributors (from noreply pattern) + "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243", "snreynolds2506@gmail.com": "snreynolds", "35742124+0xbyt4@users.noreply.github.com": "0xbyt4", + "71184274+MassiveMassimo@users.noreply.github.com": "MassiveMassimo", + "massivemassimo@users.noreply.github.com": "MassiveMassimo", "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "keifergu@tencent.com": "keifergu", "kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "abner.the.foreman@agentmail.to": "Abnertheforeman", + "harryykyle1@gmail.com": "hharry11", "kshitijk4poor@gmail.com": "kshitijk4poor", "16443023+stablegenius49@users.noreply.github.com": "stablegenius49", "185121704+stablegenius49@users.noreply.github.com": "stablegenius49", "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit", "valdi.jorge@gmail.com": "jvcl", + "francip@gmail.com": "francip", + "omni@comelse.com": "omnissiah-comelse", + "oussama.redcode@gmail.com": "mavrickdeveloper", "126368201+vilkasdev@users.noreply.github.com": "vilkasdev", "137614867+cutepawss@users.noreply.github.com": "cutepawss", "96793918+memosr@users.noreply.github.com": "memosr", @@ -66,7 +75,10 @@ AUTHOR_MAP = { "104278804+Sertug17@users.noreply.github.com": "Sertug17", "112503481+caentzminger@users.noreply.github.com": "caentzminger", "258577966+voidborne-d@users.noreply.github.com": 
"voidborne-d", + "sir_even@icloud.com": "sirEven", + "36056348+sirEven@users.noreply.github.com": "sirEven", "70424851+insecurejezza@users.noreply.github.com": "insecurejezza", + "254021826+dodo-reach@users.noreply.github.com": "dodo-reach", "259807879+Bartok9@users.noreply.github.com": "Bartok9", "241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter", "268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1", @@ -76,32 +88,62 @@ AUTHOR_MAP = { "39405770+yyq4193@users.noreply.github.com": "yyq4193", "Asunfly@users.noreply.github.com": "Asunfly", "2500400+honghua@users.noreply.github.com": "honghua", + "462836+jplew@users.noreply.github.com": "jplew", "nish3451@users.noreply.github.com": "nish3451", + "Mibayy@users.noreply.github.com": "Mibayy", + "mibayy@users.noreply.github.com": "Mibayy", + "135070653+sgaofen@users.noreply.github.com": "sgaofen", + "nocoo@users.noreply.github.com": "nocoo", + "30841158+n-WN@users.noreply.github.com": "n-WN", + "tsuijinglei@gmail.com": "hiddenpuppy", + "jerome@clawwork.ai": "HiddenPuppy", + "leoyuan0099@gmail.com": "keyuyuan", + "bxzt2006@163.com": "Only-Code-A", + "i@troy-y.org": "TroyMitchell911", + "mygamez@163.com": "zhongyueming1121", + "hansnow@users.noreply.github.com": "hansnow", + "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY", + "ben.burtenshaw@gmail.com": "burtenshaw", + "roopaknijhara@gmail.com": "rnijhara", # contributors (manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", "dmayhem93@gmail.com": "dmahan93", + "fr@tecompanytea.com": "ifrederico", + "cdanis@gmail.com": "cdanis", "samherring99@gmail.com": "samherring99", "desaiaum08@gmail.com": "Aum08Desai", "shannon.sands.1979@gmail.com": "shannonsands", "shannon@nousresearch.com": "shannonsands", + "abdi.moya@gmail.com": "AxDSan", "eri@plasticlabs.ai": "Erosika", "hjcpuro@gmail.com": "hjc-puro", "xaydinoktay@gmail.com": "aydnOktay", "abdullahfarukozden@gmail.com": "Farukest", "lovre.pesut@gmail.com": "rovle", + "xjtumj@gmail.com": "mengjian-github", "kevinskysunny@gmail.com": "kevinskysunny", "xiewenxuan462@gmail.com": "yule975", "yiweimeng.dlut@hotmail.com": "meng93", "hakanerten02@hotmail.com": "teyrebaz33", + "linux2010@users.noreply.github.com": "Linux2010", + "elmatadorgh@users.noreply.github.com": "elmatadorgh", + "alexazzjjtt@163.com": "alexzhu0", + "1180176+Swift42@users.noreply.github.com": "Swift42", "ruzzgarcn@gmail.com": "Ruzzgar", + "yukipukikedy@gmail.com": "Yukipukii1", "alireza78.crypto@gmail.com": "alireza78a", "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson", + "withapurpose37@gmail.com": "StefanIsMe", "4317663+helix4u@users.noreply.github.com": "helix4u", + "ifkellx@users.noreply.github.com": "Ifkellx", "331214+counterposition@users.noreply.github.com": "counterposition", "blspear@gmail.com": "BrennerSpear", "akhater@gmail.com": "akhater", "239876380+handsdiff@users.noreply.github.com": "handsdiff", + "hesapacicam112@gmail.com": "etherman-os", + "mark.ramsell@rivermounts.com": "mark-ramsell", + "taeng02@icloud.com": "taeng0204", "gpickett00@gmail.com": "gpickett00", "mcosma@gmail.com": "wakamex", "clawdia.nash@proton.me": "clawdia-nash", @@ -112,6 +154,7 @@ AUTHOR_MAP = { "noonou7@gmail.com": "HenkDz", "dean.kerr@gmail.com": "deankerr", "socrates1024@gmail.com": "socrates1024", + "seanalt555@gmail.com": "Salt-555", "satelerd@gmail.com": "satelerd", "numman.ali@gmail.com": "nummanali", "0xNyk@users.noreply.github.com": "0xNyk", @@ -123,12 +166,14 @@ AUTHOR_MAP = { 
"aryan@synvoid.com": "aryansingh", "johnsonblake1@gmail.com": "blakejohnson", "hcn518@gmail.com": "pedh", + "haileymarshall005@gmail.com": "haileymarshall", "greer.guthrie@gmail.com": "g-guthrie", "kennyx102@gmail.com": "bobashopcashier", "shokatalishaikh95@gmail.com": "areu01or00", "bryan@intertwinesys.com": "bryanyoung", "christo.mitov@gmail.com": "christomitov", "hermes@nousresearch.com": "NousResearch", + "hermes@noushq.ai": "benbarclay", "chinmingcock@gmail.com": "ChimingLiu", "openclaw@sparklab.ai": "openclaw", "semihcvlk53@gmail.com": "Himess", @@ -143,16 +188,21 @@ AUTHOR_MAP = { "jack.47@gmail.com": "JackTheGit", "dalvidjr2022@gmail.com": "Jr-kenny", "m@statecraft.systems": "mbierling", - "balyan.sid@gmail.com": "balyansid", + "balyan.sid@gmail.com": "alt-glitch", "oluwadareab12@gmail.com": "bennytimz", "simon@simonmarcus.org": "simon-marcus", "xowiekk@gmail.com": "Xowiek", "1243352777@qq.com": "zons-zhaozhy", + "e.silacandmr@gmail.com": "Es1la", # ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply # crossref, and GH contributor list matching (April 2026 audit) ── "1115117931@qq.com": "aaronagent", "1506751656@qq.com": "hqhq1025", "364939526@qq.com": "luyao618", + "hgk324@gmail.com": "houziershi", + "176644217+PStarH@users.noreply.github.com": "PStarH", + "51058514+Sanjays2402@users.noreply.github.com": "Sanjays2402", + "906014227@qq.com": "bingo906", "aaronwong1999@icloud.com": "AaronWong1999", "agents@kylefrench.dev": "DeployFaith", "angelos@oikos.lan.home.malaiwah.com": "angelos", @@ -172,9 +222,12 @@ AUTHOR_MAP = { "don.rhm@gmail.com": "donrhmexe", "dorukardahan@hotmail.com": "dorukardahan", "dsocolobsky@gmail.com": "dsocolobsky", + "dylan.socolobsky@lambdaclass.com": "dsocolobsky", + "ignacio.avecilla@lambdaclass.com": "IAvecilla", "duerzy@gmail.com": "duerzy", "emozilla@nousresearch.com": "emozilla", "fancydirty@gmail.com": "fancydirty", + "farion1231@gmail.com": "farion1231", "floptopbot33@gmail.com": "flobo3", "fontana.pedro93@gmail.com": "pefontana", "francis.x.fitzpatrick@gmail.com": "fxfitz", @@ -193,6 +246,7 @@ AUTHOR_MAP = { "kagura.chen28@gmail.com": "kagura-agent", "1342088860@qq.com": "youngDoo", "kamil@gwozdz.me": "kamil-gwozdz", + "skmishra1991@gmail.com": "bugkill3r", "karamusti912@gmail.com": "MustafaKara7", "kira@ariaki.me": "kira-ariaki", "knopki@duck.com": "knopki", @@ -203,6 +257,7 @@ AUTHOR_MAP = { "82095453+iacker@users.noreply.github.com": "iacker", "sontianye@users.noreply.github.com": "sontianye", "jackjin1997@users.noreply.github.com": "jackjin1997", + "1037461232@qq.com": "jackjin1997", "danieldoderlein@users.noreply.github.com": "danieldoderlein", "lrawnsley@users.noreply.github.com": "lrawnsley", "taeuk178@users.noreply.github.com": "taeuk178", @@ -211,10 +266,12 @@ AUTHOR_MAP = { "ygd58@users.noreply.github.com": "ygd58", "vominh1919@users.noreply.github.com": "vominh1919", "iamagenius00@users.noreply.github.com": "iamagenius00", + "9219265+cresslank@users.noreply.github.com": "cresslank", "trevmanthony@gmail.com": "trevthefoolish", "ziliangpeng@users.noreply.github.com": "ziliangpeng", "centripetal-star@users.noreply.github.com": "centripetal-star", "LeonSGP43@users.noreply.github.com": "LeonSGP43", + "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", "Lubrsy706@users.noreply.github.com": "Lubrsy706", "niyant@spicefi.xyz": "spniyant", "olafthiele@gmail.com": "olafthiele", @@ -254,6 +311,7 @@ AUTHOR_MAP = { "ywt000818@gmail.com": "OwenYWT", "dhandhalyabhavik@gmail.com": "v1k22", 
"rucchizhao@zhaochenfeideMacBook-Pro.local": "RucchiZ", + "tannerfokkens@Mac.attlocal.net": "tannerfokkens-maker", "lehaolin98@outlook.com": "LehaoLin", "yuewang1@microsoft.com": "imink", "1736355688@qq.com": "hedgeho9X", @@ -264,12 +322,25 @@ AUTHOR_MAP = { "anthhub@163.com": "anthhub", "shenuu@gmail.com": "shenuu", "xiayh17@gmail.com": "xiayh0107", + "zhujianxyz@gmail.com": "opriz", "asurla@nvidia.com": "anniesurla", "limkuan24@gmail.com": "WideLee", "aviralarora002@gmail.com": "AviArora02-commits", + "draixagent@gmail.com": "draix", "junminliu@gmail.com": "JimLiu", "jarvischer@gmail.com": "maxchernin", "levantam.98.2324@gmail.com": "LVT382009", + "zhurongcheng@rcrai.com": "heykb", + "withapurpose37@gmail.com": "StefanIsMe", + "261797239+lumenradley@users.noreply.github.com": "lumenradley", + "166376523+sjz-ks@users.noreply.github.com": "sjz-ks", + "haileymarshall005@gmail.com": "haileymarshall", + "aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80", + "zheng.jerilyn@gmail.com": "jerilynzheng", + "asslaenn5@gmail.com": "Aslaaen", + "shalompmc0505@naver.com": "pinion05", + "105142614+VTRiot@users.noreply.github.com": "VTRiot", + "vivien000812@gmail.com": "iamagenius00", } diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 70cf8e95d9..d1aeb73722 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -229,6 +229,14 @@ async function startSocket() { // Check allowlist for messages from others (resolve LID ↔ phone aliases) if (!msg.key.fromMe && !matchesAllowedUser(senderId, ALLOWED_USERS, SESSION_DIR)) { + try { + console.log(JSON.stringify({ + event: 'ignored', + reason: 'allowlist_mismatch', + chatId, + senderId, + })); + } catch {} continue; } @@ -364,6 +372,37 @@ async function startSocket() { const app = express(); app.use(express.json()); +// Host-header validation — defends against DNS rebinding. +// The bridge binds loopback-only (127.0.0.1) but a victim browser on +// the same machine could be tricked into fetching from an attacker +// hostname that TTL-flips to 127.0.0.1. Reject any request whose Host +// header doesn't resolve to a loopback alias. +// See GHSA-ppp5-vxwm-4cf7. +const _ACCEPTED_HOST_VALUES = new Set([ + 'localhost', + '127.0.0.1', + '[::1]', + '::1', +]); + +app.use((req, res, next) => { + const raw = (req.headers.host || '').trim(); + if (!raw) { + return res.status(400).json({ error: 'Missing Host header' }); + } + // Strip port suffix: "localhost:3000" → "localhost" + const hostOnly = (raw.includes(':') + ? raw.substring(0, raw.lastIndexOf(':')) + : raw + ).replace(/^\[|\]$/g, '').toLowerCase(); + if (!_ACCEPTED_HOST_VALUES.has(hostOnly)) { + return res.status(400).json({ + error: 'Invalid Host header. Bridge accepts loopback hosts only.', + }); + } + next(); +}); + // Poll for new messages (long-poll style) app.get('/messages', (req, res) => { const msgs = messageQueue.splice(0, messageQueue.length); diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md index 362841f395..d19471c80d 100644 --- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md +++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md @@ -338,7 +338,6 @@ Edit with `hermes config edit` or `hermes config set section.key value`. 
| `memory` | `memory_enabled`, `user_profile_enabled`, `provider` | | `security` | `tirith_enabled`, `website_blocklist` | | `delegation` | `model`, `provider`, `base_url`, `api_key`, `max_iterations` (50), `reasoning_effort` | -| `smart_model_routing` | `enabled`, `cheap_model` | | `checkpoints` | `enabled`, `max_snapshots` (50) | Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/configuration diff --git a/skills/creative/baoyu-comic/PORT_NOTES.md b/skills/creative/baoyu-comic/PORT_NOTES.md new file mode 100644 index 0000000000..637b7befb5 --- /dev/null +++ b/skills/creative/baoyu-comic/PORT_NOTES.md @@ -0,0 +1,77 @@ +# Port Notes — baoyu-comic + +Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1. + +## Changes from upstream + +### SKILL.md adaptations + +| Change | Upstream | Hermes | +|--------|----------|--------| +| Metadata namespace | `openclaw` | `hermes` (with `tags` + `homepage`) | +| Trigger | Slash commands / CLI flags | Natural language skill matching | +| User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra | +| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one question at a time) | +| Image generation | baoyu-imagine (Bun/TypeScript, supports `--ref`) | `image_generate` — **prompt-only**, returns a URL; no reference image input; agent must download the URL to the output directory | +| PDF assembly | `scripts/merge-to-pdf.ts` (Bun + `pdf-lib`) | Removed — the PDF merge step is out of scope for this port; pages are delivered as PNGs only | +| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only | +| File operations | Generic instructions | Hermes file tools (`write_file`, `read_file`) | + +### Structural removals + +- **`references/config/` directory** (removed entirely): + - `first-time-setup.md` — blocking first-time setup flow for EXTEND.md + - `preferences-schema.md` — EXTEND.md YAML schema + - `watermark-guide.md` — watermark config (tied to EXTEND.md) +- **`scripts/` directory** (removed entirely): upstream's `merge-to-pdf.ts` depended on `pdf-lib`, which is not declared anywhere in the Hermes repo. Rather than add a new dependency, the port drops PDF assembly and delivers per-page PNGs. +- **Workflow Step 8 (Merge to PDF)** removed from `workflow.md`; Step 9 (Completion report) renumbered to Step 8. +- **Workflow Step 1.1** — "Load Preferences (EXTEND.md)" section removed from `workflow.md`; steps 1.2/1.3 renumbered to 1.1/1.2. +- **Generic "User Input Tools" and "Image Generation Tools" preambles** — SKILL.md no longer lists fallback rules for multiple possible tools; it references `clarify` and `image_generate` directly. + +### Image generation strategy changes + +`image_generate`'s schema accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`). Upstream's reference-image flow (`--ref characters.png` for character consistency, plus user-supplied refs for style/palette/scene) does not map to this tool, so the workflow was restructured: + +- **Character sheet PNG** is still generated for multi-page comics, but it is repositioned as a **human-facing review artifact** (for visual verification) and a reference for later regenerations / manual prompt edits. Page prompts themselves are built from the **text descriptions** in `characters/characters.md` (embedded inline during Step 5). `image_generate` never sees the PNG as a visual input. 
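Because `image_generate` never sees the character sheet, the inline text embedding described above carries all of the cross-page consistency burden. A sketch of how a page prompt might be assembled under this port's file conventions (the helper is illustrative, not part of the skill):

```python
from pathlib import Path

def build_page_prompt(page_brief: str, comic_dir: Path, ref_traits: list[str]) -> str:
    # Illustrative sketch. Character descriptions are copied verbatim into
    # every page prompt because the prompt-only image model has no other
    # way to keep characters consistent across pages.
    characters = (comic_dir / "characters" / "characters.md").read_text(encoding="utf-8")
    sections = [page_brief, "Characters (render consistently on every page):", characters]
    if ref_traits:
        # Traits extracted as text from user reference images (style/palette/scene).
        sections.append("Style traits: " + "; ".join(ref_traits))
    return "\n\n".join(sections)
```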
+- **User-supplied reference images** are reduced to `style` / `palette` / `scene` trait extraction — traits are embedded in the prompt body; the image files themselves are kept only for provenance under `refs/`. +- **Page prompts** now mandate that character descriptions are embedded inline (copied from `characters/characters.md`) — this is the only mechanism left to enforce cross-page character consistency. +- **Download step** — after every `image_generate` call, the returned URL is fetched to disk (e.g., `curl -fsSL "<image-url>" -o <page>.png`) and verified before the workflow advances. + +### SKILL.md reductions + +- CLI option columns (`--art`, `--tone`, `--layout`, `--aspect`, `--lang`, `--ref`, `--storyboard-only`, `--prompts-only`, `--images-only`, `--regenerate`) converted to plain-English option descriptions. +- Preset files (`presets/*.md`) and `ohmsha-guide.md`: `` `--style X` `` / `` `--art X --tone Y` `` shorthand rewritten to `art=X, tone=Y` + natural-language references. +- `partial-workflows.md`: per-skill slash command invocations rewritten as user-intent cues; PDF-related outputs removed. +- `auto-selection.md`: priority order dropped the EXTEND.md tier. +- `analysis-framework.md`: language-priority comment updated (user option → conversation → source). + +### File naming convention + +Source content pasted by the user is saved as `source-{slug}.md`, where `{slug}` is the kebab-case topic slug used for the output directory. Backups follow the same pattern with a `-backup-YYYYMMDD-HHMMSS` suffix. SKILL.md and `workflow.md` now agree on this single convention. + +### What was preserved verbatim + +- All 6 art-style definitions (`references/art-styles/`) +- All 7 tone definitions (`references/tones/`) +- All 7 layout definitions (`references/layouts/`) +- Core templates: `character-template.md`, `storyboard-template.md`, `base-prompt.md` +- Preset bodies (only the first few intro lines adapted; special rules unchanged) +- Author, version, homepage attribution + +## Syncing with upstream + +To pull upstream updates: + +```bash +# Compare versions +curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/SKILL.md | head -5 +# Look for the version: line + +# Diff a reference file +diff <(curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/references/art-styles/manga.md) \ + references/art-styles/manga.md +``` + +Art-style, tone, and layout reference files can usually be overwritten directly (they're upstream-verbatim). `SKILL.md`, `references/workflow.md`, `references/partial-workflows.md`, `references/auto-selection.md`, `references/analysis-framework.md`, `references/ohmsha-guide.md`, and `references/presets/*.md` must be manually merged since they contain Hermes-specific adaptations. + +If upstream adds a Hermes-compatible PDF merge step (no extra npm deps), restore `scripts/` and reintroduce Step 8 in `workflow.md`. diff --git a/skills/creative/baoyu-comic/SKILL.md b/skills/creative/baoyu-comic/SKILL.md new file mode 100644 index 0000000000..d3c89ed4c7 --- /dev/null +++ b/skills/creative/baoyu-comic/SKILL.md @@ -0,0 +1,246 @@ +--- +name: baoyu-comic +description: Knowledge comic creator supporting multiple art styles and tones. Creates original educational comics with detailed panel layouts and sequential image generation. Use when user asks to create "知识漫画", "教育漫画", "biography comic", "tutorial comic", or "Logicomix-style comic". 
+version: 1.56.1 +author: 宝玉 (JimLiu) +license: MIT +metadata: + hermes: + tags: [comic, knowledge-comic, creative, image-generation] + homepage: https://github.com/JimLiu/baoyu-skills#baoyu-comic +--- + +# Knowledge Comic Creator + +Adapted from [baoyu-comic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem. + +Create original knowledge comics with flexible art style × tone combinations. + +## When to Use + +Trigger this skill when the user asks to create a knowledge/educational comic, biography comic, tutorial comic, or uses terms like "知识漫画", "教育漫画", or "Logicomix-style". The user provides content (text, file path, URL, or topic) and optionally specifies art style, tone, layout, aspect ratio, or language. + +## Reference Images + +Hermes' `image_generate` tool is **prompt-only** — it accepts a text prompt and an aspect ratio, and returns an image URL. It does **NOT** accept reference images. When the user supplies a reference image, use it to **extract traits in text** that get embedded in every page prompt: + +**Intake**: Accept file paths when the user provides them (or pastes images in conversation). +- File path(s) → copy to `refs/NN-ref-{slug}.{ext}` alongside the comic output for provenance +- Pasted image with no path → ask the user for the path via `clarify`, or extract style traits verbally as a text fallback +- No reference → skip this section + +**Usage modes** (per reference): + +| Usage | Effect | +|-------|--------| +| `style` | Extract style traits (line treatment, texture, mood) and append to every page's prompt body | +| `palette` | Extract hex colors and append to every page's prompt body | +| `scene` | Extract scene composition or subject notes and append to the relevant page(s) | + +**Record in each page's prompt frontmatter** when refs exist: + +```yaml +references: + - ref_id: 01 + filename: 01-ref-scene.png + usage: style + traits: "muted earth tones, soft-edged ink wash, low-contrast backgrounds" +``` + +Character consistency is driven by **text descriptions** in `characters/characters.md` (written in Step 3) that get embedded inline in every page prompt (Step 5). The optional PNG character sheet generated in Step 7.1 is a human-facing review artifact, not an input to `image_generate`. + +## Options + +### Visual Dimensions + +| Option | Values | Description | +|--------|--------|-------------| +| Art | ligne-claire (default), manga, realistic, ink-brush, chalk, minimalist | Art style / rendering technique | +| Tone | neutral (default), warm, dramatic, romantic, energetic, vintage, action | Mood / atmosphere | +| Layout | standard (default), cinematic, dense, splash, mixed, webtoon, four-panel | Panel arrangement | +| Aspect | 3:4 (default, portrait), 4:3 (landscape), 16:9 (widescreen) | Page aspect ratio | +| Language | auto (default), zh, en, ja, etc. | Output language | +| Refs | File paths | Reference images used for style / palette trait extraction (not passed to the image model). See [Reference Images](#reference-images) above. 
| + +### Partial Workflow Options + +| Option | Description | +|--------|-------------| +| Storyboard only | Generate storyboard only, skip prompts and images | +| Prompts only | Generate storyboard + prompts, skip images | +| Images only | Generate images from existing prompts directory | +| Regenerate N | Regenerate specific page(s) only (e.g., `3` or `2,5,8`) | + +Details: [references/partial-workflows.md](references/partial-workflows.md) + +### Art, Tone & Preset Catalogue + +- **Art styles** (6): `ligne-claire`, `manga`, `realistic`, `ink-brush`, `chalk`, `minimalist`. Full definitions at `references/art-styles/