Merge branch 'main' into main

GatewayJ 2026-04-18 19:43:37 +08:00 committed by GitHub
commit 03e2ca1efe
759 changed files with 114681 additions and 11073 deletions


@@ -24,6 +24,15 @@
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
# =============================================================================
# LLM PROVIDER (Ollama Cloud)
# =============================================================================
# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint.
# Get your key at: https://ollama.com/settings
# OLLAMA_API_KEY=your_ollama_key_here
# Optional base URL override (default: https://ollama.com/v1)
# OLLAMA_BASE_URL=https://ollama.com/v1
# =============================================================================
# LLM PROVIDER (z.ai / GLM)
# =============================================================================
@@ -145,6 +154,10 @@
# Only override here if you need to force a backend without touching config.yaml:
# TERMINAL_ENV=local
# Override the container runtime binary (e.g. to use Podman instead of Docker).
# Useful on systems where Docker's storage driver is broken or unavailable.
# HERMES_DOCKER_BINARY=/usr/local/bin/podman
# Container images (for singularity/docker/modal backends)
# TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
# TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20

.envrc (4 changes)

@@ -1 +1,5 @@
watch_file pyproject.toml uv.lock
watch_file ui-tui/package-lock.json ui-tui/package.json
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
use flake


@@ -1,11 +1,12 @@
name: Deploy Site
on:
release:
types: [published]
push:
branches: [main]
paths:
- 'website/**'
- 'landingpage/**'
- 'skills/**'
- 'optional-skills/**'
- '.github/workflows/deploy-site.yml'
@@ -20,8 +21,14 @@ concurrency:
cancel-in-progress: false
jobs:
build-and-deploy:
# Only run on the upstream repository, not on forks
deploy-vercel:
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- name: Trigger Vercel Deploy
run: curl -X POST "${{ secrets.VERCEL_DEPLOY_HOOK }}"
deploy-docs:
if: github.repository == 'NousResearch/hermes-agent'
runs-on: ubuntu-latest
environment:
@@ -65,12 +72,7 @@ jobs:
- name: Stage deployment
run: |
mkdir -p _site/docs
# Landing page at root
cp -r landingpage/* _site/
# Docusaurus at /docs/
cp -r website/build/* _site/docs/
# CNAME so GitHub Pages keeps the custom domain between deploys
echo "hermes-agent.nousresearch.com" > _site/CNAME
- name: Upload artifact
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3

.gitignore (vendored, 1 change)

@@ -60,5 +60,6 @@ mini-swe-agent/
# Nix
.direnv/
.nix-stamps/
result
website/static/api/skills-index.json


@@ -105,3 +105,4 @@ tesseracttars-creator <tesseracttars@gmail.com> <tesseracttars@gmail.com>
xinbenlv <zzn+pa@zzn.im> <zzn+pa@zzn.im>
SaulJWu <saul.jj.wu@gmail.com> <saul.jj.wu@gmail.com>
angelos <angelos@oikos.lan.home.malaiwah.com> <angelos@oikos.lan.home.malaiwah.com>
MestreY0d4-Uninter <241404605+MestreY0d4-Uninter@users.noreply.github.com> <MestreY0d4-Uninter@users.noreply.github.com>

AGENTS.md (118 changes)

@@ -13,7 +13,7 @@ source venv/bin/activate # ALWAYS activate before running Python
```
hermes-agent/
├── run_agent.py # AIAgent class — core conversation loop
├── model_tools.py # Tool orchestration, _discover_tools(), handle_function_call()
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
├── cli.py # HermesCLI class — interactive CLI orchestrator
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
@@ -56,6 +56,19 @@ hermes-agent/
│ ├── run.py # Main loop, slash commands, message dispatch
│ ├── session.py # SessionStore — conversation persistence
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
│ ├── src/entry.tsx # TTY gate + render()
│ ├── src/app.tsx # Main state machine and UI
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
├── tui_gateway/ # Python JSON-RPC backend for the TUI
│ ├── entry.py # stdio entrypoint
│ ├── server.py # RPC handlers and session logic
│ ├── render.py # Optional rich/ANSI bridge
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
├── cron/ # Scheduler (jobs.py, scheduler.py)
├── environments/ # RL training environments (Atropos)
@@ -179,9 +192,62 @@ if canonical == "mycommand":
---
## TUI Architecture (ui-tui + tui_gateway)
The TUI is a full replacement for the classic (prompt_toolkit) CLI, activated via `hermes --tui` or `HERMES_TUI=1`.
### Process Model
```
hermes --tui
└─ Node (Ink) ──stdio JSON-RPC── Python (tui_gateway)
│ └─ AIAgent + tools + sessions
└─ renders transcript, composer, prompts, activity
```
TypeScript owns the screen. Python owns sessions, tools, model calls, and slash command logic.
### Transport
Newline-delimited JSON-RPC over stdio. Requests from Ink, events from Python. See `tui_gateway/server.py` for the full method/event catalog.
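As a rough sketch of the framing (method and param field names here are illustrative assumptions; the authoritative catalog lives in `tui_gateway/server.py`):
```python
import json
import sys

# Minimal sketch of newline-delimited JSON-RPC 2.0 framing, assuming
# standard request/notification shapes. Method names follow the table
# below; "session_id"/"text" params are assumptions.
def send(obj: dict) -> None:
    sys.stdout.write(json.dumps(obj) + "\n")  # one JSON object per line
    sys.stdout.flush()

# Request from the Ink client (carries an "id" and expects a response):
send({"jsonrpc": "2.0", "id": 1, "method": "prompt.submit",
      "params": {"session_id": "s1", "text": "hello"}})

# Event from the Python gateway (no "id", i.e. a one-way notification):
send({"jsonrpc": "2.0", "method": "message.delta",
      "params": {"session_id": "s1", "text": "Hi"}})
```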
### Key Surfaces
| Surface | Ink component | Gateway method |
|---------|---------------|----------------|
| Chat streaming | `app.tsx` + `messageLine.tsx` | `prompt.submit` → `message.delta/complete` |
| Tool activity | `thinking.tsx` | `tool.start/progress/complete` |
| Approvals | `prompts.tsx` | `approval.respond` ← `approval.request` |
| Clarify/sudo/secret | `prompts.tsx`, `maskedPrompt.tsx` | `clarify/sudo/secret.respond` |
| Session picker | `sessionPicker.tsx` | `session.list/resume` |
| Slash commands | Local handler + fallthrough | `slash.exec` → `_SlashWorker`, `command.dispatch` |
| Completions | `useCompletion` hook | `complete.slash`, `complete.path` |
| Theming | `theme.ts` + `branding.tsx` | `gateway.ready` with skin data |
### Slash Command Flow
1. Built-in client commands (`/help`, `/quit`, `/clear`, `/resume`, `/copy`, `/paste`, etc.) handled locally in `app.tsx`
2. Everything else → `slash.exec` (runs in persistent `_SlashWorker` subprocess) → `command.dispatch` fallback
### Dev Commands
```bash
cd ui-tui
npm install # first time
npm run dev # watch mode (rebuilds hermes-ink + tsx --watch)
npm start # production
npm run build # full build (hermes-ink + tsc)
npm run type-check # typecheck only (tsc --noEmit)
npm run lint # eslint
npm run fmt # prettier
npm test # vitest
```
---
## Adding New Tools
Requires changes in **3 files**:
Requires changes in **2 files**:
**1. Create `tools/your_tool.py`:**
```python
@@ -204,9 +270,9 @@ registry.register(
)
```
**2. Add import** in `model_tools.py` `_discover_tools()` list.
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
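Putting the two steps together, a complete tool file might look like the following sketch (the registry import path and `registry.register()` keyword names are assumptions; check an existing `tools/*.py` file for the exact signature):
```python
# tools/word_count.py: hypothetical example tool. Import path and
# register() keyword names below are illustrative, not verbatim.
import json

from tools.registry import registry  # assumed import path


def word_count(text: str) -> str:
    # Handlers MUST return a JSON string.
    return json.dumps({"words": len(text.split())})


registry.register(
    name="word_count",
    description="Count the words in a text snippet.",
    parameters={
        "type": "object",
        "properties": {"text": {"type": "string"}},
        "required": ["text"],
    },
    handler=word_count,
)
```
Because auto-discovery imports any `tools/*.py` with a top-level `registry.register()` call, dropping this file in place plus the `toolsets.py` entry is the whole change.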
@@ -458,13 +524,45 @@ def profile_env(tmp_path, monkeypatch):
## Testing
**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core
developer machine with API keys set diverges from CI in ways that have caused
multiple "works locally, fails in CI" incidents (and the reverse).
```bash
source venv/bin/activate
python -m pytest tests/ -q # Full suite (~3000 tests, ~3 min)
python -m pytest tests/test_model_tools.py -q # Toolset resolution
python -m pytest tests/test_cli_init.py -q # CLI config loading
python -m pytest tests/gateway/ -q # Gateway tests
python -m pytest tests/tools/ -q # Tool-level tests
scripts/run_tests.sh # full suite, CI-parity
scripts/run_tests.sh tests/gateway/ # one directory
scripts/run_tests.sh tests/agent/test_foo.py::test_x # one test
scripts/run_tests.sh -v --tb=long # pass-through pytest flags
```
### Why the wrapper (and why the old "just call pytest" doesn't work)
Five real sources of local-vs-CI drift the script closes:
| | Without wrapper | With wrapper |
|---|---|---|
| Provider API keys | Whatever is in your env (auto-detects pool) | All `*_API_KEY`/`*_TOKEN`/etc. unset |
| HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
| Timezone | Local TZ (PDT etc.) | UTC |
| Locale | Whatever is set | C.UTF-8 |
| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI |
`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
invocation (including IDE integrations) gets hermetic behavior — but the wrapper
is belt-and-suspenders.
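For intuition, such an autouse fixture can be sketched like this (names are illustrative; the real fixture lives in `tests/conftest.py`):
```python
import os

import pytest


@pytest.fixture(autouse=True)
def _hermetic_env(tmp_path, monkeypatch):
    """Sketch of CI-parity enforcement; see tests/conftest.py for the real one."""
    # 1. Unset provider credentials so no test can hit a live API.
    for key in list(os.environ):
        if key.endswith(("_API_KEY", "_TOKEN")):
            monkeypatch.delenv(key, raising=False)
    # 2. Redirect HOME (and therefore ~/.hermes/) to a throwaway directory.
    monkeypatch.setenv("HOME", str(tmp_path))
    # 3 + 4. Pin timezone and locale to the CI values.
    monkeypatch.setenv("TZ", "UTC")
    monkeypatch.setenv("LANG", "C.UTF-8")
```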
### Running without the wrapper (only if you must)
If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
pytest directly), at minimum activate the venv and pass `-n 4`:
```bash
source venv/bin/activate
python -m pytest tests/ -q -n 4
```
Worker count above 4 will surface test-ordering flakes that CI never sees.
Always run the full suite before pushing changes.


@@ -13,7 +13,7 @@
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
<table>
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -141,11 +141,18 @@ See `hermes claw migrate --help` for all options, or use the `openclaw-migration
We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process.
Quick start for contributors:
Quick start for contributors — clone and go with `setup-hermes.sh`:
```bash
git clone https://github.com/NousResearch/hermes-agent.git
cd hermes-agent
./setup-hermes.sh # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes
./hermes # auto-detects the venv, no need to `source` first
```
Manual path (equivalent to the above):
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv venv --python 3.11
source venv/bin/activate

RELEASE_v0.10.0.md (new file, 27 lines)

@@ -0,0 +1,27 @@
# Hermes Agent v0.10.0 (v2026.4.16)
**Release Date:** April 16, 2026
> The Tool Gateway release — paid Nous Portal subscribers can now use web search, image generation, text-to-speech, and browser automation through their existing subscription with zero additional API keys.
---
## ✨ Highlights
- **Nous Tool Gateway** — Paid [Nous Portal](https://portal.nousresearch.com) subscribers now get automatic access to **web search** (Firecrawl), **image generation** (FAL / FLUX 2 Pro), **text-to-speech** (OpenAI TTS), and **browser automation** (Browser Use) through their existing subscription. No separate API keys needed — just run `hermes model`, select Nous Portal, and pick which tools to enable. Per-tool opt-in via `use_gateway` config, full integration with `hermes tools` and `hermes status`, and the runtime correctly prefers the gateway even when direct API keys exist. Replaces the old hidden `HERMES_ENABLE_NOUS_MANAGED_TOOLS` env var with clean subscription-based detection. ([#11206](https://github.com/NousResearch/hermes-agent/pull/11206), based on work by @jquesnelle; docs: [#11208](https://github.com/NousResearch/hermes-agent/pull/11208))
---
## 🐛 Bug Fixes & Improvements
This release includes 180+ commits with numerous bug fixes, platform improvements, and reliability enhancements across the agent core, gateway, CLI, and tool system. Full details will be published in the v0.11.0 changelog.
---
## 👥 Contributors
- **@jquesnelle** (emozilla) — Original Tool Gateway implementation ([#10799](https://github.com/NousResearch/hermes-agent/pull/10799)), salvaged and shipped in this release
---
**Full Changelog**: [v2026.4.13...v2026.4.16](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.16)

SECURITY.md (new file, 84 lines)

@@ -0,0 +1,84 @@
# Hermes Agent Security Policy
This document outlines the security protocols, trust model, and deployment hardening guidelines for the **Hermes Agent** project.
## 1. Vulnerability Reporting
Hermes Agent does **not** operate a bug bounty program. Security issues should be reported via [GitHub Security Advisories (GHSA)](https://github.com/NousResearch/hermes-agent/security/advisories/new) or by emailing **security@nousresearch.com**. Do not open public issues for security vulnerabilities.
### Required Submission Details
- **Title & Severity:** Concise description and CVSS score/rating.
- **Affected Component:** Exact file path and line range (e.g., `tools/approval.py:120-145`).
- **Environment:** Output of `hermes version`, commit SHA, OS, and Python version.
- **Reproduction:** Step-by-step Proof-of-Concept (PoC) against `main` or the latest release.
- **Impact:** Explanation of what trust boundary was crossed.
---
## 2. Trust Model
The core assumption is that Hermes is a **personal agent** with one trusted operator.
### Operator & Session Trust
- **Single Tenant:** The system protects the operator from LLM actions, not from malicious co-tenants. Multi-user isolation must happen at the OS/host level.
- **Gateway Security:** Authorized callers (Telegram, Discord, Slack, etc.) receive equal trust. Session keys are used for routing, not as authorization boundaries.
- **Execution:** Defaults to `terminal.backend: local` (direct host execution). Container isolation (Docker, Modal, Daytona) is opt-in for sandboxing.
### Dangerous Command Approval
The approval system (`tools/approval.py`) is a core security boundary. Terminal commands, file operations, and other potentially destructive actions are gated behind explicit user confirmation before execution. The approval mode is configurable via `approvals.mode` in `config.yaml`:
- `"on"` (default) — prompts the user to approve dangerous commands.
- `"auto"` — auto-approves after a configurable delay.
- `"off"` — disables the gate entirely (break-glass; see Section 3).
### Output Redaction
`agent/redact.py` strips secret-like patterns (API keys, tokens, credentials) from all display output before it reaches the terminal or gateway platform. This prevents accidental credential leakage in chat logs, tool previews, and response text. Redaction operates on the display layer only — underlying values remain intact for internal agent operations.
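A minimal sketch of display-layer redaction (the patterns shown are examples only; `agent/redact.py` defines the real set):
```python
import re

# Example patterns only; agent/redact.py maintains the authoritative list.
SECRET_PATTERNS = [
    re.compile(r"sk-[A-Za-z0-9]{20,}"),        # OpenAI-style keys
    re.compile(r"(?i)bearer\s+[\w.\-]{20,}"),  # bearer tokens
]


def redact_for_display(text: str) -> str:
    # Applied to display output only; internal values stay intact.
    for pattern in SECRET_PATTERNS:
        text = pattern.sub("[REDACTED]", text)
    return text
```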
### Skills vs. MCP Servers
- **Installed Skills:** High trust. Equivalent to local host code; skills can read environment variables and run arbitrary commands.
- **MCP Servers:** Lower trust. MCP subprocesses receive a filtered environment (`_build_safe_env()` in `tools/mcp_tool.py`) — only safe baseline variables (`PATH`, `HOME`, `XDG_*`) plus variables explicitly declared in the server's `env` config block are passed through. Host credentials are stripped by default. Additionally, packages invoked via `npx`/`uvx` are checked against the OSV malware database before spawning.
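The env filtering can be sketched as follows (a simplified stand-in for `_build_safe_env()`; the actual baseline set is defined in `tools/mcp_tool.py`):
```python
import os

SAFE_BASELINE = {"PATH", "HOME", "TERM"}  # illustrative subset, not the real list


def build_safe_env(declared: list[str]) -> dict[str, str]:
    """Pass through only baseline vars, XDG_*, and explicitly declared names."""
    return {
        key: value
        for key, value in os.environ.items()
        if key in SAFE_BASELINE or key.startswith("XDG_") or key in declared
    }


# A server config declaring env: [GITHUB_TOKEN] gets exactly that credential;
# every other host API key or token is stripped from the subprocess.
subprocess_env = build_safe_env(declared=["GITHUB_TOKEN"])
```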
### Code Execution Sandbox
The `execute_code` tool (`tools/code_execution_tool.py`) runs LLM-generated Python scripts in a child process with API keys and tokens stripped from the environment to prevent credential exfiltration. Only environment variables explicitly declared by loaded skills (via `env_passthrough`) or by the user in `config.yaml` (`terminal.env_passthrough`) are passed through. The child accesses Hermes tools via RPC, not direct API calls.
### Subagents
- **No recursive delegation:** The `delegate_task` tool is disabled for child agents.
- **Depth limit:** `MAX_DEPTH = 2` — parent (depth 0) can spawn a child (depth 1); grandchildren are rejected.
- **Memory isolation:** Subagents run with `skip_memory=True` and do not have access to the parent's persistent memory provider. The parent receives only the task prompt and final response as an observation.
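The depth limit reduces to a one-line check, sketched here (the helper name is hypothetical; only `MAX_DEPTH = 2` is from the source):
```python
MAX_DEPTH = 2  # parent = depth 0, child = depth 1; depth 2 would be a grandchild


def can_spawn_subagent(current_depth: int) -> bool:
    # A parent (0) may spawn a child (1); the child's delegate_task tool is
    # disabled anyway, so grandchildren (2) are doubly rejected.
    return current_depth + 1 < MAX_DEPTH
```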
---
## 3. Out of Scope (Non-Vulnerabilities)
The following scenarios are **not** considered security breaches:
- **Prompt Injection:** Unless it results in a concrete bypass of the approval system, toolset restrictions, or container sandbox.
- **Public Exposure:** Deploying the gateway to the public internet without external authentication or network protection.
- **Trusted State Access:** Reports that require pre-existing write access to `~/.hermes/`, `.env`, or `config.yaml` (these are operator-owned files).
- **Default Behavior:** Host-level command execution when `terminal.backend` is set to `local` — this is the documented default, not a vulnerability.
- **Configuration Trade-offs:** Intentional break-glass settings such as `approvals.mode: "off"` or `terminal.backend: local` in production.
- **Tool-level read/access restrictions:** The agent has unrestricted shell access via the `terminal` tool by design. Reports that a specific tool (e.g., `read_file`) can access a resource are not vulnerabilities if the same access is available through `terminal`. Tool-level deny lists only constitute a meaningful security boundary when paired with equivalent restrictions on the terminal side (as with write operations, where `WRITE_DENIED_PATHS` is paired with the dangerous command approval system).
---
## 4. Deployment Hardening & Best Practices
### Filesystem & Network
- **Production sandboxing:** Use container backends (`docker`, `modal`, `daytona`) instead of `local` for untrusted workloads.
- **File permissions:** Run as non-root (the Docker image uses UID 10000); protect credentials with `chmod 600 ~/.hermes/.env` on local installs.
- **Network exposure:** Do not expose the gateway or API server to the public internet without VPN, Tailscale, or firewall protection. SSRF protection is enabled by default across all gateway platform adapters (Telegram, Discord, Slack, Matrix, Mattermost, etc.) with redirect validation. Note: the local terminal backend does not apply SSRF filtering, as it operates within the trusted operator's environment.
### Skills & Supply Chain
- **Skill installation:** Review Skills Guard reports (`tools/skills_guard.py`) before installing third-party skills. The audit log at `~/.hermes/skills/.hub/audit.log` tracks every install and removal.
- **MCP safety:** OSV malware checking runs automatically for `npx`/`uvx` packages before MCP server processes are spawned.
- **CI/CD:** GitHub Actions are pinned to full commit SHAs. The `supply-chain-audit.yml` workflow blocks PRs containing `.pth` files or suspicious `base64`+`exec` patterns.
### Credential Storage
- API keys and tokens belong exclusively in `~/.hermes/.env` — never in `config.yaml` or checked into version control.
- The credential pool system (`agent/credential_pool.py`) handles key rotation and fallback. Credentials are resolved from environment variables, not stored in plaintext databases.
---
## 5. Disclosure Process
- **Coordinated Disclosure:** 90-day window or until a fix is released, whichever comes first.
- **Communication:** All updates occur via the GHSA thread or email correspondence with security@nousresearch.com.
- **Credits:** Reporters are credited in release notes unless anonymity is requested.


@@ -49,6 +49,7 @@ def make_tool_progress_cb(
session_id: str,
loop: asyncio.AbstractEventLoop,
tool_call_ids: Dict[str, Deque[str]],
tool_call_meta: Dict[str, Dict[str, Any]],
) -> Callable:
"""Create a ``tool_progress_callback`` for AIAgent.
@@ -84,6 +85,16 @@ def make_tool_progress_cb(
tool_call_ids[name] = queue
queue.append(tc_id)
snapshot = None
if name in {"write_file", "patch", "skill_manage"}:
try:
from agent.display import capture_local_edit_snapshot
snapshot = capture_local_edit_snapshot(name, args)
except Exception:
logger.debug("Failed to capture ACP edit snapshot for %s", name, exc_info=True)
tool_call_meta[tc_id] = {"args": args, "snapshot": snapshot}
update = build_tool_start(tc_id, name, args)
_send_update(conn, session_id, loop, update)
@@ -119,6 +130,7 @@ def make_step_cb(
session_id: str,
loop: asyncio.AbstractEventLoop,
tool_call_ids: Dict[str, Deque[str]],
tool_call_meta: Dict[str, Dict[str, Any]],
) -> Callable:
"""Create a ``step_callback`` for AIAgent.
@@ -132,10 +144,12 @@ def make_step_cb(
for tool_info in prev_tools:
tool_name = None
result = None
function_args = None
if isinstance(tool_info, dict):
tool_name = tool_info.get("name") or tool_info.get("function_name")
result = tool_info.get("result") or tool_info.get("output")
function_args = tool_info.get("arguments") or tool_info.get("args")
elif isinstance(tool_info, str):
tool_name = tool_info
@@ -145,8 +159,13 @@
tool_call_ids[tool_name] = queue
if tool_name and queue:
tc_id = queue.popleft()
meta = tool_call_meta.pop(tc_id, {})
update = build_tool_complete(
tc_id, tool_name, result=str(result) if result is not None else None
tc_id,
tool_name,
result=str(result) if result is not None else None,
function_args=function_args or meta.get("args"),
snapshot=meta.get("snapshot"),
)
_send_update(conn, session_id, loop, update)
if not queue:


@@ -26,6 +26,7 @@ from acp.schema import (
McpServerHttp,
McpServerSse,
McpServerStdio,
ModelInfo,
NewSessionResponse,
PromptResponse,
ResumeSessionResponse,
@@ -36,6 +37,7 @@ from acp.schema import (
SessionCapabilities,
SessionForkCapabilities,
SessionListCapabilities,
SessionModelState,
SessionResumeCapabilities,
SessionInfo,
TextContentBlock,
@@ -147,6 +149,98 @@ class HermesACPAgent(acp.Agent):
self._conn = conn
logger.info("ACP client connected")
@staticmethod
def _encode_model_choice(provider: str | None, model: str | None) -> str:
"""Encode a model selection so ACP clients can keep provider context."""
raw_model = str(model or "").strip()
if not raw_model:
return ""
raw_provider = str(provider or "").strip().lower()
if not raw_provider:
return raw_model
return f"{raw_provider}:{raw_model}"
def _build_model_state(self, state: SessionState) -> SessionModelState | None:
"""Return the ACP model selector payload for editors like Zed."""
model = str(state.model or getattr(state.agent, "model", "") or "").strip()
provider = getattr(state.agent, "provider", None) or detect_provider() or "openrouter"
try:
from hermes_cli.models import curated_models_for_provider, normalize_provider, provider_label
normalized_provider = normalize_provider(provider)
provider_name = provider_label(normalized_provider)
available_models: list[ModelInfo] = []
seen_ids: set[str] = set()
for model_id, description in curated_models_for_provider(normalized_provider):
rendered_model = str(model_id or "").strip()
if not rendered_model:
continue
choice_id = self._encode_model_choice(normalized_provider, rendered_model)
if choice_id in seen_ids:
continue
desc_parts = [f"Provider: {provider_name}"]
if description:
desc_parts.append(str(description).strip())
if rendered_model == model:
desc_parts.append("current")
available_models.append(
ModelInfo(
model_id=choice_id,
name=rendered_model,
description="".join(part for part in desc_parts if part),
)
)
seen_ids.add(choice_id)
current_model_id = self._encode_model_choice(normalized_provider, model)
if current_model_id and current_model_id not in seen_ids:
available_models.insert(
0,
ModelInfo(
model_id=current_model_id,
name=model,
description=f"Provider: {provider_name} • current",
),
)
if available_models:
return SessionModelState(
available_models=available_models,
current_model_id=current_model_id or available_models[0].model_id,
)
except Exception:
logger.debug("Could not build ACP model state", exc_info=True)
if not model:
return None
fallback_choice = self._encode_model_choice(provider, model)
return SessionModelState(
available_models=[ModelInfo(model_id=fallback_choice, name=model)],
current_model_id=fallback_choice,
)
@staticmethod
def _resolve_model_selection(raw_model: str, current_provider: str) -> tuple[str, str]:
"""Resolve ``provider:model`` input into the provider and normalized model id."""
target_provider = current_provider
new_model = raw_model.strip()
try:
from hermes_cli.models import detect_provider_for_model, parse_model_input
target_provider, new_model = parse_model_input(new_model, current_provider)
if target_provider == current_provider:
detected = detect_provider_for_model(new_model, current_provider)
if detected:
target_provider, new_model = detected
except Exception:
logger.debug("Provider detection failed, using model as-is", exc_info=True)
return target_provider, new_model
async def _register_session_mcp_servers(
self,
state: SessionState,
@@ -273,7 +367,10 @@ class HermesACPAgent(acp.Agent):
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("New session %s (cwd=%s)", state.session_id, cwd)
self._schedule_available_commands_update(state.session_id)
return NewSessionResponse(session_id=state.session_id)
return NewSessionResponse(
session_id=state.session_id,
models=self._build_model_state(state),
)
async def load_session(
self,
@@ -289,7 +386,7 @@
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Loaded session %s", session_id)
self._schedule_available_commands_update(session_id)
return LoadSessionResponse()
return LoadSessionResponse(models=self._build_model_state(state))
async def resume_session(
self,
@@ -305,7 +402,7 @@
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Resumed session %s", state.session_id)
self._schedule_available_commands_update(state.session_id)
return ResumeSessionResponse()
return ResumeSessionResponse(models=self._build_model_state(state))
async def cancel(self, session_id: str, **kwargs: Any) -> None:
state = self.session_manager.get_session(session_id)
@@ -340,11 +437,20 @@
cwd: str | None = None,
**kwargs: Any,
) -> ListSessionsResponse:
infos = self.session_manager.list_sessions()
sessions = [
SessionInfo(session_id=s["session_id"], cwd=s["cwd"])
for s in infos
]
infos = self.session_manager.list_sessions(cwd=cwd)
sessions = []
for s in infos:
updated_at = s.get("updated_at")
if updated_at is not None and not isinstance(updated_at, str):
updated_at = str(updated_at)
sessions.append(
SessionInfo(
session_id=s["session_id"],
cwd=s["cwd"],
title=s.get("title"),
updated_at=updated_at,
)
)
return ListSessionsResponse(sessions=sessions)
# ---- Prompt (core) ------------------------------------------------------
@@ -389,12 +495,13 @@
state.cancel_event.clear()
tool_call_ids: dict[str, Deque[str]] = defaultdict(deque)
tool_call_meta: dict[str, dict[str, Any]] = {}
previous_approval_cb = None
if conn:
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids)
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
thinking_cb = make_thinking_cb(conn, session_id, loop)
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids)
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
message_cb = make_message_cb(conn, session_id, loop)
approval_cb = make_approval_callback(conn.request_permission, loop, session_id)
else:
@@ -449,6 +556,19 @@
self.session_manager.save_session(session_id)
final_response = result.get("final_response", "")
if final_response:
try:
from agent.title_generator import maybe_auto_title
maybe_auto_title(
self.session_manager._get_db(),
session_id,
user_text,
final_response,
state.history,
)
except Exception:
logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
if final_response and conn:
update = acp.update_agent_message_text(final_response)
await conn.session_update(session_id, update)
@@ -556,27 +676,15 @@
provider = getattr(state.agent, "provider", None) or "auto"
return f"Current model: {model}\nProvider: {provider}"
new_model = args.strip()
target_provider = None
current_provider = getattr(state.agent, "provider", None) or "openrouter"
# Auto-detect provider for the requested model
try:
from hermes_cli.models import parse_model_input, detect_provider_for_model
target_provider, new_model = parse_model_input(new_model, current_provider)
if target_provider == current_provider:
detected = detect_provider_for_model(new_model, current_provider)
if detected:
target_provider, new_model = detected
except Exception:
logger.debug("Provider detection failed, using model as-is", exc_info=True)
target_provider, new_model = self._resolve_model_selection(args, current_provider)
state.model = new_model
state.agent = self.session_manager._make_agent(
session_id=state.session_id,
cwd=state.cwd,
model=new_model,
requested_provider=target_provider or current_provider,
requested_provider=target_provider,
)
self.session_manager.save_session(state.session_id)
provider_label = getattr(state.agent, "provider", None) or target_provider or current_provider
@@ -678,20 +786,30 @@
"""Switch the model for a session (called by ACP protocol)."""
state = self.session_manager.get_session(session_id)
if state:
state.model = model_id
current_provider = getattr(state.agent, "provider", None)
current_base_url = getattr(state.agent, "base_url", None)
current_api_mode = getattr(state.agent, "api_mode", None)
requested_provider, resolved_model = self._resolve_model_selection(
model_id,
current_provider or "openrouter",
)
state.model = resolved_model
provider_changed = bool(current_provider and requested_provider != current_provider)
current_base_url = None if provider_changed else getattr(state.agent, "base_url", None)
current_api_mode = None if provider_changed else getattr(state.agent, "api_mode", None)
state.agent = self.session_manager._make_agent(
session_id=session_id,
cwd=state.cwd,
model=model_id,
requested_provider=current_provider,
model=resolved_model,
requested_provider=requested_provider,
base_url=current_base_url,
api_mode=current_api_mode,
)
self.session_manager.save_session(session_id)
logger.info("Session %s: model switched to %s", session_id, model_id)
logger.info(
"Session %s: model switched to %s via provider %s",
session_id,
resolved_model,
requested_provider,
)
return SetSessionModelResponse()
logger.warning("Session %s: model switch requested for missing session", session_id)
return None


@@ -13,8 +13,12 @@ from hermes_constants import get_hermes_home
import copy
import json
import logging
import os
import re
import sys
import time
import uuid
from datetime import datetime, timezone
from dataclasses import dataclass, field
from threading import Lock
from typing import Any, Dict, List, Optional
@@ -22,6 +26,64 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
def _normalize_cwd_for_compare(cwd: str | None) -> str:
raw = str(cwd or ".").strip()
if not raw:
raw = "."
expanded = os.path.expanduser(raw)
# Normalize Windows drive paths into the equivalent WSL mount form so
# ACP history filters match the same workspace across Windows and WSL.
match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded)
if match:
drive = match.group(1).lower()
tail = match.group(2).replace("\\", "/")
expanded = f"/mnt/{drive}/{tail}"
elif re.match(r"^/mnt/[A-Za-z]/", expanded):
expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"
return os.path.normpath(expanded)
def _build_session_title(title: Any, preview: Any, cwd: str | None) -> str:
explicit = str(title or "").strip()
if explicit:
return explicit
preview_text = str(preview or "").strip()
if preview_text:
return preview_text
leaf = os.path.basename(str(cwd or "").rstrip("/\\"))
return leaf or "New thread"
def _format_updated_at(value: Any) -> str | None:
if value is None:
return None
if isinstance(value, str) and value.strip():
return value
try:
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
except Exception:
return None
def _updated_at_sort_key(value: Any) -> float:
if value is None:
return float("-inf")
if isinstance(value, (int, float)):
return float(value)
raw = str(value).strip()
if not raw:
return float("-inf")
try:
return datetime.fromisoformat(raw.replace("Z", "+00:00")).timestamp()
except Exception:
try:
return float(raw)
except Exception:
return float("-inf")
def _acp_stderr_print(*args, **kwargs) -> None:
"""Best-effort human-readable output sink for ACP stdio sessions.
@@ -162,47 +224,78 @@ class SessionManager:
logger.info("Forked ACP session %s -> %s", session_id, new_id)
return state
def list_sessions(self) -> List[Dict[str, Any]]:
def list_sessions(self, cwd: str | None = None) -> List[Dict[str, Any]]:
"""Return lightweight info dicts for all sessions (memory + database)."""
normalized_cwd = _normalize_cwd_for_compare(cwd) if cwd else None
db = self._get_db()
persisted_rows: dict[str, dict[str, Any]] = {}
if db is not None:
try:
for row in db.list_sessions_rich(source="acp", limit=1000):
persisted_rows[str(row["id"])] = dict(row)
except Exception:
logger.debug("Failed to load ACP sessions from DB", exc_info=True)
# Collect in-memory sessions first.
with self._lock:
seen_ids = set(self._sessions.keys())
results = [
{
"session_id": s.session_id,
"cwd": s.cwd,
"model": s.model,
"history_len": len(s.history),
}
for s in self._sessions.values()
]
results = []
for s in self._sessions.values():
history_len = len(s.history)
if history_len <= 0:
continue
if normalized_cwd and _normalize_cwd_for_compare(s.cwd) != normalized_cwd:
continue
persisted = persisted_rows.get(s.session_id, {})
preview = next(
(
str(msg.get("content") or "").strip()
for msg in s.history
if msg.get("role") == "user" and str(msg.get("content") or "").strip()
),
persisted.get("preview") or "",
)
results.append(
{
"session_id": s.session_id,
"cwd": s.cwd,
"model": s.model,
"history_len": history_len,
"title": _build_session_title(persisted.get("title"), preview, s.cwd),
"updated_at": _format_updated_at(
persisted.get("last_active") or persisted.get("started_at") or time.time()
),
}
)
# Merge any persisted sessions not currently in memory.
db = self._get_db()
if db is not None:
try:
rows = db.search_sessions(source="acp", limit=1000)
for row in rows:
sid = row["id"]
if sid in seen_ids:
continue
# Extract cwd from model_config JSON.
cwd = "."
mc = row.get("model_config")
if mc:
try:
cwd = json.loads(mc).get("cwd", ".")
except (json.JSONDecodeError, TypeError):
pass
results.append({
"session_id": sid,
"cwd": cwd,
"model": row.get("model") or "",
"history_len": row.get("message_count") or 0,
})
except Exception:
logger.debug("Failed to list ACP sessions from DB", exc_info=True)
for sid, row in persisted_rows.items():
if sid in seen_ids:
continue
message_count = int(row.get("message_count") or 0)
if message_count <= 0:
continue
# Extract cwd from model_config JSON.
session_cwd = "."
mc = row.get("model_config")
if mc:
try:
session_cwd = json.loads(mc).get("cwd", ".")
except (json.JSONDecodeError, TypeError):
pass
if normalized_cwd and _normalize_cwd_for_compare(session_cwd) != normalized_cwd:
continue
results.append({
"session_id": sid,
"cwd": session_cwd,
"model": row.get("model") or "",
"history_len": message_count,
"title": _build_session_title(row.get("title"), row.get("preview"), session_cwd),
"updated_at": _format_updated_at(row.get("last_active") or row.get("started_at")),
})
results.sort(key=lambda item: _updated_at_sort_key(item.get("updated_at")), reverse=True)
return results
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:


@@ -2,6 +2,7 @@
from __future__ import annotations
import json
import uuid
from typing import Any, Dict, List, Optional
@@ -96,6 +97,170 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str:
return tool_name
def _build_patch_mode_content(patch_text: str) -> List[Any]:
"""Parse V4A patch mode input into ACP diff blocks when possible."""
if not patch_text:
return [acp.tool_content(acp.text_block(""))]
try:
from tools.patch_parser import OperationType, parse_v4a_patch
operations, error = parse_v4a_patch(patch_text)
if error or not operations:
return [acp.tool_content(acp.text_block(patch_text))]
content: List[Any] = []
for op in operations:
if op.operation == OperationType.UPDATE:
old_chunks: list[str] = []
new_chunks: list[str] = []
for hunk in op.hunks:
old_lines = [line.content for line in hunk.lines if line.prefix in (" ", "-")]
new_lines = [line.content for line in hunk.lines if line.prefix in (" ", "+")]
if old_lines or new_lines:
old_chunks.append("\n".join(old_lines))
new_chunks.append("\n".join(new_lines))
old_text = "\n...\n".join(chunk for chunk in old_chunks if chunk)
new_text = "\n...\n".join(chunk for chunk in new_chunks if chunk)
if old_text or new_text:
content.append(
acp.tool_diff_content(
path=op.file_path,
old_text=old_text or None,
new_text=new_text or "",
)
)
continue
if op.operation == OperationType.ADD:
added_lines = [line.content for hunk in op.hunks for line in hunk.lines if line.prefix == "+"]
content.append(
acp.tool_diff_content(
path=op.file_path,
new_text="\n".join(added_lines),
)
)
continue
if op.operation == OperationType.DELETE:
content.append(
acp.tool_diff_content(
path=op.file_path,
old_text=f"Delete file: {op.file_path}",
new_text="",
)
)
continue
if op.operation == OperationType.MOVE:
content.append(
acp.tool_content(acp.text_block(f"Move file: {op.file_path} -> {op.new_path}"))
)
return content or [acp.tool_content(acp.text_block(patch_text))]
except Exception:
return [acp.tool_content(acp.text_block(patch_text))]
def _strip_diff_prefix(path: str) -> str:
raw = str(path or "").strip()
if raw.startswith(("a/", "b/")):
return raw[2:]
return raw
def _parse_unified_diff_content(diff_text: str) -> List[Any]:
"""Convert unified diff text into ACP diff content blocks."""
if not diff_text:
return []
content: List[Any] = []
current_old_path: Optional[str] = None
current_new_path: Optional[str] = None
old_lines: list[str] = []
new_lines: list[str] = []
def _flush() -> None:
nonlocal current_old_path, current_new_path, old_lines, new_lines
if current_old_path is None and current_new_path is None:
return
path = current_new_path if current_new_path and current_new_path != "/dev/null" else current_old_path
if not path or path == "/dev/null":
current_old_path = None
current_new_path = None
old_lines = []
new_lines = []
return
content.append(
acp.tool_diff_content(
path=_strip_diff_prefix(path),
old_text="\n".join(old_lines) if old_lines else None,
new_text="\n".join(new_lines),
)
)
current_old_path = None
current_new_path = None
old_lines = []
new_lines = []
for line in diff_text.splitlines():
if line.startswith("--- "):
_flush()
current_old_path = line[4:].strip()
continue
if line.startswith("+++ "):
current_new_path = line[4:].strip()
continue
if line.startswith("@@"):
continue
if current_old_path is None and current_new_path is None:
continue
if line.startswith("+"):
new_lines.append(line[1:])
elif line.startswith("-"):
old_lines.append(line[1:])
elif line.startswith(" "):
shared = line[1:]
old_lines.append(shared)
new_lines.append(shared)
_flush()
return content
def _build_tool_complete_content(
tool_name: str,
result: Optional[str],
*,
function_args: Optional[Dict[str, Any]] = None,
snapshot: Any = None,
) -> List[Any]:
"""Build structured ACP completion content, falling back to plain text."""
display_result = result or ""
if len(display_result) > 5000:
display_result = display_result[:4900] + f"\n... ({len(result)} chars total, truncated)"
if tool_name in {"write_file", "patch", "skill_manage"}:
try:
from agent.display import extract_edit_diff
diff_text = extract_edit_diff(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
if isinstance(diff_text, str) and diff_text.strip():
diff_content = _parse_unified_diff_content(diff_text)
if diff_content:
return diff_content
except Exception:
pass
return [acp.tool_content(acp.text_block(display_result))]
# ---------------------------------------------------------------------------
# Build ACP content objects for tool-call events
# ---------------------------------------------------------------------------
@@ -119,9 +284,8 @@ def build_tool_start(
new = arguments.get("new_string", "")
content = [acp.tool_diff_content(path=path, new_text=new, old_text=old)]
else:
# Patch mode — show the patch content as text
patch_text = arguments.get("patch", "")
content = [acp.tool_content(acp.text_block(patch_text))]
content = _build_patch_mode_content(patch_text)
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
@@ -178,16 +342,17 @@ def build_tool_complete(
tool_call_id: str,
tool_name: str,
result: Optional[str] = None,
function_args: Optional[Dict[str, Any]] = None,
snapshot: Any = None,
) -> ToolCallProgress:
"""Create a ToolCallUpdate (progress) event for a completed tool call."""
kind = get_tool_kind(tool_name)
# Truncate very large results for the UI
display_result = result or ""
if len(display_result) > 5000:
display_result = display_result[:4900] + f"\n... ({len(result)} chars total, truncated)"
content = [acp.tool_content(acp.text_block(display_result))]
content = _build_tool_complete_content(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
return acp.update_tool_call(
tool_call_id,
kind=kind,


@@ -28,19 +28,45 @@ except ImportError:
logger = logging.getLogger(__name__)
THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
# Hermes effort → Anthropic adaptive-thinking effort (output_config.effort).
# Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max.
# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh.
# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/
# agentic work) and downgrade it to max on pre-4.7 adaptive models (which
# is the strongest level they accept). "minimal" is a legacy alias that
# maps to low on every model. See:
# https://platform.claude.com/docs/en/about-claude/models/migration-guide
ADAPTIVE_EFFORT_MAP = {
"xhigh": "max",
"high": "high",
"medium": "medium",
"low": "low",
"max": "max",
"xhigh": "xhigh",
"high": "high",
"medium": "medium",
"low": "low",
"minimal": "low",
}
# Models that accept the "xhigh" output_config.effort level. Opus 4.7 added
# xhigh as a distinct level between high and max; older adaptive-thinking
# models (4.6) reject it with a 400. Keep this substring list in sync with
# the Anthropic migration guide as new model families ship.
_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
# is the only supported mode; 4.7 additionally forbids manual thinking entirely
# and drops temperature/top_p/top_k).
_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
# Models where temperature/top_p/top_k return 400 if set to non-default values.
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
# ── Max output token limits per Anthropic model ───────────────────────
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
# max_tokens as a mandatory field. Previously we hardcoded 16384, which
# starves thinking-enabled models (thinking tokens count toward the limit).
_ANTHROPIC_OUTPUT_LIMITS = {
# Claude 4.7
"claude-opus-4-7": 128_000,
# Claude 4.6
"claude-opus-4-6": 128_000,
"claude-sonnet-4-6": 64_000,
@@ -91,11 +117,37 @@ def _get_anthropic_max_output(model: str) -> int:
def _supports_adaptive_thinking(model: str) -> bool:
"""Return True for Claude 4.6 models that support adaptive thinking."""
return any(v in model for v in ("4-6", "4.6"))
"""Return True for Claude 4.6+ models that support adaptive thinking."""
return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
# Beta headers for enhanced features (sent with ALL auth types)
def _supports_xhigh_effort(model: str) -> bool:
"""Return True for models that accept the 'xhigh' adaptive effort level.
Opus 4.7 introduced xhigh as a distinct level between high and max.
Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max
and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max
when this returns False.
"""
return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS)
def _forbids_sampling_params(model: str) -> bool:
"""Return True for models that 400 on any non-default temperature/top_p/top_k.
Opus 4.7 explicitly rejects sampling parameters; later Claude releases are
expected to follow suit. Callers should omit these fields entirely rather
than passing zero/default values (the API rejects anything non-null).
"""
return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
# Beta headers for enhanced features (sent with ALL auth types).
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
# beta headers are still accepted (harmless no-op) but not required. Kept
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
# that still gate on the headers continue to get the enhanced features.
# Migration guide: remove these if you no longer support ≤4.5 models.
_COMMON_BETAS = [
"interleaved-thinking-2025-05-14",
"fine-grained-tool-streaming-2025-05-14",
@@ -298,6 +350,33 @@ def build_anthropic_client(api_key: str, base_url: str = None):
return _anthropic_sdk.Anthropic(**kwargs)
def build_anthropic_bedrock_client(region: str):
"""Create an AnthropicBedrock client for Bedrock Claude models.
Uses the Anthropic SDK's native Bedrock adapter, which provides full
Claude feature parity: prompt caching, thinking budgets, adaptive
thinking, and fast mode, none of which are available via the Converse API.
Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
"""
if _anthropic_sdk is None:
raise ImportError(
"The 'anthropic' package is required for the Bedrock provider. "
"Install it with: pip install 'anthropic>=0.39.0'"
)
if not hasattr(_anthropic_sdk, "AnthropicBedrock"):
raise ImportError(
"anthropic.AnthropicBedrock not available. "
"Upgrade with: pip install 'anthropic>=0.39.0'"
)
from httpx import Timeout
return _anthropic_sdk.AnthropicBedrock(
aws_region=region,
timeout=Timeout(timeout=900.0, connect=10.0),
)
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
@@ -1314,18 +1393,31 @@ def build_anthropic_kwargs(
kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}
# Map reasoning_config to Anthropic's thinking parameter.
# Claude 4.6 models use adaptive thinking + output_config.effort.
# Claude 4.6+ models use adaptive thinking + output_config.effort.
# Older models use manual thinking with budget_tokens.
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
#
# On 4.7+ the `thinking.display` field defaults to "omitted", which
# silently hides reasoning text that Hermes surfaces in its CLI. We
# request "summarized" so the reasoning blocks stay populated — matching
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
effort = str(reasoning_config.get("effort", "medium")).lower()
budget = THINKING_BUDGET.get(effort, 8000)
if _supports_adaptive_thinking(model):
kwargs["thinking"] = {"type": "adaptive"}
kwargs["thinking"] = {
"type": "adaptive",
"display": "summarized",
}
adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium")
# Downgrade xhigh→max on models that don't list xhigh as a
# supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh.
if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model):
adaptive_effort = "max"
kwargs["output_config"] = {
"effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium")
"effort": adaptive_effort,
}
else:
kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
@@ -1333,6 +1425,15 @@
kwargs["temperature"] = 1
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
# ── Strip sampling params on 4.7+ ─────────────────────────────────
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
# Callers (auxiliary_client, flush_memories, etc.) may set these for
# older models; drop them here as a safety net so upstream 4.6 → 4.7
# migrations don't require coordinated edits everywhere.
if _forbids_sampling_params(model):
for _sampling_key in ("temperature", "top_p", "top_k"):
kwargs.pop(_sampling_key, None)
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
# Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
# output speed. Only for native Anthropic endpoints — third-party
@@ -1390,12 +1491,20 @@
)
)
# Map Anthropic stop_reason to OpenAI finish_reason
# Map Anthropic stop_reason to OpenAI finish_reason.
# Newer stop reasons added in Claude 4.5+ / 4.7:
# - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
# - model_context_window_exceeded: hit context limit (not max_tokens)
# Both need distinct handling upstream — a refusal should surface to the
# user with a clear message, and a context-window overflow should trigger
# compression/truncation rather than be treated as normal end-of-turn.
stop_reason_map = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
finish_reason = stop_reason_map.get(response.stop_reason, "stop")


@@ -58,6 +58,9 @@ _PROVIDER_ALIASES = {
"google": "gemini",
"google-gemini": "gemini",
"google-ai-studio": "gemini",
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
"glm": "zai",
"z-ai": "zai",
"z.ai": "zai",
@@ -91,6 +94,17 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
return "custom"
return _PROVIDER_ALIASES.get(normalized, normalized)
_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
"kimi-for-coding": 0.6,
}
def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
"""Return a required temperature override for models with strict contracts."""
normalized = (model or "").strip().lower()
return _FIXED_TEMPERATURE_MODELS.get(normalized)
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"gemini": "gemini-3-flash-preview",
@@ -104,6 +118,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"opencode-zen": "gemini-3-flash",
"opencode-go": "glm-5",
"kilocode": "google/gemini-3-flash-preview",
"ollama-cloud": "nemotron-3-nano:30b",
}
# Vision-specific model overrides for direct providers.
@@ -514,8 +529,13 @@ class _AnthropicCompletionsAdapter:
tool_choice=normalized_tool_choice,
is_oauth=self._is_oauth,
)
# Opus 4.7+ rejects any non-default temperature/top_p/top_k; only set
# temperature for models that still accept it. build_anthropic_kwargs
# additionally strips these keys as a safety net — keep both layers.
if temperature is not None:
anthropic_kwargs["temperature"] = temperature
from agent.anthropic_adapter import _forbids_sampling_params
if not _forbids_sampling_params(model):
anthropic_kwargs["temperature"] = temperature
response = self._client.messages.create(**anthropic_kwargs)
assistant_message, finish_reason = normalize_anthropic_response(response)
@@ -725,6 +745,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
elif "generativelanguage.googleapis.com" in base_url.lower():
# Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
# Passing api_key= causes the SDK to inject Authorization: Bearer,
# which Google rejects with HTTP 400 "Multiple authentication
# credentials received". Use a placeholder for api_key and pass
# the real key via x-goog-api-key header instead.
# Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
extra["default_headers"] = {"x-goog-api-key": api_key}
api_key = "not-used"
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
creds = resolve_api_key_provider_credentials(provider_id)
@@ -746,6 +775,15 @@
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
elif "generativelanguage.googleapis.com" in base_url.lower():
# Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
# Passing api_key= causes the SDK to inject Authorization: Bearer,
# which Google rejects with HTTP 400 "Multiple authentication
# credentials received". Use a placeholder for api_key and pass
# the real key via x-goog-api-key header instead.
# Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
extra["default_headers"] = {"x-goog-api-key": api_key}
api_key = "not-used"
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
return None, None
@@ -775,6 +813,21 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
# Check cross-session rate limit guard before attempting Nous —
# if another session already recorded a 429, skip Nous entirely
# to avoid piling more requests onto the tapped RPH bucket.
try:
from agent.nous_rate_guard import nous_rate_limit_remaining
_remaining = nous_rate_limit_remaining()
if _remaining is not None and _remaining > 0:
logger.debug(
"Auxiliary: skipping Nous Portal (rate-limited, resets in %.0fs)",
_remaining,
)
return None, None
except Exception:
pass
nous = _read_nous_auth()
if not nous:
return None, None
@@ -899,6 +952,51 @@ def _current_custom_base_url() -> str:
return custom_base or ""
def _validate_proxy_env_urls() -> None:
"""Fail fast with a clear error when proxy env vars have malformed URLs.
Common cause: shell config (e.g. .zshrc) with a typo like
``export HTTP_PROXY=http://127.0.0.1:6153export NEXT_VAR=...``
which concatenates 'export' into the port number. Without this
check the OpenAI/httpx client raises a cryptic ``Invalid port``
error that doesn't name the offending env var.
"""
from urllib.parse import urlparse
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = str(os.environ.get(key) or "").strip()
if not value:
continue
try:
parsed = urlparse(value)
if parsed.scheme:
_ = parsed.port # raises ValueError for e.g. '6153export'
except ValueError as exc:
raise RuntimeError(
f"Malformed proxy environment variable {key}={value!r}. "
"Fix or unset your proxy settings and try again."
) from exc
def _validate_base_url(base_url: str) -> None:
"""Reject obviously broken custom endpoint URLs before they reach httpx."""
from urllib.parse import urlparse
candidate = str(base_url or "").strip()
if not candidate or candidate.startswith("acp://"):
return
try:
parsed = urlparse(candidate)
if parsed.scheme in {"http", "https"}:
_ = parsed.port # raises ValueError for malformed ports
except ValueError as exc:
raise RuntimeError(
f"Malformed custom endpoint URL: {candidate!r}. "
"Run `hermes setup` or `hermes model` and enter a valid http(s) base URL."
) from exc
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
runtime = _resolve_custom_runtime()
if len(runtime) == 2:
@ -995,8 +1093,6 @@ _AUTO_PROVIDER_LABELS = {
"_resolve_api_key_provider": "api-key",
}
_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
@ -1127,11 +1223,15 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
"""Full auto-detection chain.
Priority:
1. If the user's main provider is NOT an aggregator (OpenRouter / Nous),
use their main provider + main model directly. This ensures users on
Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same
provider they already have credentials for; no OpenRouter key needed.
2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
1. User's main provider + main model, regardless of provider type.
This means auxiliary tasks (compression, vision, web extraction,
session search, etc.) use the same model the user configured for
chat. Users on OpenRouter/Nous get their chosen chat model; users
on DeepSeek/ZAI/Alibaba get theirs; etc. Running aux tasks on the
user's picked model keeps behavior predictable — no surprise
switches to a cheap fallback model for side tasks.
2. OpenRouter → Nous → custom → Codex → API-key providers (fallback
chain, only used when the main provider has no working client).
"""
global auxiliary_is_nous, _stale_base_url_warned
auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins
@ -1161,11 +1261,16 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
)
_stale_base_url_warned = True
# ── Step 1: non-aggregator main provider → use main model directly ──
# ── Step 1: main provider + main model → use them directly ──
#
# This is the primary aux backend for every user. "auto" means
# "use my main chat model for side tasks as well" — including users
# on aggregators (OpenRouter, Nous) who previously got routed to a
# cheap provider-side default. Explicit per-task overrides set via
# config.yaml (auxiliary.<task>.provider) still win over this.
main_provider = runtime_provider or _read_main_provider()
main_model = runtime_model or _read_main_model()
if (main_provider and main_model
and main_provider not in _AGGREGATOR_PROVIDERS
and main_provider not in ("auto", "")):
resolved_provider = main_provider
explicit_base_url = None
@ -1299,6 +1404,7 @@ def resolve_provider_client(
Returns:
(client, resolved_model) or (None, None) if auth is unavailable.
"""
_validate_proxy_env_urls()
# Normalise aliases
provider = _normalize_aux_provider(provider)
@ -1523,6 +1629,15 @@ def resolve_provider_client(
from hermes_cli.models import copilot_default_headers
headers.update(copilot_default_headers())
elif "generativelanguage.googleapis.com" in base_url.lower():
# Google's OpenAI-compatible endpoint only accepts x-goog-api-key.
# Passing api_key= causes the OpenAI SDK to inject Authorization: Bearer,
# which Google rejects with HTTP 400 "Multiple authentication credentials
# received". Use a placeholder for api_key and pass the real key via
# x-goog-api-key header instead.
# Fixes: https://github.com/NousResearch/hermes-agent/issues/7893
headers["x-goog-api-key"] = api_key
api_key = "not-used"
client = OpenAI(api_key=api_key, base_url=base_url,
**({"default_headers": headers} if headers else {}))
@ -1747,34 +1862,31 @@ def resolve_vision_provider_client(
if requested == "auto":
# Vision auto-detection order:
# 1. Active provider + model (user's main chat config)
# 2. OpenRouter (known vision-capable default model)
# 3. Nous Portal (known vision-capable default model)
# 1. User's main provider + main model (including aggregators).
# _PROVIDER_VISION_MODELS provides per-provider vision model
# overrides when the provider has a dedicated multimodal model
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
# zai → glm-5v-turbo).
# 2. OpenRouter (vision-capable aggregator fallback)
# 3. Nous Portal (vision-capable aggregator fallback)
# 4. Stop
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
if main_provider in _VISION_AUTO_PROVIDER_ORDER:
# Known strict backend — use its defaults.
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
return _finalize(main_provider, sync_client, default_model)
else:
# Exotic provider (DeepSeek, Alibaba, Xiaomi, named custom, etc.)
# Use provider-specific vision model if available, otherwise main model.
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using active provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
# Fall back through aggregators.
# Fall back through aggregators (uses their dedicated vision model,
# not the user's main model) when main provider has no client.
for candidate in _VISION_AUTO_PROVIDER_ORDER:
if candidate == main_provider:
continue # already tried above
@ -1835,9 +1947,15 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Every auxiliary LLM consumer should use these instead of manually
# constructing clients and calling .chat.completions.create().
# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
# Client cache: (provider, async_mode, base_url, api_key, api_mode, runtime_key) -> (client, default_model, loop)
# NOTE: loop identity is NOT part of the key. On async cache hits we check
# whether the cached loop is the *current* loop; if not, the stale entry is
# replaced in-place. This bounds cache growth to one entry per unique
# provider config rather than one per (config × event-loop), which previously
# caused unbounded fd accumulation in long-running gateway processes (#10200).
_client_cache: Dict[tuple, tuple] = {}
_client_cache_lock = threading.Lock()
_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded
def neuter_async_httpx_del() -> None:
@ -1970,39 +2088,49 @@ def _get_cached_client(
Async clients (AsyncOpenAI) use httpx.AsyncClient internally, which
binds to the event loop that was current when the client was created.
Using such a client on a *different* loop causes deadlocks or
RuntimeError. To prevent cross-loop issues (especially in gateway
mode where _run_async() may spawn fresh loops in worker threads), the
cache key for async clients includes the current event loop's identity
so each loop gets its own client instance.
RuntimeError. To prevent cross-loop issues, the cache validates on
every async hit that the cached loop is the *current, open* loop.
If the loop changed (e.g. a new gateway worker-thread loop), the stale
entry is replaced in-place rather than creating an additional entry.
This keeps cache size bounded to one entry per unique provider config,
preventing the fd-exhaustion that previously occurred in long-running
gateways where recycled worker threads created unbounded entries (#10200).
"""
# Include loop identity for async clients to prevent cross-loop reuse.
# httpx.AsyncClient (inside AsyncOpenAI) is bound to the loop where it
# was created — reusing it on a different loop causes deadlocks (#2681).
loop_id = 0
# Resolve the current event loop for async clients so we can validate
# cached entries. Loop identity is NOT in the cache key — instead we
# check at hit time whether the cached loop is still current and open.
# This prevents unbounded cache growth from recycled worker-thread loops
# while still guaranteeing we never reuse a client on the wrong loop
# (which causes deadlocks, see #2681).
current_loop = None
if async_mode:
try:
import asyncio as _aio
current_loop = _aio.get_event_loop()
loop_id = id(current_loop)
except RuntimeError:
pass
runtime = _normalize_main_runtime(main_runtime)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id, runtime_key)
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
with _client_cache_lock:
if cache_key in _client_cache:
cached_client, cached_default, cached_loop = _client_cache[cache_key]
if async_mode:
# A cached async client whose loop has been closed will raise
# "Event loop is closed" when httpx tries to clean up its
# transport. Discard the stale client and create a fresh one.
if cached_loop is not None and cached_loop.is_closed():
_force_close_async_httpx(cached_client)
del _client_cache[cache_key]
else:
# Validate: the cached client must be bound to the CURRENT,
# OPEN loop. If the loop changed or was closed, the httpx
# transport inside is dead — force-close and replace.
loop_ok = (
cached_loop is not None
and cached_loop is current_loop
and not cached_loop.is_closed()
)
if loop_ok:
effective = _compat_model(cached_client, model, cached_default)
return cached_client, effective
# Stale — evict and fall through to create a new client.
_force_close_async_httpx(cached_client)
del _client_cache[cache_key]
else:
effective = _compat_model(cached_client, model, cached_default)
return cached_client, effective
@ -2022,6 +2150,12 @@ def _get_cached_client(
bound_loop = current_loop
with _client_cache_lock:
if cache_key not in _client_cache:
# Safety belt: if the cache has grown beyond the max, evict
# the oldest entries (FIFO — dict preserves insertion order).
while len(_client_cache) >= _CLIENT_CACHE_MAX_SIZE:
evict_key, evict_entry = next(iter(_client_cache.items()))
_force_close_async_httpx(evict_entry[0])
del _client_cache[evict_key]
_client_cache[cache_key] = (client, default_model, bound_loop)
else:
client, default_model, _ = _client_cache[cache_key]
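Reduced to its essentials, the validate-on-hit pattern described above looks like this (a simplified sketch: no lock, no model-default bookkeeping, no httpx transport cleanup):

# Illustrative sketch, not part of this diff.
import asyncio

_cache: dict = {}  # config_key -> (client, bound_loop)

def get_cached(config_key, make_client):
    try:
        current = asyncio.get_event_loop()
    except RuntimeError:
        current = None
    entry = _cache.get(config_key)
    if entry is not None:
        client, bound = entry
        # A hit is valid only if the client is bound to the CURRENT, open loop.
        if bound is current and bound is not None and not bound.is_closed():
            return client
        del _cache[config_key]  # stale loop: replace in place, no growth
    client = make_client()
    _cache[config_key] = (client, current)
    return client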
@ -2201,6 +2335,19 @@ def _build_call_kwargs(
"timeout": timeout,
}
fixed_temperature = _fixed_temperature_for_model(model)
if fixed_temperature is not None:
temperature = fixed_temperature
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
# the aux model is flipped to 4.7.
if temperature is not None:
from agent.anthropic_adapter import _forbids_sampling_params
if _forbids_sampling_params(model):
temperature = None
if temperature is not None:
kwargs["temperature"] = temperature
@ -2304,10 +2451,10 @@ def call_llm(
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
provider=resolved_provider if resolved_provider != "auto" else provider,
model=resolved_model or model,
base_url=resolved_base_url or base_url,
api_key=resolved_api_key or api_key,
async_mode=False,
)
if client is None and resolved_provider != "auto" and not resolved_base_url:
@ -2512,10 +2659,10 @@ async def async_call_llm(
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
provider=resolved_provider if resolved_provider != "auto" else provider,
model=resolved_model or model,
base_url=resolved_base_url or base_url,
api_key=resolved_api_key or api_key,
async_mode=True,
)
if client is None and resolved_provider != "auto" and not resolved_base_url:

1098
agent/bedrock_adapter.py Normal file

File diff suppressed because it is too large

View file

@ -17,7 +17,10 @@ Improvements over v2:
- Richer tool call/result detail in summarizer input
"""
import hashlib
import json
import logging
import re
import time
from typing import Any, Dict, List, Optional
@ -36,7 +39,10 @@ SUMMARY_PREFIX = (
"into the summary below. This is a handoff from a previous context "
"window — treat it as background reference, NOT as active instructions. "
"Do NOT answer questions or fulfill requests mentioned in this summary; "
"they were already addressed. Respond ONLY to the latest user message "
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:"
)
@ -57,6 +63,128 @@ _CHARS_PER_TOKEN = 4
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
"""Create an informative 1-line summary of a tool call + result.
Used during the pre-compression pruning pass to replace large tool
outputs with a short but useful description of what the tool did,
rather than a generic placeholder that carries zero information.
Returns strings like::
[terminal] ran `npm test` -> exit 0, 47 lines output
[read_file] read config.py from line 1 (1,200 chars)
[search_files] content search for 'compress' in agent/ -> 12 matches
"""
try:
args = json.loads(tool_args) if tool_args else {}
except (json.JSONDecodeError, TypeError):
args = {}
content = tool_content or ""
content_len = len(content)
line_count = content.count("\n") + 1 if content.strip() else 0
if tool_name == "terminal":
cmd = args.get("command", "")
if len(cmd) > 80:
cmd = cmd[:77] + "..."
exit_match = re.search(r'"exit_code"\s*:\s*(-?\d+)', content)
exit_code = exit_match.group(1) if exit_match else "?"
return f"[terminal] ran `{cmd}` -> exit {exit_code}, {line_count} lines output"
if tool_name == "read_file":
path = args.get("path", "?")
offset = args.get("offset", 1)
return f"[read_file] read {path} from line {offset} ({content_len:,} chars)"
if tool_name == "write_file":
path = args.get("path", "?")
written_lines = args.get("content", "").count("\n") + 1 if args.get("content") else "?"
return f"[write_file] wrote to {path} ({written_lines} lines)"
if tool_name == "search_files":
pattern = args.get("pattern", "?")
path = args.get("path", ".")
target = args.get("target", "content")
match_count = re.search(r'"total_count"\s*:\s*(\d+)', content)
count = match_count.group(1) if match_count else "?"
return f"[search_files] {target} search for '{pattern}' in {path} -> {count} matches"
if tool_name == "patch":
path = args.get("path", "?")
mode = args.get("mode", "replace")
return f"[patch] {mode} in {path} ({content_len:,} chars result)"
if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
"browser_type", "browser_scroll", "browser_vision"):
url = args.get("url", "")
ref = args.get("ref", "")
detail = f" {url}" if url else (f" ref={ref}" if ref else "")
return f"[{tool_name}]{detail} ({content_len:,} chars)"
if tool_name == "web_search":
query = args.get("query", "?")
return f"[web_search] query='{query}' ({content_len:,} chars result)"
if tool_name == "web_extract":
urls = args.get("urls", [])
url_desc = urls[0] if isinstance(urls, list) and urls else "?"
if isinstance(urls, list) and len(urls) > 1:
url_desc += f" (+{len(urls) - 1} more)"
return f"[web_extract] {url_desc} ({content_len:,} chars)"
if tool_name == "delegate_task":
goal = args.get("goal", "")
if len(goal) > 60:
goal = goal[:57] + "..."
return f"[delegate_task] '{goal}' ({content_len:,} chars result)"
if tool_name == "execute_code":
code_preview = (args.get("code") or "")[:60].replace("\n", " ")
if len(args.get("code", "")) > 60:
code_preview += "..."
return f"[execute_code] `{code_preview}` ({line_count} lines output)"
if tool_name in ("skill_view", "skills_list", "skill_manage"):
name = args.get("name", "?")
return f"[{tool_name}] name={name} ({content_len:,} chars)"
if tool_name == "vision_analyze":
question = args.get("question", "")[:50]
return f"[vision_analyze] '{question}' ({content_len:,} chars)"
if tool_name == "memory":
action = args.get("action", "?")
target = args.get("target", "?")
return f"[memory] {action} on {target}"
if tool_name == "todo":
return "[todo] updated task list"
if tool_name == "clarify":
return "[clarify] asked user a question"
if tool_name == "text_to_speech":
return f"[text_to_speech] generated audio ({content_len:,} chars)"
if tool_name == "cronjob":
action = args.get("action", "?")
return f"[cronjob] {action}"
if tool_name == "process":
action = args.get("action", "?")
sid = args.get("session_id", "?")
return f"[process] {action} session={sid}"
# Generic fallback
first_arg = ""
for k, v in list(args.items())[:2]:
sv = str(v)[:40]
first_arg += f" {k}={sv}"
return f"[{tool_name}]{first_arg} ({content_len:,} chars result)"
class ContextCompressor(ContextEngine):
"""Default context engine — compresses conversation context via lossy summarization.
@ -78,6 +206,8 @@ class ContextCompressor(ContextEngine):
self._context_probed = False
self._context_probe_persistable = False
self._previous_summary = None
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
def update_model(
self,
@ -167,6 +297,9 @@ class ContextCompressor(ContextEngine):
# Stores the previous compaction summary for iterative updates
self._previous_summary: Optional[str] = None
# Anti-thrashing: track whether last compression was effective
self._last_compression_savings_pct: float = 100.0
self._ineffective_compression_count: int = 0
self._summary_failure_cooldown_until: float = 0.0
def update_from_response(self, usage: Dict[str, Any]):
@ -175,9 +308,26 @@ class ContextCompressor(ContextEngine):
self.last_completion_tokens = usage.get("completion_tokens", 0)
def should_compress(self, prompt_tokens: int = None) -> bool:
"""Check if context exceeds the compression threshold."""
"""Check if context exceeds the compression threshold.
Includes anti-thrashing protection: if the last two compressions
each saved less than 10%, skip compression to avoid infinite loops
where each pass removes only 1-2 messages.
"""
tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
return tokens >= self.threshold_tokens
if tokens < self.threshold_tokens:
return False
# Anti-thrashing: back off if recent compressions were ineffective
if self._ineffective_compression_count >= 2:
if not self.quiet_mode:
logger.warning(
"Compression skipped — last %d compressions saved <10%% each. "
"Consider /new to start a fresh session, or /compress <topic> "
"for focused compression.",
self._ineffective_compression_count,
)
return False
return True
# ------------------------------------------------------------------
# Tool output pruning (cheap pre-pass, no LLM call)
@ -187,7 +337,16 @@ class ContextCompressor(ContextEngine):
self, messages: List[Dict[str, Any]], protect_tail_count: int,
protect_tail_tokens: int | None = None,
) -> tuple[List[Dict[str, Any]], int]:
"""Replace old tool result contents with a short placeholder.
"""Replace old tool result contents with informative 1-line summaries.
Instead of a generic placeholder, generates a summary like::
[terminal] ran `npm test` -> exit 0, 47 lines output
[read_file] read config.py from line 1 (3,400 chars)
Also deduplicates identical tool results (e.g. reading the same file
5x keeps only the newest full copy) and truncates large tool_call
arguments in assistant messages outside the protected tail.
Walks backward from the end, protecting the most recent messages that
fall within ``protect_tail_tokens`` (when provided) OR the last
@ -203,6 +362,22 @@ class ContextCompressor(ContextEngine):
result = [m.copy() for m in messages]
pruned = 0
# Build index: tool_call_id -> (tool_name, arguments_json)
call_id_to_tool: Dict[str, tuple] = {}
for msg in result:
if msg.get("role") == "assistant":
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
cid = tc.get("id", "")
fn = tc.get("function", {})
call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", ""))
else:
cid = getattr(tc, "id", "") or ""
fn = getattr(tc, "function", None)
name = getattr(fn, "name", "unknown") if fn else "unknown"
args_str = getattr(fn, "arguments", "") if fn else ""
call_id_to_tool[cid] = (name, args_str)
# Determine the prune boundary
if protect_tail_tokens is not None and protect_tail_tokens > 0:
# Token-budget approach: walk backward accumulating tokens
@ -211,7 +386,8 @@ class ContextCompressor(ContextEngine):
min_protect = min(protect_tail_count, len(result) - 1)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
content_len = len(msg.get("content") or "")
raw_content = msg.get("content") or ""
# Multimodal content: sum only dict parts' text (a part may be a bare str).
content_len = sum(len(p.get("text", "")) for p in raw_content if isinstance(p, dict)) if isinstance(raw_content, list) else len(raw_content)
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@ -226,18 +402,69 @@ class ContextCompressor(ContextEngine):
else:
prune_boundary = len(result) - protect_tail_count
# Pass 1: Deduplicate identical tool results.
# When the same file is read multiple times, keep only the most recent
# full copy and replace older duplicates with a back-reference.
content_hashes: dict = {} # hash -> (index, tool_call_id)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
if msg.get("role") != "tool":
continue
content = msg.get("content") or ""
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if len(content) < 200:
continue
h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
if h in content_hashes:
# This is an older duplicate — replace with back-reference
result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"}
pruned += 1
else:
content_hashes[h] = (i, msg.get("tool_call_id", "?"))
# Pass 2: Replace old tool results with informative summaries
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "tool":
continue
content = msg.get("content", "")
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if not content or content == _PRUNED_TOOL_PLACEHOLDER:
continue
# Skip already-deduplicated or previously-summarized results
if content.startswith("[Duplicate tool output"):
continue
# Only prune if the content is substantial (>200 chars)
if len(content) > 200:
result[i] = {**msg, "content": _PRUNED_TOOL_PLACEHOLDER}
call_id = msg.get("tool_call_id", "")
tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", ""))
summary = _summarize_tool_result(tool_name, tool_args, content)
result[i] = {**msg, "content": summary}
pruned += 1
# Pass 3: Truncate large tool_call arguments in assistant messages
# outside the protected tail. write_file with 50KB content, for
# example, survives pruning entirely without this.
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "assistant" or not msg.get("tool_calls"):
continue
new_tcs = []
modified = False
for tc in msg["tool_calls"]:
if isinstance(tc, dict):
args = tc.get("function", {}).get("arguments", "")
if len(args) > 500:
tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
modified = True
new_tcs.append(tc)
if modified:
result[i] = {**msg, "tool_calls": new_tcs}
return result, pruned
# ------------------------------------------------------------------
@ -357,29 +584,45 @@ class ContextCompressor(ContextEngine):
)
# Shared structured template (used by both paths).
# Key changes vs v1:
# - "Pending User Asks" section (from Claude Code) explicitly tracks
# unanswered questions so the model knows what's resolved vs open
# - "Remaining Work" replaces "Next Steps" to avoid reading as active
# instructions
# - "Resolved Questions" makes it clear which questions were already
# answered (prevents model from re-answering them)
_template_sections = f"""## Goal
[What the user is trying to accomplish]
_template_sections = f"""## Active Task
[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
task assignment verbatim: the exact words they used. If multiple tasks
were requested and only some are done, list only the ones NOT yet completed.
The next assistant must pick up exactly here. Example:
"User asked: 'Now refactor the auth module to use JWT instead of sessions'"
If no outstanding task exists, write "None."]
## Goal
[What the user is trying to accomplish overall]
## Constraints & Preferences
[User preferences, coding style, constraints, important decisions]
## Progress
### Done
[Completed work: include specific file paths, commands run, results obtained]
### In Progress
[Work currently underway]
### Blocked
[Any blockers or issues encountered]
## Completed Actions
[Numbered list of concrete actions taken: include tool used, target, and outcome.
Format each as: N. ACTION target → outcome [tool: name]
Example:
1. READ config.py:45 → found `==` should be `!=` [tool: read_file]
2. PATCH config.py:45 → changed `==` to `!=` [tool: patch]
3. TEST `pytest tests/` → 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
Be specific with file paths, commands, line numbers, and results.]
## Active State
[Current working state. Include:
- Working directory and branch (if applicable)
- Modified/created files with brief note on each
- Test status (X/Y passing)
- Any running processes or servers
- Environment details that matter]
## In Progress
[Work currently underway: what was being done when compaction fired]
## Blocked
[Any blockers, errors, or issues not yet resolved. Include exact error messages.]
## Key Decisions
[Important technical decisions and why they were made]
[Important technical decisions and WHY they were made]
## Resolved Questions
[Questions the user asked that were ALREADY answered: include the answer so the next assistant does not re-answer them]
@ -396,10 +639,7 @@ class ContextCompressor(ContextEngine):
## Critical Context
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
## Tools & Patterns
[Which tools were used, how they were used effectively, and any tool-specific discoveries]
Target ~{summary_budget} tokens. Be specific: include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
Target ~{summary_budget} tokens. Be CONCRETE: include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes"; say exactly what changed.
Write only the summary body. Do not include any preamble or prefix."""
@ -415,7 +655,7 @@ PREVIOUS SUMMARY:
NEW TURNS TO INCORPORATE:
{content_to_summarize}
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete.
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity.
{_template_sections}"""
else:
@ -450,7 +690,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
"api_mode": self.api_mode,
},
"messages": [{"role": "user", "content": prompt}],
"max_tokens": summary_budget * 2,
"max_tokens": int(summary_budget * 1.3),
# timeout resolved from auxiliary.compression.timeout config by call_llm
}
if self.summary_model:
@ -464,8 +704,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Store for iterative updates on next compaction
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0
self._summary_model_fallen_back = False
return self._with_summary_prefix(summary)
except RuntimeError:
# No provider configured — long cooldown, unlikely to self-resolve
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
logging.warning("Context compression: no provider available for "
"summary. Middle turns will be dropped without summary "
@ -473,12 +715,42 @@ The user has requested that this compaction PRIORITISE preserving all informatio
_SUMMARY_FAILURE_COOLDOWN_SECONDS)
return None
except Exception as e:
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
# If the summary model is different from the main model and the
# error looks permanent (model not found, 503, 404), fall back to
# using the main model instead of entering cooldown that leaves
# context growing unbounded. (#8620 sub-issue 4)
_status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
_err_str = str(e).lower()
_is_model_not_found = (
_status in (404, 503)
or "model_not_found" in _err_str
or "does not exist" in _err_str
or "no available channel" in _err_str
)
if (
_is_model_not_found
and self.summary_model
and self.summary_model != self.model
and not getattr(self, "_summary_model_fallen_back", False)
):
self._summary_model_fallen_back = True
logging.warning(
"Summary model '%s' not available (%s). "
"Falling back to main model '%s' for compression.",
self.summary_model, e, self.model,
)
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0 # no cooldown
return self._generate_summary(messages, summary_budget) # retry immediately
# Transient errors (timeout, rate limit, network) — shorter cooldown
_transient_cooldown = 60
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
logging.warning(
"Failed to generate context summary: %s. "
"Further summary attempts paused for %d seconds.",
e,
_SUMMARY_FAILURE_COOLDOWN_SECONDS,
_transient_cooldown,
)
return None
@ -601,6 +873,62 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Tail protection by token budget
# ------------------------------------------------------------------
def _find_last_user_message_idx(
self, messages: List[Dict[str, Any]], head_end: int
) -> int:
"""Return the index of the last user-role message at or after *head_end*, or -1."""
for i in range(len(messages) - 1, head_end - 1, -1):
if messages[i].get("role") == "user":
return i
return -1
def _ensure_last_user_message_in_tail(
self,
messages: List[Dict[str, Any]],
cut_idx: int,
head_end: int,
) -> int:
"""Guarantee the most recent user message is in the protected tail.
Context compressor bug (#10896): ``_align_boundary_backward`` can pull
``cut_idx`` past a user message when it tries to keep tool_call/result
groups together. If the last user message ends up in the *compressed*
middle region the LLM summariser writes it into "Pending User Asks",
but ``SUMMARY_PREFIX`` tells the next model to respond only to user
messages *after* the summary, so the task effectively disappears from
the active context, causing the agent to stall, repeat completed work,
or silently drop the user's latest request.
Fix: if the last user-role message is not already in the tail
(``messages[cut_idx:]``), walk ``cut_idx`` back to include it. We
then re-align backward one more time to avoid splitting any
tool_call/result group that immediately precedes the user message.
"""
last_user_idx = self._find_last_user_message_idx(messages, head_end)
if last_user_idx < 0:
# No user message found beyond head — nothing to anchor.
return cut_idx
if last_user_idx >= cut_idx:
# Already in the tail; nothing to do.
return cut_idx
# The last user message is in the middle (compressed) region.
# Pull cut_idx back to it directly — a user message is already a
# clean boundary (no tool_call/result splitting risk), so there is no
# need to call _align_boundary_backward here; doing so would
# unnecessarily pull the cut further back into the preceding
# assistant + tool_calls group.
if not self.quiet_mode:
logger.debug(
"Anchoring tail cut to last user message at index %d "
"(was %d) to prevent active-task loss after compression",
last_user_idx,
cut_idx,
)
# Safety: never go back into the head region.
return max(last_user_idx, head_end + 1)
def _find_tail_cut_by_tokens(
self, messages: List[Dict[str, Any]], head_end: int,
token_budget: int | None = None,
@ -618,7 +946,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
read, etc.). If even the minimum 3 messages exceed 1.5x the budget
the cut is placed right after the head so compression still runs.
Never cuts inside a tool_call/result group.
Never cuts inside a tool_call/result group. Always ensures the most
recent user message is in the tail (see ``_ensure_last_user_message_in_tail``).
"""
if token_budget is None:
token_budget = self.tail_token_budget
@ -657,6 +986,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Align to avoid splitting tool groups
cut_idx = self._align_boundary_backward(messages, cut_idx)
# Ensure the most recent user message is always in the tail so the
# active task is never lost to compression (fixes #10896).
cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)
return max(cut_idx, head_end + 1)
# ------------------------------------------------------------------
@ -744,11 +1077,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = []
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
msg["content"] = (
(msg.get("content") or "")
+ "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
)
if i == 0 and msg.get("role") == "system":
existing = msg.get("content") or ""
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
if _compression_note not in existing:
msg["content"] = existing + "\n\n" + _compression_note
compressed.append(msg)
# If LLM summary failed, insert a static fallback so the model
@ -806,14 +1139,24 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = self._sanitize_tool_pairs(compressed)
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate
# Anti-thrashing: track compression effectiveness
savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
self._last_compression_savings_pct = savings_pct
if savings_pct < 10:
self._ineffective_compression_count += 1
else:
self._ineffective_compression_count = 0
if not self.quiet_mode:
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate
logger.info(
"Compressed: %d -> %d messages (~%d tokens saved)",
"Compressed: %d -> %d messages (~%d tokens saved, %.0f%%)",
n_messages,
len(compressed),
saved_estimate,
savings_pct,
)
logger.info("Compression #%d complete", self.compression_count)

View file

@ -313,9 +313,25 @@ class CopilotACPClient:
tools=tools,
tool_choice=tool_choice,
)
# Normalise timeout: run_agent.py may pass an httpx.Timeout object
# (used natively by the OpenAI SDK) rather than a plain float.
if timeout is None:
_effective_timeout = _DEFAULT_TIMEOUT_SECONDS
elif isinstance(timeout, (int, float)):
_effective_timeout = float(timeout)
else:
# httpx.Timeout or similar — pick the largest component so the
# subprocess has enough wall-clock time for the full response.
_candidates = [
getattr(timeout, attr, None)
for attr in ("read", "write", "connect", "pool", "timeout")
]
_numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
_effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
response_text, reasoning_text = self._run_prompt(
prompt_text,
timeout_seconds=float(timeout or _DEFAULT_TIMEOUT_SECONDS),
timeout_seconds=_effective_timeout,
)
tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
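For reference, what the normalisation above yields for the two shapes run_agent.py may pass (values below are illustrative):

# Illustrative sketch, not part of this diff.
import httpx

# float: used as-is, e.g. timeout=120.0 gives _effective_timeout == 120.0.
# httpx.Timeout: the largest configured component wins, so a generous
# streaming read budget is not truncated by a small connect timeout.
t = httpx.Timeout(connect=5.0, read=300.0, write=30.0, pool=10.0)
# timeout=t gives _effective_timeout == 300.0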

View file

@ -1130,6 +1130,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
state = _load_provider_state(auth_store, "nous")
if state:
active_sources.add("device_code")
# Prefer a user-supplied label embedded in the singleton state
# (set by persist_nous_credentials(label=...) when the user ran
# `hermes auth add nous --label <name>`). Fall back to the
# auto-derived token fingerprint for logins that didn't supply one.
custom_label = str(state.get("label") or "").strip()
seeded_label = custom_label or label_from_token(
state.get("access_token", ""), "device_code"
)
changed |= _upsert_entry(
entries,
provider,
@ -1148,7 +1156,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
"agent_key": state.get("agent_key"),
"agent_key_expires_at": state.get("agent_key_expires_at"),
"tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
"label": label_from_token(state.get("access_token", ""), "device_code"),
"label": seeded_label,
},
)
@ -1162,6 +1170,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
if token:
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
@ -1170,6 +1179,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
@ -1206,6 +1216,19 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
logger.debug("Qwen OAuth token seed failed: %s", exc)
elif provider == "openai-codex":
# Respect user suppression — `hermes auth remove openai-codex` marks
# the device_code source as suppressed so it won't be re-seeded from
# either the Hermes auth store or ~/.codex/auth.json. Without this
# gate the removal is instantly undone on the next load_pool() call.
codex_suppressed = False
try:
from hermes_cli.auth import is_source_suppressed
codex_suppressed = is_source_suppressed(provider, "device_code")
except ImportError:
pass
if codex_suppressed:
return changed, active_sources
state = _load_provider_state(auth_store, "openai-codex")
tokens = state.get("tokens") if isinstance(state, dict) else None
# Fallback: import from Codex CLI (~/.codex/auth.json) if Hermes auth

View file

@ -600,6 +600,45 @@ class KawaiiSpinner:
"analyzing", "computing", "synthesizing", "formulating", "brainstorming",
]
@classmethod
def get_waiting_faces(cls) -> list:
"""Return waiting faces from the active skin, falling back to KAWAII_WAITING."""
try:
skin = _get_skin()
if skin:
faces = skin.spinner.get("waiting_faces", [])
if faces:
return faces
except Exception:
pass
return cls.KAWAII_WAITING
@classmethod
def get_thinking_faces(cls) -> list:
"""Return thinking faces from the active skin, falling back to KAWAII_THINKING."""
try:
skin = _get_skin()
if skin:
faces = skin.spinner.get("thinking_faces", [])
if faces:
return faces
except Exception:
pass
return cls.KAWAII_THINKING
@classmethod
def get_thinking_verbs(cls) -> list:
"""Return thinking verbs from the active skin, falling back to THINKING_VERBS."""
try:
skin = _get_skin()
if skin:
verbs = skin.spinner.get("thinking_verbs", [])
if verbs:
return verbs
except Exception:
pass
return cls.THINKING_VERBS
def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
self.message = message
self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
@ -954,84 +993,4 @@ def get_cute_tool_message(
# Honcho session line (one-liner with clickable OSC 8 hyperlink)
# =========================================================================
_DIM = "\033[2m"
_SKY_BLUE = "\033[38;5;117m"
_ANSI_RESET = "\033[0m"
# =========================================================================
# Context pressure display (CLI user-facing warnings)
# =========================================================================
# ANSI color codes for context pressure tiers
_CYAN = "\033[36m"
_YELLOW = "\033[33m"
_BOLD = "\033[1m"
_DIM_ANSI = "\033[2m"
# Bar characters
_BAR_FILLED = "█"
_BAR_EMPTY = "░"
_BAR_WIDTH = 20
def format_context_pressure(
compaction_progress: float,
threshold_tokens: int,
threshold_percent: float,
compression_enabled: bool = True,
) -> str:
"""Build a formatted context pressure line for CLI display.
The bar and percentage show progress toward the compaction threshold,
NOT the raw context window. 100% = compaction fires.
Args:
compaction_progress: How close to compaction (0.0-1.0, 1.0 = fires).
threshold_tokens: Compaction threshold in tokens.
threshold_percent: Compaction threshold as a fraction of context window.
compression_enabled: Whether auto-compression is active.
"""
pct_int = min(int(compaction_progress * 100), 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens)
threshold_pct_int = int(threshold_percent * 100)
color = f"{_BOLD}{_YELLOW}"
icon = ""
if compression_enabled:
hint = "compaction approaching"
else:
hint = "no auto-compaction"
return (
f" {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}"
f" {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}"
)
def format_context_pressure_gateway(
compaction_progress: float,
threshold_percent: float,
compression_enabled: bool = True,
) -> str:
"""Build a plain-text context pressure notification for messaging platforms.
No ANSI just Unicode and plain text suitable for Telegram/Discord/etc.
The percentage shows progress toward the compaction threshold.
"""
pct_int = min(int(compaction_progress * 100), 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
threshold_pct_int = int(threshold_percent * 100)
icon = "⚠️"
if compression_enabled:
hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)."
else:
hint = "Auto-compaction is disabled — context may be truncated."
return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}"

View file

@ -112,6 +112,10 @@ _RATE_LIMIT_PATTERNS = [
"please retry after",
"resource_exhausted",
"rate increased too quickly", # Alibaba/DashScope throttling
# AWS Bedrock throttling
"throttlingexception",
"too many concurrent requests",
"servicequotaexceededexception",
]
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
@ -171,6 +175,11 @@ _CONTEXT_OVERFLOW_PATTERNS = [
# Chinese error messages (some providers return these)
"超过最大长度",
"上下文长度",
# AWS Bedrock Converse API error patterns
"input is too long",
"max input token",
"input token",
"exceeds the maximum number of input tokens",
]
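Judging by the all-lowercase entries, these lists are matched as substrings of the lowered provider error text. A minimal sketch of that pattern (classify() is a hypothetical name, not this module's real classifier):

def classify(error_text: str) -> str:
    lowered = error_text.lower()
    if any(p in lowered for p in _RATE_LIMIT_PATTERNS):
        return "rate_limit"
    if any(p in lowered for p in _CONTEXT_OVERFLOW_PATTERNS):
        return "context_overflow"
    return "unknown"

print(classify("ThrottlingException: Too many concurrent requests"))  # rate_limit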
# Model not found patterns

View file

@ -0,0 +1,895 @@
"""OpenAI-compatible facade that talks to Google's Cloud Code Assist backend.
This adapter lets Hermes use the ``google-gemini-cli`` provider as if it were
a standard OpenAI-shaped chat completion endpoint, while the underlying HTTP
traffic goes to ``cloudcode-pa.googleapis.com/v1internal:{generateContent,
streamGenerateContent}`` with a Bearer access token obtained via OAuth PKCE.
Architecture
------------
- ``GeminiCloudCodeClient`` exposes ``.chat.completions.create(**kwargs)``
mirroring the subset of the OpenAI SDK that ``run_agent.py`` uses.
- Incoming OpenAI ``messages[]`` / ``tools[]`` / ``tool_choice`` are translated
to Gemini's native ``contents[]`` / ``tools[].functionDeclarations`` /
``toolConfig`` / ``systemInstruction`` shape.
- The request body is wrapped ``{project, model, user_prompt_id, request}``
per Code Assist API expectations.
- Responses (``candidates[].content.parts[]``) are converted back to
OpenAI ``choices[0].message`` shape with ``content`` + ``tool_calls``.
- Streaming uses SSE (``?alt=sse``) and yields OpenAI-shaped delta chunks.
Attribution
-----------
Translation semantics follow jenslys/opencode-gemini-auth (MIT) and the public
Gemini API docs. Request envelope shape
(``{project, model, user_prompt_id, request}``) is documented nowhere; it is
reverse-engineered from the opencode-gemini-auth and clawdbot implementations.
"""
from __future__ import annotations
import json
import logging
import os
import time
import uuid
from types import SimpleNamespace
from typing import Any, Dict, Iterator, List, Optional
import httpx
from agent import google_oauth
from agent.google_code_assist import (
CODE_ASSIST_ENDPOINT,
FREE_TIER_ID,
CodeAssistError,
ProjectContext,
resolve_project_context,
)
logger = logging.getLogger(__name__)
# =============================================================================
# Request translation: OpenAI → Gemini
# =============================================================================
_ROLE_MAP_OPENAI_TO_GEMINI = {
"user": "user",
"assistant": "model",
"system": "user", # handled separately via systemInstruction
"tool": "user", # functionResponse is wrapped in a user-role turn
"function": "user",
}
def _coerce_content_to_text(content: Any) -> str:
"""OpenAI content may be str or a list of parts; reduce to plain text."""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
pieces: List[str] = []
for p in content:
if isinstance(p, str):
pieces.append(p)
elif isinstance(p, dict):
if p.get("type") == "text" and isinstance(p.get("text"), str):
pieces.append(p["text"])
# Multimodal (image_url, etc.) — stub for now; log and skip
elif p.get("type") in ("image_url", "input_audio"):
logger.debug("Dropping multimodal part (not yet supported): %s", p.get("type"))
return "\n".join(pieces)
return str(content)
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
"""OpenAI tool_call -> Gemini functionCall part."""
fn = tool_call.get("function") or {}
args_raw = fn.get("arguments", "")
try:
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
except json.JSONDecodeError:
args = {"_raw": args_raw}
if not isinstance(args, dict):
args = {"_value": args}
return {
"functionCall": {
"name": fn.get("name") or "",
"args": args,
},
# Sentinel signature — matches opencode-gemini-auth's approach.
# Without this, Code Assist rejects function calls that originated
# outside its own chain.
"thoughtSignature": "skip_thought_signature_validator",
}
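Round-trip sketch of this translation on a typical OpenAI-shaped call:

# Illustrative, not part of this diff.
_translate_tool_call_to_gemini({
    "id": "call_abc123",
    "type": "function",
    "function": {"name": "read_file", "arguments": '{"path": "config.py"}'},
})
# -> {"functionCall": {"name": "read_file", "args": {"path": "config.py"}},
#     "thoughtSignature": "skip_thought_signature_validator"}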
def _translate_tool_result_to_gemini(message: Dict[str, Any]) -> Dict[str, Any]:
"""OpenAI tool-role message -> Gemini functionResponse part.
The function name isn't in the OpenAI tool message directly; it must be
passed via the assistant message that issued the call. For simplicity we
look up ``name`` on the message (OpenAI SDK copies it there) or on the
``tool_call_id`` cross-reference.
"""
name = str(message.get("name") or message.get("tool_call_id") or "tool")
content = _coerce_content_to_text(message.get("content"))
# Gemini expects the response as a dict under `response`. We wrap plain
# text in {"output": "..."}.
try:
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
except json.JSONDecodeError:
parsed = None
response = parsed if isinstance(parsed, dict) else {"output": content}
return {
"functionResponse": {
"name": name,
"response": response,
},
}
def _build_gemini_contents(
messages: List[Dict[str, Any]],
) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
"""Convert OpenAI messages[] to Gemini contents[] + systemInstruction."""
system_text_parts: List[str] = []
contents: List[Dict[str, Any]] = []
for msg in messages:
if not isinstance(msg, dict):
continue
role = str(msg.get("role") or "user")
if role == "system":
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
continue
# Tool result message — emit a user-role turn with functionResponse
if role == "tool" or role == "function":
contents.append({
"role": "user",
"parts": [_translate_tool_result_to_gemini(msg)],
})
continue
gemini_role = _ROLE_MAP_OPENAI_TO_GEMINI.get(role, "user")
parts: List[Dict[str, Any]] = []
text = _coerce_content_to_text(msg.get("content"))
if text:
parts.append({"text": text})
# Assistant messages can carry tool_calls
tool_calls = msg.get("tool_calls") or []
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict):
parts.append(_translate_tool_call_to_gemini(tc))
if not parts:
# Gemini rejects empty parts; skip the turn entirely
continue
contents.append({"role": gemini_role, "parts": parts})
system_instruction: Optional[Dict[str, Any]] = None
joined_system = "\n".join(p for p in system_text_parts if p).strip()
if joined_system:
system_instruction = {
"role": "system",
"parts": [{"text": joined_system}],
}
return contents, system_instruction
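Shape sketch for a minimal conversation:

# Illustrative, not part of this diff.
contents, system_instruction = _build_gemini_contents([
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "hello"},
])
# contents == [{"role": "user", "parts": [{"text": "hi"}]},
#              {"role": "model", "parts": [{"text": "hello"}]}]
# system_instruction == {"role": "system", "parts": [{"text": "You are terse."}]}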
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
"""OpenAI tools[] -> Gemini tools[].functionDeclarations[]."""
if not isinstance(tools, list) or not tools:
return []
declarations: List[Dict[str, Any]] = []
for t in tools:
if not isinstance(t, dict):
continue
fn = t.get("function") or {}
if not isinstance(fn, dict):
continue
name = fn.get("name")
if not name:
continue
decl = {"name": str(name)}
if fn.get("description"):
decl["description"] = str(fn["description"])
params = fn.get("parameters")
if isinstance(params, dict):
decl["parameters"] = params
declarations.append(decl)
if not declarations:
return []
return [{"functionDeclarations": declarations}]
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
"""OpenAI tool_choice -> Gemini toolConfig.functionCallingConfig."""
if tool_choice is None:
return None
if isinstance(tool_choice, str):
if tool_choice == "auto":
return {"functionCallingConfig": {"mode": "AUTO"}}
if tool_choice == "required":
return {"functionCallingConfig": {"mode": "ANY"}}
if tool_choice == "none":
return {"functionCallingConfig": {"mode": "NONE"}}
if isinstance(tool_choice, dict):
fn = tool_choice.get("function") or {}
name = fn.get("name")
if name:
return {
"functionCallingConfig": {
"mode": "ANY",
"allowedFunctionNames": [str(name)],
},
}
return None
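Mapping sketch for the forms callers can pass:

# Illustrative, not part of this diff.
_translate_tool_choice_to_gemini("required")
# -> {"functionCallingConfig": {"mode": "ANY"}}
_translate_tool_choice_to_gemini({"type": "function", "function": {"name": "patch"}})
# -> {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": ["patch"]}}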
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
"""Accept thinkingBudget / thinkingLevel / includeThoughts (+ snake_case)."""
if not isinstance(config, dict) or not config:
return None
budget = config.get("thinkingBudget", config.get("thinking_budget"))
level = config.get("thinkingLevel", config.get("thinking_level"))
include = config.get("includeThoughts", config.get("include_thoughts"))
normalized: Dict[str, Any] = {}
if isinstance(budget, (int, float)):
normalized["thinkingBudget"] = int(budget)
if isinstance(level, str) and level.strip():
normalized["thinkingLevel"] = level.strip().lower()
if isinstance(include, bool):
normalized["includeThoughts"] = include
return normalized or None
def build_gemini_request(
*,
messages: List[Dict[str, Any]],
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
thinking_config: Any = None,
) -> Dict[str, Any]:
"""Build the inner Gemini request body (goes inside ``request`` wrapper)."""
contents, system_instruction = _build_gemini_contents(messages)
body: Dict[str, Any] = {"contents": contents}
if system_instruction is not None:
body["systemInstruction"] = system_instruction
gemini_tools = _translate_tools_to_gemini(tools)
if gemini_tools:
body["tools"] = gemini_tools
tool_cfg = _translate_tool_choice_to_gemini(tool_choice)
if tool_cfg is not None:
body["toolConfig"] = tool_cfg
generation_config: Dict[str, Any] = {}
if isinstance(temperature, (int, float)):
generation_config["temperature"] = float(temperature)
if isinstance(max_tokens, int) and max_tokens > 0:
generation_config["maxOutputTokens"] = max_tokens
if isinstance(top_p, (int, float)):
generation_config["topP"] = float(top_p)
if isinstance(stop, str) and stop:
generation_config["stopSequences"] = [stop]
elif isinstance(stop, list) and stop:
generation_config["stopSequences"] = [str(s) for s in stop if s]
normalized_thinking = _normalize_thinking_config(thinking_config)
if normalized_thinking:
generation_config["thinkingConfig"] = normalized_thinking
if generation_config:
body["generationConfig"] = generation_config
return body
def wrap_code_assist_request(
*,
project_id: str,
model: str,
inner_request: Dict[str, Any],
user_prompt_id: Optional[str] = None,
) -> Dict[str, Any]:
"""Wrap the inner Gemini request in the Code Assist envelope."""
return {
"project": project_id,
"model": model,
"user_prompt_id": user_prompt_id or str(uuid.uuid4()),
"request": inner_request,
}
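Putting the two builders together (the project id and model name below are placeholders, not defaults shipped by this adapter):

# Illustrative, not part of this diff.
envelope = wrap_code_assist_request(
    project_id="my-gcp-project",
    model="gemini-2.5-pro",
    inner_request=build_gemini_request(
        messages=[{"role": "user", "content": "hi"}],
    ),
)
# {"project": "my-gcp-project", "model": "gemini-2.5-pro",
#  "user_prompt_id": "<fresh uuid4>", "request": {"contents": [...]}}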
# =============================================================================
# Response translation: Gemini → OpenAI
# =============================================================================
def _translate_gemini_response(
resp: Dict[str, Any],
model: str,
) -> SimpleNamespace:
"""Non-streaming Gemini response -> OpenAI-shaped SimpleNamespace.
Code Assist wraps the actual Gemini response inside ``response``, so we
unwrap it first if present.
"""
inner = resp.get("response") if isinstance(resp.get("response"), dict) else resp
candidates = inner.get("candidates") or []
if not isinstance(candidates, list) or not candidates:
return _empty_response(model)
cand = candidates[0]
content_obj = cand.get("content") if isinstance(cand, dict) else {}
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
text_pieces: List[str] = []
reasoning_pieces: List[str] = []
tool_calls: List[SimpleNamespace] = []
for i, part in enumerate(parts or []):
if not isinstance(part, dict):
continue
# Thought parts are model's internal reasoning — surface as reasoning,
# don't mix into content.
if part.get("thought") is True:
if isinstance(part.get("text"), str):
reasoning_pieces.append(part["text"])
continue
if isinstance(part.get("text"), str):
text_pieces.append(part["text"])
continue
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
except (TypeError, ValueError):
args_str = "{}"
tool_calls.append(SimpleNamespace(
id=f"call_{uuid.uuid4().hex[:12]}",
type="function",
index=i,
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
))
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(
str(cand.get("finishReason") or "")
)
usage_meta = inner.get("usageMetadata") or {}
usage = SimpleNamespace(
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
prompt_tokens_details=SimpleNamespace(
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
),
)
message = SimpleNamespace(
role="assistant",
content="".join(text_pieces) if text_pieces else None,
tool_calls=tool_calls or None,
reasoning="".join(reasoning_pieces) or None,
reasoning_content="".join(reasoning_pieces) or None,
reasoning_details=None,
)
choice = SimpleNamespace(
index=0,
message=message,
finish_reason=finish_reason,
)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
def _empty_response(model: str) -> SimpleNamespace:
message = SimpleNamespace(
role="assistant", content="", tool_calls=None,
reasoning=None, reasoning_content=None, reasoning_details=None,
)
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
usage = SimpleNamespace(
prompt_tokens=0, completion_tokens=0, total_tokens=0,
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
def _map_gemini_finish_reason(reason: str) -> str:
mapping = {
"STOP": "stop",
"MAX_TOKENS": "length",
"SAFETY": "content_filter",
"RECITATION": "content_filter",
"OTHER": "stop",
}
return mapping.get(reason.upper(), "stop")
# =============================================================================
# Streaming SSE iterator
# =============================================================================
class _GeminiStreamChunk(SimpleNamespace):
"""Mimics an OpenAI ChatCompletionChunk with .choices[0].delta."""
pass
def _make_stream_chunk(
*,
model: str,
content: str = "",
tool_call_delta: Optional[Dict[str, Any]] = None,
finish_reason: Optional[str] = None,
reasoning: str = "",
) -> _GeminiStreamChunk:
delta_kwargs: Dict[str, Any] = {"role": "assistant"}
if content:
delta_kwargs["content"] = content
if tool_call_delta is not None:
delta_kwargs["tool_calls"] = [SimpleNamespace(
index=tool_call_delta.get("index", 0),
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
type="function",
function=SimpleNamespace(
name=tool_call_delta.get("name") or "",
arguments=tool_call_delta.get("arguments") or "",
),
)]
if reasoning:
delta_kwargs["reasoning"] = reasoning
delta_kwargs["reasoning_content"] = reasoning
delta = SimpleNamespace(**delta_kwargs)
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
return _GeminiStreamChunk(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion.chunk",
created=int(time.time()),
model=model,
choices=[choice],
usage=None,
)
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
"""Parse Server-Sent Events from an httpx streaming response."""
buffer = ""
for chunk in response.iter_text():
if not chunk:
continue
buffer += chunk
while "\n" in buffer:
line, buffer = buffer.split("\n", 1)
line = line.rstrip("\r")
if not line:
continue
if line.startswith("data: "):
data = line[6:]
if data == "[DONE]":
return
try:
yield json.loads(data)
except json.JSONDecodeError:
logger.debug("Non-JSON SSE line: %s", data[:200])
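# --- Illustrative sketch (not part of the client) ---------------------------
# The wire format this parser expects: "data: <json>" lines, blank-line
# separated, terminated by "data: [DONE]". Built on a synthetic
# httpx.Response; a real stream arrives in arbitrary chunks, but the parser
# only ever sees the concatenated text either way.
def _example_iter_sse() -> None:
    wire = 'data: {"n": 1}\n\ndata: {"n": 2}\n\ndata: [DONE]\n'
    fake = httpx.Response(200, text=wire)
    events = list(_iter_sse_events(fake))
    assert events == [{"n": 1}, {"n": 2}]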
def _translate_stream_event(
event: Dict[str, Any],
model: str,
tool_call_indices: Dict[str, int],
) -> List[_GeminiStreamChunk]:
"""Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s)."""
inner = event.get("response") if isinstance(event.get("response"), dict) else event
candidates = inner.get("candidates") or []
if not candidates:
return []
cand = candidates[0]
if not isinstance(cand, dict):
return []
chunks: List[_GeminiStreamChunk] = []
content = cand.get("content") or {}
parts = content.get("parts") if isinstance(content, dict) else []
for part in parts or []:
if not isinstance(part, dict):
continue
if part.get("thought") is True and isinstance(part.get("text"), str):
chunks.append(_make_stream_chunk(
model=model, reasoning=part["text"],
))
continue
if isinstance(part.get("text"), str) and part["text"]:
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
name = str(fc["name"])
idx = tool_call_indices.setdefault(name, len(tool_call_indices))
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
except (TypeError, ValueError):
args_str = "{}"
chunks.append(_make_stream_chunk(
model=model,
tool_call_delta={
"index": idx,
"name": name,
"arguments": args_str,
},
))
finish_reason_raw = str(cand.get("finishReason") or "")
if finish_reason_raw:
mapped = _map_gemini_finish_reason(finish_reason_raw)
if tool_call_indices:
mapped = "tool_calls"
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
return chunks
# =============================================================================
# GeminiCloudCodeClient — OpenAI-compatible facade
# =============================================================================
MARKER_BASE_URL = "cloudcode-pa://google"
class _GeminiChatCompletions:
def __init__(self, client: "GeminiCloudCodeClient"):
self._client = client
def create(self, **kwargs: Any) -> Any:
return self._client._create_chat_completion(**kwargs)
class _GeminiChatNamespace:
def __init__(self, client: "GeminiCloudCodeClient"):
self.completions = _GeminiChatCompletions(client)
class GeminiCloudCodeClient:
"""Minimal OpenAI-SDK-compatible facade over Code Assist v1internal."""
def __init__(
self,
*,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
default_headers: Optional[Dict[str, str]] = None,
project_id: str = "",
**_: Any,
):
# `api_key` here is a dummy — real auth is the OAuth access token
# fetched on every call via agent.google_oauth.get_valid_access_token().
# We accept the kwarg for openai.OpenAI interface parity.
self.api_key = api_key or "google-oauth"
self.base_url = base_url or MARKER_BASE_URL
self._default_headers = dict(default_headers or {})
self._configured_project_id = project_id
self._project_context: Optional[ProjectContext] = None
self._project_context_lock = False # simple single-thread guard
self.chat = _GeminiChatNamespace(self)
self.is_closed = False
self._http = httpx.Client(timeout=httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0))
def close(self) -> None:
self.is_closed = True
try:
self._http.close()
except Exception:
pass
# Implement the OpenAI SDK's context-manager-ish closure check
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def _ensure_project_context(self, access_token: str, model: str) -> ProjectContext:
"""Lazily resolve and cache the project context for this client."""
if self._project_context is not None:
return self._project_context
env_project = google_oauth.resolve_project_id_from_env()
creds = google_oauth.load_credentials()
stored_project = creds.project_id if creds else ""
# Prefer what's already baked into the creds
if stored_project:
self._project_context = ProjectContext(
project_id=stored_project,
managed_project_id=creds.managed_project_id if creds else "",
tier_id="",
source="stored",
)
return self._project_context
ctx = resolve_project_context(
access_token,
configured_project_id=self._configured_project_id,
env_project_id=env_project,
user_agent_model=model,
)
# Persist discovered project back to the creds file so the next
# session doesn't re-run the discovery.
if ctx.project_id or ctx.managed_project_id:
google_oauth.update_project_ids(
project_id=ctx.project_id,
managed_project_id=ctx.managed_project_id,
)
self._project_context = ctx
return ctx
def _create_chat_completion(
self,
*,
model: str = "gemini-2.5-flash",
messages: Optional[List[Dict[str, Any]]] = None,
stream: bool = False,
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Any = None,
**_: Any,
) -> Any:
access_token = google_oauth.get_valid_access_token()
ctx = self._ensure_project_context(access_token, model)
thinking_config = None
if isinstance(extra_body, dict):
thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
inner = build_gemini_request(
messages=messages or [],
tools=tools,
tool_choice=tool_choice,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
stop=stop,
thinking_config=thinking_config,
)
wrapped = wrap_code_assist_request(
project_id=ctx.project_id,
model=model,
inner_request=inner,
)
headers = {
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": f"Bearer {access_token}",
"User-Agent": "hermes-agent (gemini-cli-compat)",
"X-Goog-Api-Client": "gl-python/hermes",
"x-activity-request-id": str(uuid.uuid4()),
}
headers.update(self._default_headers)
if stream:
return self._stream_completion(model=model, wrapped=wrapped, headers=headers)
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:generateContent"
response = self._http.post(url, json=wrapped, headers=headers)
if response.status_code != 200:
raise _gemini_http_error(response)
try:
payload = response.json()
except ValueError as exc:
raise CodeAssistError(
f"Invalid JSON from Code Assist: {exc}",
code="code_assist_invalid_json",
) from exc
return _translate_gemini_response(payload, model=model)
def _stream_completion(
self,
*,
model: str,
wrapped: Dict[str, Any],
headers: Dict[str, str],
) -> Iterator[_GeminiStreamChunk]:
"""Generator that yields OpenAI-shaped streaming chunks."""
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse"
stream_headers = dict(headers)
stream_headers["Accept"] = "text/event-stream"
def _generator() -> Iterator[_GeminiStreamChunk]:
try:
with self._http.stream("POST", url, json=wrapped, headers=stream_headers) as response:
if response.status_code != 200:
# Materialize error body for better diagnostics
response.read()
raise _gemini_http_error(response)
tool_call_indices: Dict[str, int] = {}
for event in _iter_sse_events(response):
for chunk in _translate_stream_event(event, model, tool_call_indices):
yield chunk
except httpx.HTTPError as exc:
raise CodeAssistError(
f"Streaming request failed: {exc}",
code="code_assist_stream_error",
) from exc
return _generator()
def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
"""Translate an httpx response into a CodeAssistError with rich metadata.
Parses Google's error envelope (``{"error": {"code", "message", "status",
"details": [...]}}``) so the agent's error classifier can reason about
the failure. ``status_code`` enables the rate_limit / auth classification
paths, and ``response`` lets the main loop honor ``Retry-After`` just
like it does for OpenAI SDK exceptions.
Also lifts a few recognizable Google conditions into human-readable
messages so the user sees something better than a 500-char JSON dump:
MODEL_CAPACITY_EXHAUSTED -> "Gemini model capacity exhausted for
<model>. This is a Google-side throttle..."
RESOURCE_EXHAUSTED w/o reason -> quota-style message
404 -> "Model <name> not found at cloudcode-pa..."
"""
status = response.status_code
# Parse the body once, surviving any weird encodings.
body_text = ""
body_json: Dict[str, Any] = {}
try:
body_text = response.text
except Exception:
body_text = ""
if body_text:
try:
parsed = json.loads(body_text)
if isinstance(parsed, dict):
body_json = parsed
except (ValueError, TypeError):
body_json = {}
# Dig into Google's error envelope. Shape is:
# {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED",
# "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED",
# "metadata": {...}},
# {"@type": ".../RetryInfo", "retryDelay": "30s"}]}}
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
if not isinstance(err_obj, dict):
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
# Extract google.rpc.ErrorInfo reason + metadata. There may be more
# than one ErrorInfo (rare), so we pick the first one with a reason.
error_reason = ""
error_metadata: Dict[str, Any] = {}
retry_delay_seconds: Optional[float] = None
for detail in err_details_list:
if not isinstance(detail, dict):
continue
type_url = str(detail.get("@type") or "")
if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"):
reason = detail.get("reason")
if isinstance(reason, str) and reason:
error_reason = reason
md = detail.get("metadata")
if isinstance(md, dict):
error_metadata = md
elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"):
# retryDelay is a google.protobuf.Duration string like "30s" or "1.5s".
delay_raw = detail.get("retryDelay")
if isinstance(delay_raw, str) and delay_raw.endswith("s"):
try:
retry_delay_seconds = float(delay_raw[:-1])
except ValueError:
pass
elif isinstance(delay_raw, (int, float)):
retry_delay_seconds = float(delay_raw)
# Fall back to the Retry-After header if the body didn't include RetryInfo.
if retry_delay_seconds is None:
try:
header_val = response.headers.get("Retry-After") or response.headers.get("retry-after")
except Exception:
header_val = None
if header_val:
try:
retry_delay_seconds = float(header_val)
except (TypeError, ValueError):
retry_delay_seconds = None
# Classify the error code. ``code_assist_rate_limited`` stays the default
# for 429s; a more specific reason tag helps downstream callers (e.g. tests,
# logs) without changing the rate_limit classification path.
code = f"code_assist_http_{status}"
if status == 401:
code = "code_assist_unauthorized"
elif status == 429:
code = "code_assist_rate_limited"
if error_reason == "MODEL_CAPACITY_EXHAUSTED":
code = "code_assist_capacity_exhausted"
# Build a human-readable message. Keep the status + a raw-body tail for
# debugging, but lead with a friendlier summary when we recognize the
# Google signal.
model_hint = ""
if isinstance(error_metadata, dict):
model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip()
if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED":
target = model_hint or "this Gemini model"
message = (
f"Gemini capacity exhausted for {target} (Google-side throttle, "
f"not a Hermes issue). Try a different Gemini model or set a "
f"fallback_providers entry to a non-Gemini provider."
)
if retry_delay_seconds is not None:
message += f" Google suggests retrying in {retry_delay_seconds:g}s."
elif status == 429 and err_status == "RESOURCE_EXHAUSTED":
message = (
f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). "
f"Check /gquota for remaining daily requests."
)
if retry_delay_seconds is not None:
message += f" Retry suggested in {retry_delay_seconds:g}s."
elif status == 404:
# Google returns 404 when a model has been retired or renamed.
target = model_hint or (err_message or "model")
message = (
f"Code Assist 404: {target} is not available at "
f"cloudcode-pa.googleapis.com. It may have been renamed or "
f"retired. Check hermes_cli/models.py for the current list."
)
elif err_message:
# Generic fallback with the parsed message.
message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}"
else:
# Last-ditch fallback — raw body snippet.
message = f"Code Assist returned HTTP {status}: {body_text[:500]}"
return CodeAssistError(
message,
code=code,
status_code=status,
response=response,
retry_after=retry_delay_seconds,
details={
"status": err_status,
"reason": error_reason,
"metadata": error_metadata,
"message": err_message,
},
)
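# --- Illustrative sketch (not part of the client) ---------------------------
# How a MODEL_CAPACITY_EXHAUSTED 429 classifies. The body mirrors the error
# envelope documented above; the model name and retry delay are made up.
def _example_classify_capacity_429() -> None:
    body = {"error": {
        "code": 429, "status": "RESOURCE_EXHAUSTED",
        "message": "capacity exhausted",
        "details": [
            {"@type": "type.googleapis.com/google.rpc.ErrorInfo",
             "reason": "MODEL_CAPACITY_EXHAUSTED",
             "metadata": {"model": "gemini-2.5-pro"}},
            {"@type": "type.googleapis.com/google.rpc.RetryInfo",
             "retryDelay": "30s"},
        ],
    }}
    err = _gemini_http_error(httpx.Response(429, json=body))
    assert err.code == "code_assist_capacity_exhausted"
    assert err.status_code == 429
    assert err.retry_after == 30.0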

agent/google_code_assist.py (new file, 453 lines)

@ -0,0 +1,453 @@
"""Google Code Assist API client — project discovery, onboarding, quota.
The Code Assist API powers Google's official gemini-cli. It sits at
``cloudcode-pa.googleapis.com`` and provides:
- Free tier access (generous daily quota) for personal Google accounts
- Paid tier access via GCP projects with billing / Workspace / Standard / Enterprise
This module handles the control-plane dance needed before inference:
1. ``load_code_assist()``: probe the user's account to learn what tier they're on
and whether a ``cloudaicompanionProject`` is already assigned.
2. ``onboard_user()``: if the user hasn't been onboarded yet (new account, fresh
free tier, etc.), call this with the chosen tier + project id. Supports LRO
polling for slow provisioning.
3. ``retrieve_user_quota()``: fetch the ``buckets[]`` array showing remaining
quota per model, used by the ``/gquota`` slash command.
VPC-SC handling: enterprise accounts under a VPC Service Controls perimeter
will get ``SECURITY_POLICY_VIOLATED`` on ``load_code_assist``. We catch this
and force the account to ``standard-tier`` so the call chain still succeeds.
Derived from opencode-gemini-auth (MIT) and clawdbot/extensions/google. The
request/response shapes are specific to Google's internal Code Assist API,
documented nowhere public; we copy them from the reference implementations.
"""
from __future__ import annotations
import json
import logging
import os
import time
import urllib.error
import urllib.parse
import urllib.request
import uuid
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# =============================================================================
# Constants
# =============================================================================
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com"
# Fallback endpoints tried when prod returns an error during project discovery
FALLBACK_ENDPOINTS = [
"https://daily-cloudcode-pa.sandbox.googleapis.com",
"https://autopush-cloudcode-pa.sandbox.googleapis.com",
]
# Tier identifiers that Google's API uses
FREE_TIER_ID = "free-tier"
LEGACY_TIER_ID = "legacy-tier"
STANDARD_TIER_ID = "standard-tier"
# Default HTTP headers matching gemini-cli's fingerprint.
# Google may reject unrecognized User-Agents on these internal endpoints.
_GEMINI_CLI_USER_AGENT = "google-api-nodejs-client/9.15.1 (gzip)"
_X_GOOG_API_CLIENT = "gl-node/24.0.0"
_DEFAULT_REQUEST_TIMEOUT = 30.0
_ONBOARDING_POLL_ATTEMPTS = 12
_ONBOARDING_POLL_INTERVAL_SECONDS = 5.0
class CodeAssistError(RuntimeError):
"""Exception raised by the Code Assist (``cloudcode-pa``) integration.
Carries HTTP status / response / retry-after metadata so the agent's
``error_classifier._extract_status_code`` and the main loop's Retry-After
handling (which walks ``error.response.headers``) pick up the right
signals. Without these, 429s from the OAuth path look like opaque
``RuntimeError`` and skip the rate-limit path.
"""
def __init__(
self,
message: str,
*,
code: str = "code_assist_error",
status_code: Optional[int] = None,
response: Any = None,
retry_after: Optional[float] = None,
details: Optional[Dict[str, Any]] = None,
) -> None:
super().__init__(message)
self.code = code
# ``status_code`` is picked up by ``agent.error_classifier._extract_status_code``
# so a 429 from Code Assist classifies as FailoverReason.rate_limit and
# triggers the main loop's fallback_providers chain the same way SDK
# errors do.
self.status_code = status_code
# ``response`` is the underlying ``httpx.Response`` (or a shim with a
# ``.headers`` mapping and ``.json()`` method). The main loop reads
# ``error.response.headers["Retry-After"]`` to honor Google's retry
# hints when the backend throttles us.
self.response = response
# Parsed ``Retry-After`` seconds (kept separately for convenience —
# Google returns retry hints in both the header and the error body's
# ``google.rpc.RetryInfo`` details, and we pick whichever we found).
self.retry_after = retry_after
# Parsed structured error details from the Google error envelope
# (e.g. ``{"reason": "MODEL_CAPACITY_EXHAUSTED", "status": "RESOURCE_EXHAUSTED"}``).
# Useful for logging and for tests that want to assert on specifics.
self.details = details or {}
class ProjectIdRequiredError(CodeAssistError):
def __init__(self, message: str = "GCP project id required for this tier") -> None:
super().__init__(message, code="code_assist_project_id_required")
# =============================================================================
# HTTP primitive (auth via Bearer token passed per-call)
# =============================================================================
def _build_headers(access_token: str, *, user_agent_model: str = "") -> Dict[str, str]:
ua = _GEMINI_CLI_USER_AGENT
if user_agent_model:
ua = f"{ua} model/{user_agent_model}"
return {
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": f"Bearer {access_token}",
"User-Agent": ua,
"X-Goog-Api-Client": _X_GOOG_API_CLIENT,
"x-activity-request-id": str(uuid.uuid4()),
}
def _client_metadata() -> Dict[str, str]:
"""Match Google's gemini-cli exactly — unrecognized metadata may be rejected."""
return {
"ideType": "IDE_UNSPECIFIED",
"platform": "PLATFORM_UNSPECIFIED",
"pluginType": "GEMINI",
}
def _post_json(
url: str,
body: Dict[str, Any],
access_token: str,
*,
timeout: float = _DEFAULT_REQUEST_TIMEOUT,
user_agent_model: str = "",
) -> Dict[str, Any]:
data = json.dumps(body).encode("utf-8")
request = urllib.request.Request(
url, data=data, method="POST",
headers=_build_headers(access_token, user_agent_model=user_agent_model),
)
try:
with urllib.request.urlopen(request, timeout=timeout) as response:
raw = response.read().decode("utf-8", errors="replace")
return json.loads(raw) if raw else {}
except urllib.error.HTTPError as exc:
detail = ""
try:
detail = exc.read().decode("utf-8", errors="replace")
except Exception:
pass
# Special case: VPC-SC violation should be distinguishable
if _is_vpc_sc_violation(detail):
raise CodeAssistError(
f"VPC-SC policy violation: {detail}",
code="code_assist_vpc_sc",
) from exc
raise CodeAssistError(
f"Code Assist HTTP {exc.code}: {detail or exc.reason}",
code=f"code_assist_http_{exc.code}",
) from exc
except urllib.error.URLError as exc:
raise CodeAssistError(
f"Code Assist request failed: {exc}",
code="code_assist_network_error",
) from exc
def _is_vpc_sc_violation(body: str) -> bool:
"""Detect a VPC Service Controls violation from a response body."""
if not body:
return False
try:
parsed = json.loads(body)
except (json.JSONDecodeError, ValueError):
return "SECURITY_POLICY_VIOLATED" in body
# Walk the nested error structure Google uses
error = parsed.get("error") if isinstance(parsed, dict) else None
if not isinstance(error, dict):
return False
details = error.get("details") or []
if isinstance(details, list):
for item in details:
if isinstance(item, dict):
reason = item.get("reason") or ""
if reason == "SECURITY_POLICY_VIOLATED":
return True
msg = str(error.get("message", ""))
return "SECURITY_POLICY_VIOLATED" in msg
# =============================================================================
# load_code_assist — discovers current tier + assigned project
# =============================================================================
@dataclass
class CodeAssistProjectInfo:
"""Result from ``load_code_assist``."""
current_tier_id: str = ""
cloudaicompanion_project: str = "" # Google-managed project (free tier)
allowed_tiers: List[str] = field(default_factory=list)
raw: Dict[str, Any] = field(default_factory=dict)
def load_code_assist(
access_token: str,
*,
project_id: str = "",
user_agent_model: str = "",
) -> CodeAssistProjectInfo:
"""Call ``POST /v1internal:loadCodeAssist`` with prod → sandbox fallback.
Returns whatever tier + project info Google reports. On VPC-SC violations,
returns a synthetic ``standard-tier`` result so the chain can continue.
"""
body: Dict[str, Any] = {
"metadata": {
"duetProject": project_id,
**_client_metadata(),
},
}
if project_id:
body["cloudaicompanionProject"] = project_id
endpoints = [CODE_ASSIST_ENDPOINT] + FALLBACK_ENDPOINTS
last_err: Optional[Exception] = None
for endpoint in endpoints:
url = f"{endpoint}/v1internal:loadCodeAssist"
try:
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
return _parse_load_response(resp)
except CodeAssistError as exc:
if exc.code == "code_assist_vpc_sc":
logger.info("VPC-SC violation on %s — defaulting to standard-tier", endpoint)
return CodeAssistProjectInfo(
current_tier_id=STANDARD_TIER_ID,
cloudaicompanion_project=project_id,
)
last_err = exc
logger.warning("loadCodeAssist failed on %s: %s", endpoint, exc)
continue
if last_err:
raise last_err
return CodeAssistProjectInfo()
def _parse_load_response(resp: Dict[str, Any]) -> CodeAssistProjectInfo:
current_tier = resp.get("currentTier") or {}
tier_id = str(current_tier.get("id") or "") if isinstance(current_tier, dict) else ""
project = str(resp.get("cloudaicompanionProject") or "")
allowed = resp.get("allowedTiers") or []
allowed_ids: List[str] = []
if isinstance(allowed, list):
for t in allowed:
if isinstance(t, dict):
tid = str(t.get("id") or "")
if tid:
allowed_ids.append(tid)
return CodeAssistProjectInfo(
current_tier_id=tier_id,
cloudaicompanion_project=project,
allowed_tiers=allowed_ids,
raw=resp,
)
# =============================================================================
# onboard_user — provisions a new user on a tier (with LRO polling)
# =============================================================================
def onboard_user(
access_token: str,
*,
tier_id: str,
project_id: str = "",
user_agent_model: str = "",
) -> Dict[str, Any]:
"""Call ``POST /v1internal:onboardUser`` to provision the user.
For paid tiers, ``project_id`` is REQUIRED (raises ProjectIdRequiredError).
For free tiers, ``project_id`` is optional; Google will assign one.
Returns the final operation response. Polls ``/v1internal/<name>`` for up
to ``_ONBOARDING_POLL_ATTEMPTS`` × ``_ONBOARDING_POLL_INTERVAL_SECONDS``
(default: 12 × 5s = 1 min).
"""
if tier_id != FREE_TIER_ID and tier_id != LEGACY_TIER_ID and not project_id:
raise ProjectIdRequiredError(
f"Tier {tier_id!r} requires a GCP project id. "
"Set HERMES_GEMINI_PROJECT_ID or GOOGLE_CLOUD_PROJECT."
)
body: Dict[str, Any] = {
"tierId": tier_id,
"metadata": _client_metadata(),
}
if project_id:
body["cloudaicompanionProject"] = project_id
endpoint = CODE_ASSIST_ENDPOINT
url = f"{endpoint}/v1internal:onboardUser"
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
# Poll if LRO (long-running operation)
if not resp.get("done"):
op_name = resp.get("name", "")
if not op_name:
return resp
for attempt in range(_ONBOARDING_POLL_ATTEMPTS):
time.sleep(_ONBOARDING_POLL_INTERVAL_SECONDS)
poll_url = f"{endpoint}/v1internal/{op_name}"
try:
poll_resp = _post_json(poll_url, {}, access_token, user_agent_model=user_agent_model)
except CodeAssistError as exc:
logger.warning("Onboarding poll attempt %d failed: %s", attempt + 1, exc)
continue
if poll_resp.get("done"):
return poll_resp
logger.warning("Onboarding did not complete within %d attempts", _ONBOARDING_POLL_ATTEMPTS)
return resp
# =============================================================================
# retrieve_user_quota — for /gquota
# =============================================================================
@dataclass
class QuotaBucket:
model_id: str
token_type: str = ""
remaining_fraction: float = 0.0
reset_time_iso: str = ""
raw: Dict[str, Any] = field(default_factory=dict)
def retrieve_user_quota(
access_token: str,
*,
project_id: str = "",
user_agent_model: str = "",
) -> List[QuotaBucket]:
"""Call ``POST /v1internal:retrieveUserQuota`` and parse ``buckets[]``."""
body: Dict[str, Any] = {}
if project_id:
body["project"] = project_id
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:retrieveUserQuota"
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
raw_buckets = resp.get("buckets") or []
buckets: List[QuotaBucket] = []
if not isinstance(raw_buckets, list):
return buckets
for b in raw_buckets:
if not isinstance(b, dict):
continue
buckets.append(QuotaBucket(
model_id=str(b.get("modelId") or ""),
token_type=str(b.get("tokenType") or ""),
remaining_fraction=float(b.get("remainingFraction") or 0.0),
reset_time_iso=str(b.get("resetTime") or ""),
raw=b,
))
return buckets
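# --- Illustrative sketch (not part of the module) ----------------------------
# Rendering buckets for a /gquota-style display. The field values are made
# up; remaining_fraction is assumed to be in [0.0, 1.0] as parsed above.
def _example_format_quota(buckets: List[QuotaBucket]) -> List[str]:
    lines = []
    for b in buckets:
        pct = b.remaining_fraction * 100
        line = f"{b.model_id or '(unknown)'}: {pct:.0f}% remaining"
        if b.reset_time_iso:
            line += f", resets {b.reset_time_iso}"
        lines.append(line)
    return lines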
# =============================================================================
# Project context resolution
# =============================================================================
@dataclass
class ProjectContext:
"""Resolved state for a given OAuth session."""
project_id: str = "" # effective project id sent on requests
managed_project_id: str = "" # Google-assigned project (free tier)
tier_id: str = ""
source: str = "" # "env", "config", "discovered", "onboarded", "stored"
def resolve_project_context(
access_token: str,
*,
configured_project_id: str = "",
env_project_id: str = "",
user_agent_model: str = "",
) -> ProjectContext:
"""Figure out what project id + tier to use for requests.
Priority:
1. If configured_project_id or env_project_id is set, use that directly
and short-circuit (no discovery needed).
2. Otherwise call loadCodeAssist to see what Google says.
3. If no tier assigned yet, onboard the user (free tier default).
"""
# Short-circuit: caller provided a project id
if configured_project_id:
return ProjectContext(
project_id=configured_project_id,
tier_id=STANDARD_TIER_ID, # assume paid since they specified one
source="config",
)
if env_project_id:
return ProjectContext(
project_id=env_project_id,
tier_id=STANDARD_TIER_ID,
source="env",
)
# Discover via loadCodeAssist
info = load_code_assist(access_token, user_agent_model=user_agent_model)
effective_project = info.cloudaicompanion_project
tier = info.current_tier_id
if not tier:
# User hasn't been onboarded — provision them on free tier
onboard_resp = onboard_user(
access_token,
tier_id=FREE_TIER_ID,
project_id="",
user_agent_model=user_agent_model,
)
# Re-parse from the onboard response
response_body = onboard_resp.get("response") or {}
if isinstance(response_body, dict):
effective_project = (
effective_project
or str(response_body.get("cloudaicompanionProject") or "")
)
tier = FREE_TIER_ID
source = "onboarded"
else:
source = "discovered"
return ProjectContext(
project_id=effective_project,
managed_project_id=effective_project if tier == FREE_TIER_ID else "",
tier_id=tier,
source=source,
)
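# --- Illustrative sketch (not part of the module) ----------------------------
# The control-plane dance end to end, assuming a valid OAuth access token
# (the token value itself is hypothetical). On a fresh free-tier account
# this runs loadCodeAssist, onboards, and returns an "onboarded" context.
def _example_resolve(access_token: str) -> ProjectContext:
    ctx = resolve_project_context(
        access_token,
        configured_project_id="",  # no explicit project: trigger discovery
        env_project_id="",
        user_agent_model="gemini-2.5-flash",
    )
    logger.info("tier=%s project=%s source=%s",
                ctx.tier_id, ctx.project_id, ctx.source)
    return ctx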

agent/google_oauth.py (new file, 1048 lines)
Diff suppressed because it is too large.

View file

@ -634,13 +634,7 @@ class InsightsEngine:
lines.append(f" Sessions: {o['total_sessions']:<12} Messages: {o['total_messages']:,}")
lines.append(f" Tool calls: {o['total_tool_calls']:<12,} User messages: {o['user_messages']:,}")
lines.append(f" Input tokens: {o['total_input_tokens']:<12,} Output tokens: {o['total_output_tokens']:,}")
cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
if cache_total > 0:
lines.append(f" Cache read: {o['total_cache_read_tokens']:<12,} Cache write: {o['total_cache_write_tokens']:,}")
cost_str = f"${o['estimated_cost']:.2f}"
if o.get("models_without_pricing"):
cost_str += " *"
lines.append(f" Total tokens: {o['total_tokens']:<12,} Est. cost: {cost_str}")
lines.append(f" Total tokens: {o['total_tokens']:,}")
if o["total_hours"] > 0:
lines.append(f" Active time: ~{_format_duration(o['total_hours'] * 3600):<11} Avg session: ~{_format_duration(o['avg_session_duration'])}")
lines.append(f" Avg msgs/session: {o['avg_messages_per_session']:.1f}")
@ -650,16 +644,10 @@ class InsightsEngine:
if report["models"]:
lines.append(" 🤖 Models Used")
lines.append(" " + "" * 56)
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12} {'Cost':>8}")
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12}")
for m in report["models"]:
model_name = m["model"][:28]
if m.get("has_pricing"):
cost_cell = f"${m['cost']:>6.2f}"
else:
cost_cell = " N/A"
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,} {cost_cell}")
if o.get("models_without_pricing"):
lines.append(" * Cost N/A for custom/self-hosted models")
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,}")
lines.append("")
# Platform breakdown
@ -739,15 +727,7 @@ class InsightsEngine:
# Overview
lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
if cache_total > 0:
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
else:
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
cost_note = ""
if o.get("models_without_pricing"):
cost_note = " _(excludes custom/self-hosted models)_"
lines.append(f"**Est. cost:** ${o['estimated_cost']:.2f}{cost_note}")
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
if o["total_hours"] > 0:
lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
lines.append("")
@ -756,8 +736,7 @@ class InsightsEngine:
if report["models"]:
lines.append("**🤖 Models:**")
for m in report["models"][:5]:
cost_str = f"${m['cost']:.2f}" if m.get("has_pricing") else "N/A"
lines.append(f" {m['model'][:25]}{m['sessions']} sessions, {m['total_tokens']:,} tokens, {cost_str}")
lines.append(f" {m['model'][:25]}{m['sessions']} sessions, {m['total_tokens']:,} tokens")
lines.append("")
# Platforms (if multi-platform)

View file

@ -28,6 +28,7 @@ Usage in run_agent.py:
from __future__ import annotations
import json
import logging
import re
from typing import Any, Dict, List, Optional
@ -43,11 +44,22 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
_FENCE_TAG_RE = re.compile(r'</?\s*memory-context\s*>', re.IGNORECASE)
_INTERNAL_CONTEXT_RE = re.compile(
r'<\s*memory-context\s*>[\s\S]*?</\s*memory-context\s*>',
re.IGNORECASE,
)
_INTERNAL_NOTE_RE = re.compile(
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
re.IGNORECASE,
)
def sanitize_context(text: str) -> str:
"""Strip fence-escape sequences from provider output."""
return _FENCE_TAG_RE.sub('', text)
"""Strip fence tags, injected context blocks, and system notes from provider output."""
text = _INTERNAL_CONTEXT_RE.sub('', text)
text = _INTERNAL_NOTE_RE.sub('', text)
text = _FENCE_TAG_RE.sub('', text)
return text
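# --- Illustrative sketch ------------------------------------------------------
# What sanitize_context strips, on a made-up provider output: the injected
# recall block and its system note disappear; surrounding text survives.
def _example_sanitize() -> None:
    raw = (
        "[System note: The following is recalled memory context, "
        "NOT new user input. Treat as informational background data.] "
        "<memory-context>secret recall</memory-context> visible reply"
    )
    assert sanitize_context(raw).strip() == "visible reply"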
def build_memory_context_block(raw_context: str) -> str:

View file

@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
# are preserved so the full model name reaches cache lookups and server queries.
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
"gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
"qwen-oauth",
"xiaomi",
@ -33,10 +33,12 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"google", "google-gemini", "google-ai-studio",
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
"ollama",
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"xai", "x-ai", "x.ai", "grok",
"nvidia", "nim", "nvidia-nim", "nemotron",
"qwen-portal",
})
@ -101,6 +103,8 @@ DEFAULT_CONTEXT_LENGTHS = {
# fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
# substring of "anthropic/claude-sonnet-4.6").
# OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
"claude-opus-4-7": 1000000,
"claude-opus-4.7": 1000000,
"claude-opus-4-6": 1000000,
"claude-sonnet-4-6": 1000000,
"claude-opus-4.6": 1000000,
@ -121,7 +125,6 @@ DEFAULT_CONTEXT_LENGTHS = {
"gemini": 1048576,
# Gemma (open models served via AI Studio)
"gemma-4-31b": 256000,
"gemma-4-26b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
# DeepSeek
@ -155,6 +158,8 @@ DEFAULT_CONTEXT_LENGTHS = {
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
# Kimi
"kimi": 262144,
# Nemotron — NVIDIA's open-weights series (128K context across all sizes)
"nemotron": 131072,
# Arcee
"trinity": 262144,
# OpenRouter
@ -237,8 +242,10 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.fireworks.ai": "fireworks",
"opencode.ai": "opencode-go",
"api.x.ai": "xai",
"integrate.api.nvidia.com": "nvidia",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"ollama.com": "ollama-cloud",
}
@ -1012,6 +1019,16 @@ def get_model_context_length(
if ctx:
return ctx
# 4b. AWS Bedrock — use static context length table.
# Bedrock's ListFoundationModels doesn't expose context window sizes,
# so we maintain a curated table in bedrock_adapter.py.
if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
try:
from agent.bedrock_adapter import get_bedrock_context_length
return get_bedrock_context_length(model)
except ImportError:
pass # boto3 not installed — fall through to generic resolution
# 5. Provider-aware lookups (before generic OpenRouter cache)
# These are provider-specific and take priority over the generic OR cache,
# since the same model can have different context limits per provider

View file

@ -169,6 +169,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"togetherai": "togetherai",
"perplexity": "perplexity",
"cohere": "cohere",
"ollama-cloud": "ollama-cloud",
}
# Reverse mapping: models.dev → Hermes (built lazily)

agent/nous_rate_guard.py (new file, 182 lines)

@ -0,0 +1,182 @@
"""Cross-session rate limit guard for Nous Portal.
Writes rate limit state to a shared file so all sessions (CLI, gateway,
cron, auxiliary) can check whether Nous Portal is currently rate-limited
before making requests. Prevents retry amplification when RPH is tapped.
Each 429 from Nous triggers up to 9 API calls per conversation turn
(3 SDK retries x 3 Hermes retries), and every one of those calls counts
against RPH. By recording the rate limit state on first 429 and checking
it before subsequent attempts, we eliminate the amplification effect.
"""
from __future__ import annotations
import json
import logging
import os
import tempfile
import time
from typing import Any, Mapping, Optional
logger = logging.getLogger(__name__)
_STATE_SUBDIR = "rate_limits"
_STATE_FILENAME = "nous.json"
def _state_path() -> str:
"""Return the path to the Nous rate limit state file."""
try:
from hermes_constants import get_hermes_home
base = get_hermes_home()
except ImportError:
base = os.path.join(os.path.expanduser("~"), ".hermes")
return os.path.join(base, _STATE_SUBDIR, _STATE_FILENAME)
def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]:
"""Extract the best available reset-time estimate from response headers.
Priority:
1. x-ratelimit-reset-requests-1h (hourly RPH window; most useful)
2. x-ratelimit-reset-requests (per-minute RPM window)
3. retry-after (generic HTTP header)
Returns seconds-from-now, or None if no usable header found.
"""
if not headers:
return None
lowered = {k.lower(): v for k, v in headers.items()}
for key in (
"x-ratelimit-reset-requests-1h",
"x-ratelimit-reset-requests",
"retry-after",
):
raw = lowered.get(key)
if raw is not None:
try:
val = float(raw)
if val > 0:
return val
except (TypeError, ValueError):
pass
return None
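# --- Illustrative sketch (not part of the module) ----------------------------
# Header precedence on a made-up 429: the hourly reset wins even when a
# generic Retry-After is also present.
def _example_parse_reset() -> None:
    headers = {
        "Retry-After": "60",
        "X-RateLimit-Reset-Requests-1h": "1800",
    }
    assert _parse_reset_seconds(headers) == 1800.0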
def record_nous_rate_limit(
*,
headers: Optional[Mapping[str, str]] = None,
error_context: Optional[dict[str, Any]] = None,
default_cooldown: float = 300.0,
) -> None:
"""Record that Nous Portal is rate-limited.
Parses the reset time from response headers or error context.
Falls back to ``default_cooldown`` (5 minutes) if no reset info
is available. Writes to a shared file that all sessions can read.
Args:
headers: HTTP response headers from the 429 error.
error_context: Structured error context from _extract_api_error_context().
default_cooldown: Fallback cooldown in seconds when no header data.
"""
now = time.time()
reset_at = None
# Try headers first (most accurate)
header_seconds = _parse_reset_seconds(headers)
if header_seconds is not None:
reset_at = now + header_seconds
# Try error_context reset_at (from body parsing)
if reset_at is None and isinstance(error_context, dict):
ctx_reset = error_context.get("reset_at")
if isinstance(ctx_reset, (int, float)) and ctx_reset > now:
reset_at = float(ctx_reset)
# Default cooldown
if reset_at is None:
reset_at = now + default_cooldown
path = _state_path()
try:
state_dir = os.path.dirname(path)
os.makedirs(state_dir, exist_ok=True)
state = {
"reset_at": reset_at,
"recorded_at": now,
"reset_seconds": reset_at - now,
}
# Atomic write: write to temp file + rename
fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp")
try:
with os.fdopen(fd, "w") as f:
json.dump(state, f)
os.replace(tmp_path, path)
except Exception:
# Clean up temp file on failure
try:
os.unlink(tmp_path)
except OSError:
pass
raise
logger.info(
"Nous rate limit recorded: resets in %.0fs (at %.0f)",
reset_at - now, reset_at,
)
except Exception as exc:
logger.debug("Failed to write Nous rate limit state: %s", exc)
def nous_rate_limit_remaining() -> Optional[float]:
"""Check if Nous Portal is currently rate-limited.
Returns:
Seconds remaining until reset, or None if not rate-limited.
"""
path = _state_path()
try:
with open(path) as f:
state = json.load(f)
reset_at = state.get("reset_at", 0)
remaining = reset_at - time.time()
if remaining > 0:
return remaining
# Expired — clean up
try:
os.unlink(path)
except OSError:
pass
return None
except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
return None
def clear_nous_rate_limit() -> None:
"""Clear the rate limit state (e.g., after a successful Nous request)."""
try:
os.unlink(_state_path())
except FileNotFoundError:
pass
except OSError as exc:
logger.debug("Failed to clear Nous rate limit state: %s", exc)
def format_remaining(seconds: float) -> str:
"""Format seconds remaining into human-readable duration."""
s = max(0, int(seconds))
if s < 60:
return f"{s}s"
if s < 3600:
m, sec = divmod(s, 60)
return f"{m}m {sec}s" if sec else f"{m}m"
h, remainder = divmod(s, 3600)
m = remainder // 60
return f"{h}h {m}m" if m else f"{h}h"
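# --- Illustrative sketch (not part of the module) ----------------------------
# Typical call pattern around a Nous request: check the shared state before
# calling, record on failure, clear on success. `make_nous_request` is a
# hypothetical callable; the guard functions are the real API above. In real
# code only 429 responses should trigger record_nous_rate_limit().
def _example_guarded_request(make_nous_request, headers_from_429=None):
    remaining = nous_rate_limit_remaining()
    if remaining is not None:
        logger.info("Nous rate-limited for %s; skipping", format_remaining(remaining))
        return None
    try:
        result = make_nous_request()
    except Exception:
        record_nous_rate_limit(headers=headers_from_429)
        raise
    clear_nous_rate_limit()
    return result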

View file

@ -295,7 +295,9 @@ PLATFORM_HINTS = {
),
"telegram": (
"You are on a text messaging communication platform, Telegram. "
"Please do not use markdown as it does not render. "
"Standard markdown is automatically converted to Telegram format. "
"Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
"`inline code`, ```code blocks```, [links](url), and ## headers. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. Images "
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@ -652,7 +654,7 @@ def build_skills_system_prompt(
):
continue
skills_by_category.setdefault(category, []).append(
(skill_name, entry.get("description", ""))
(frontmatter_name, entry.get("description", ""))
)
category_descriptions = {
str(k): str(v)
@ -677,7 +679,7 @@ def build_skills_system_prompt(
):
continue
skills_by_category.setdefault(entry["category"], []).append(
(skill_name, entry["description"])
(entry["frontmatter_name"], entry["description"])
)
# Read category-level DESCRIPTION.md files
@ -720,9 +722,10 @@ def build_skills_system_prompt(
continue
entry = _build_snapshot_entry(skill_file, ext_dir, frontmatter, desc)
skill_name = entry["skill_name"]
if skill_name in seen_skill_names:
frontmatter_name = entry["frontmatter_name"]
if frontmatter_name in seen_skill_names:
continue
if entry["frontmatter_name"] in disabled or skill_name in disabled:
if frontmatter_name in disabled or skill_name in disabled:
continue
if not _skill_should_show(
extract_skill_conditions(frontmatter),
@ -730,9 +733,9 @@ def build_skills_system_prompt(
available_toolsets,
):
continue
seen_skill_names.add(skill_name)
seen_skill_names.add(frontmatter_name)
skills_by_category.setdefault(entry["category"], []).append(
(skill_name, entry["description"])
(frontmatter_name, entry["description"])
)
except Exception as e:
logger.debug("Error reading external skill %s: %s", skill_file, e)

View file

@ -93,6 +93,17 @@ _DB_CONNSTR_RE = re.compile(
re.IGNORECASE,
)
# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
_JWT_RE = re.compile(
r"eyJ[A-Za-z0-9_-]{10,}" # Header (always starts with eyJ)
r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}" # Optional payload and/or signature
)
# Discord user/role mentions: <@123456789012345678> or <@!123456789012345678>
# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts.
_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
# E.164 phone numbers: +<country><number>, 7-15 digits
# Negative lookahead prevents matching hex strings or identifiers
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
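# --- Illustrative sketch ------------------------------------------------------
# What the two new patterns match, standalone. (The real module pipes them
# through redact_sensitive_text with its masking helpers; the replacement
# string here is simplified for illustration.)
def _example_new_patterns() -> None:
    jwt = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxIn0.sig-part"
    assert _JWT_RE.search(jwt) is not None
    redacted = _DISCORD_MENTION_RE.sub("<@***>", "ping <@123456789012345678>")
    assert redacted == "ping <@***>"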
@ -159,6 +170,12 @@ def redact_sensitive_text(text: str) -> str:
# Database connection string passwords
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
# JWT tokens (eyJ... — base64-encoded JSON headers)
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
# Discord user/role mentions (<@snowflake_id>)
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
# E.164 phone numbers (Signal, WhatsApp)
def _redact_phone(m):
phone = m.group(1)

View file

@ -12,6 +12,8 @@ from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
@ -70,7 +72,14 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
skill_name = str(loaded_skill.get("name") or normalized)
skill_path = str(loaded_skill.get("path") or "")
skill_dir = None
if skill_path:
# Prefer the absolute skill_dir returned by skill_view() — this is
# correct for both local and external skills. Fall back to the old
# SKILLS_DIR-relative reconstruction only when skill_dir is absent
# (e.g. legacy skill_view responses).
abs_skill_dir = loaded_skill.get("skill_dir")
if abs_skill_dir:
skill_dir = Path(abs_skill_dir)
elif skill_path:
try:
skill_dir = SKILLS_DIR / Path(skill_path).parent
except Exception:
@ -108,7 +117,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None
if not resolved:
return
lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
lines = ["", f"[Skill config (from {display_hermes_home()}/config.yaml):"]
for key, value in resolved.items():
display_val = str(value) if value else "(not set)"
lines.append(f" {key} = {display_val}")

View file

@ -284,6 +284,80 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
source_url="https://ai.google.dev/pricing",
pricing_version="google-pricing-2026-03-16",
),
# AWS Bedrock — pricing per the Bedrock pricing page.
# Bedrock charges the same per-token rates as the model provider but
# through AWS billing. These are the on-demand prices (no commitment).
# Source: https://aws.amazon.com/bedrock/pricing/
(
"bedrock",
"anthropic.claude-opus-4-6",
): PricingEntry(
input_cost_per_million=Decimal("15.00"),
output_cost_per_million=Decimal("75.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-6",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-5",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-haiku-4-5",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("4.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-pro",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("3.20"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-lite",
): PricingEntry(
input_cost_per_million=Decimal("0.06"),
output_cost_per_million=Decimal("0.24"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-micro",
): PricingEntry(
input_cost_per_million=Decimal("0.035"),
output_cost_per_million=Decimal("0.14"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
}

View file

@ -561,7 +561,10 @@ class BatchRunner:
provider_sort (str): Sort providers by price/throughput/latency (optional)
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
reasoning_config (Dict): OpenRouter reasoning config override (e.g. {"effort": "none"} to disable thinking)
prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming)
prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming).
NOTE: Anthropic Sonnet 4.6+ and Opus 4.6+ reject a trailing assistant-role prefill
(400 error). For those models use output_config.format or structured-output
schemas instead. Safe here for user-role priming and for older Claude / non-Claude models.
max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
"""
self.dataset_file = Path(dataset_file)

View file

@ -16,7 +16,7 @@ model:
# "nous" - Nous Portal OAuth (requires: hermes login)
# "nous-api" - Nous Portal API key (requires: NOUS_API_KEY)
# "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
# "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
# "openai-codex" - OpenAI Codex (requires: hermes auth)
# "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
# "gemini" - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
@ -24,8 +24,10 @@ model:
# "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
# "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
# "nvidia" - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY)
# "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
# "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
#
@ -37,12 +39,6 @@ model:
# base_url: "http://localhost:1234/v1"
# No API key needed — local servers typically ignore auth.
#
# For Ollama Cloud (https://ollama.com/pricing):
# provider: "custom"
# base_url: "https://ollama.com/v1"
# Set OLLAMA_API_KEY in .env — automatically picked up when base_url
# points to ollama.com.
#
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
provider: "auto"
@ -337,6 +333,7 @@ compression:
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
# "nous" - Force Nous Portal (requires: hermes login)
# "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
# Uses gpt-5.3-codex which supports vision.
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
@ -564,6 +561,18 @@ platform_toolsets:
homeassistant: [hermes-homeassistant]
qqbot: [hermes-qqbot]
# =============================================================================
# Gateway Platform Settings
# =============================================================================
# Optional per-platform messaging settings.
# Platform-specific knobs live under `extra`.
#
# platforms:
# telegram:
# reply_to_mode: "first" # off | first | all
# extra:
# disable_link_previews: false # Set true to suppress Telegram URL previews in bot messages
# ─────────────────────────────────────────────────────────────────────────────
# Available toolsets (use these names in platform_toolsets or the toolsets list)
#

cli.py (680 lines changed)

@ -18,6 +18,8 @@ import os
import shutil
import sys
import json
import re
import base64
import atexit
import tempfile
import time
@ -78,6 +80,42 @@ _project_env = Path(__file__).parent / '.env'
load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
_REASONING_TAGS = (
"REASONING_SCRATCHPAD",
"think",
"reasoning",
"THINKING",
"thinking",
)
def _strip_reasoning_tags(text: str) -> str:
cleaned = text
for tag in _REASONING_TAGS:
cleaned = re.sub(rf"<{tag}>.*?</{tag}>\s*", "", cleaned, flags=re.DOTALL)
cleaned = re.sub(rf"<{tag}>.*$", "", cleaned, flags=re.DOTALL)
return cleaned.strip()
def _assistant_content_as_text(content: Any) -> str:
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
parts = [
str(part.get("text", ""))
for part in content
if isinstance(part, dict) and part.get("type") == "text"
]
return "\n".join(p for p in parts if p)
return str(content)
def _assistant_copy_text(content: Any) -> str:
return _strip_reasoning_tags(_assistant_content_as_text(content))
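# --- Illustrative sketch ------------------------------------------------------
# How copy-text extraction behaves on a structured assistant message with an
# embedded reasoning block (content shape is the OpenAI-style part list;
# the sample values are made up).
def _example_copy_text() -> None:
    content = [
        {"type": "text", "text": "<think>internal</think>Final answer"},
        {"type": "image_url", "image_url": {"url": "..."}},
    ]
    assert _assistant_copy_text(content) == "Final answer"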
# =============================================================================
# Configuration Loading
# =============================================================================
@ -401,14 +439,27 @@ def load_cli_config() -> Dict[str, Any]:
# filesystem is directly accessible. For ALL remote/container backends
# (ssh, docker, modal, singularity), the host path doesn't exist on the
# target -- remove the key so terminal_tool.py uses its per-backend default.
if terminal_config.get("cwd") in (".", "auto", "cwd"):
effective_backend = terminal_config.get("env_type", "local")
if effective_backend == "local":
terminal_config["cwd"] = os.getcwd()
defaults["terminal"]["cwd"] = terminal_config["cwd"]
#
# GUARD: If TERMINAL_CWD is already set to a real absolute path (by the
# gateway's config bridge earlier in the process), don't clobber it.
# This prevents a lazy import of cli.py during gateway runtime from
# rewriting TERMINAL_CWD to the service's working directory.
# See issue #10817.
_CWD_PLACEHOLDERS = (".", "auto", "cwd")
if terminal_config.get("cwd") in _CWD_PLACEHOLDERS:
_existing_cwd = os.environ.get("TERMINAL_CWD", "")
if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd):
# Gateway (or earlier startup) already resolved a real path — keep it
terminal_config["cwd"] = _existing_cwd
defaults["terminal"]["cwd"] = _existing_cwd
else:
# Remove so TERMINAL_CWD stays unset → tool picks backend default
terminal_config.pop("cwd", None)
effective_backend = terminal_config.get("env_type", "local")
if effective_backend == "local":
terminal_config["cwd"] = os.getcwd()
defaults["terminal"]["cwd"] = terminal_config["cwd"]
else:
# Remove so TERMINAL_CWD stays unset → tool picks backend default
terminal_config.pop("cwd", None)
env_mappings = {
"env_type": "TERMINAL_ENV",
@ -1159,6 +1210,10 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
return None
expanded = os.path.expandvars(os.path.expanduser(token))
if os.name != "nt":
normalized = expanded.replace("\\", "/")
if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha():
expanded = f"/mnt/{normalized[0].lower()}/{normalized[3:]}"
path = Path(expanded)
if not path.is_absolute():
base_dir = Path(os.getenv("TERMINAL_CWD", os.getcwd()))
@ -1241,10 +1296,12 @@ def _detect_file_drop(user_input: str) -> "dict | None":
or stripped.startswith("~")
or stripped.startswith("./")
or stripped.startswith("../")
or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha())
or stripped.startswith('"/')
or stripped.startswith('"~')
or stripped.startswith("'/")
or stripped.startswith("'~")
or (len(stripped) >= 4 and stripped[0] in ("'", '"') and stripped[2] == ":" and stripped[3] in ("\\", "/") and stripped[1].isalpha())
)
if not starts_like_path:
return None
@ -2013,7 +2070,17 @@ class HermesCLI:
"""Return the visible height for the spinner/status text line above the status bar."""
if not getattr(self, "_spinner_text", ""):
return 0
return 0 if self._use_minimal_tui_chrome(width=width) else 1
if self._use_minimal_tui_chrome(width=width):
return 0
# Compute how many lines the spinner text needs when wrapped.
# The rendered text is " {emoji} {label} ({elapsed})" — about
# len(_spinner_text) + 16 chars for indent + timer suffix.
width = width or self._get_tui_terminal_width()
if width and width > 10:
import math
text_len = len(self._spinner_text) + 16 # indent + timer
return max(1, math.ceil(text_len / width))
return 1
def _get_voice_status_fragments(self, width: Optional[int] = None):
"""Return the voice status bar fragments for the interactive TUI."""
@ -3102,21 +3169,6 @@ class HermesCLI:
MAX_ASST_LEN = 200 # truncate assistant text
MAX_ASST_LINES = 3 # max lines of assistant text
def _strip_reasoning(text: str) -> str:
"""Remove <REASONING_SCRATCHPAD>...</REASONING_SCRATCHPAD> blocks
from displayed text (reasoning model internal thoughts)."""
import re
cleaned = re.sub(
r"<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>\s*",
"", text, flags=re.DOTALL,
)
# Also strip unclosed reasoning tags at the end
cleaned = re.sub(
r"<REASONING_SCRATCHPAD>.*$",
"", cleaned, flags=re.DOTALL,
)
return cleaned.strip()
# Collect displayable entries (skip system, tool-result messages)
entries = [] # list of (role, display_text)
_last_asst_idx = None # index of last assistant entry
@ -3148,7 +3200,7 @@ class HermesCLI:
elif role == "assistant":
text = "" if content is None else str(content)
text = _strip_reasoning(text)
text = _strip_reasoning_tags(text)
parts = []
full_parts = [] # un-truncated version
if text:
@ -3487,6 +3539,26 @@ class HermesCLI:
killed = process_registry.kill_all()
print(f" ✅ Stopped {killed} process(es).")
def _handle_agents_command(self):
"""Handle /agents — show background processes and agent status."""
from tools.process_registry import format_uptime_short, process_registry
processes = process_registry.list_sessions()
running = [p for p in processes if p.get("status") == "running"]
finished = [p for p in processes if p.get("status") != "running"]
_cprint(f" Running processes: {len(running)}")
for p in running:
cmd = p.get("command", "")[:80]
up = format_uptime_short(p.get("uptime_seconds", 0))
_cprint(f" {p.get('session_id', '?')} · {up} · {cmd}")
if finished:
_cprint(f" Recently finished: {len(finished)}")
agent_running = getattr(self, "_agent_running", False)
_cprint(f" Agent: {'running' if agent_running else 'idle'}")
def _handle_paste_command(self):
"""Handle /paste — explicitly check clipboard for an image.
@ -3512,6 +3584,61 @@ class HermesCLI:
else:
_cprint(f" {_DIM}(._.) No image found in clipboard{_RST}")
def _write_osc52_clipboard(self, text: str) -> None:
"""Copy *text* to terminal clipboard via OSC 52."""
payload = base64.b64encode(text.encode("utf-8")).decode("ascii")
seq = f"\x1b]52;c;{payload}\x07"
out = getattr(self, "_app", None)
output = getattr(out, "output", None) if out else None
if output and hasattr(output, "write_raw"):
output.write_raw(seq)
output.flush()
return
if output and hasattr(output, "write"):
output.write(seq)
output.flush()
return
sys.stdout.write(seq)
sys.stdout.flush()
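# Example: copying "hi" emits "\x1b]52;c;aGk=\x07" (base64 of "hi" is "aGk=").
# Terminals without OSC 52 support simply ignore the sequence.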
def _handle_copy_command(self, cmd_original: str) -> None:
"""Handle /copy [number] — copy assistant output to clipboard."""
parts = cmd_original.split(maxsplit=1)
arg = parts[1].strip() if len(parts) > 1 else ""
assistant = [m for m in self.conversation_history if m.get("role") == "assistant"]
if not assistant:
_cprint(" Nothing to copy yet.")
return
if arg:
try:
idx = int(arg) - 1
except ValueError:
_cprint(" Usage: /copy [number]")
return
if idx < 0 or idx >= len(assistant):
_cprint(f" Invalid response number. Use 1-{len(assistant)}.")
return
else:
idx = len(assistant) - 1
while idx >= 0 and not _assistant_copy_text(assistant[idx].get("content")):
idx -= 1
if idx < 0:
_cprint(" Nothing to copy in assistant responses yet.")
return
text = _assistant_copy_text(assistant[idx].get("content"))
if not text:
_cprint(" Nothing to copy in that assistant response.")
return
try:
self._write_osc52_clipboard(text)
_cprint(f" Copied assistant response #{idx + 1} to clipboard")
except Exception as e:
_cprint(f" Clipboard copy failed: {e}")
def _handle_image_command(self, cmd_original: str):
"""Handle /image <path> — attach a local image file for the next prompt."""
raw_args = (cmd_original.split(None, 1)[1].strip() if " " in cmd_original else "")
@ -3648,7 +3775,7 @@ class HermesCLI:
skin = get_active_skin()
separator_color = skin.get_color("banner_dim", "#B8860B")
accent_color = skin.get_color("ui_accent", "#FFBF00")
label_color = skin.get_color("ui_label", "#4dd0e1")
label_color = skin.get_color("ui_label", "#DAA520")
except Exception:
separator_color, accent_color, label_color = "#B8860B", "#FFBF00", "cyan"
toolsets_info = ""
@ -3897,23 +4024,14 @@ class HermesCLI:
def _handle_profile_command(self):
"""Display active profile name and home directory."""
from hermes_constants import get_hermes_home, display_hermes_home
from hermes_constants import display_hermes_home
from hermes_cli.profiles import get_active_profile_name
home = get_hermes_home()
display = display_hermes_home()
profiles_parent = Path.home() / ".hermes" / "profiles"
try:
rel = home.relative_to(profiles_parent)
profile_name = str(rel).split("/")[0]
except ValueError:
profile_name = None
profile_name = get_active_profile_name()
print()
if profile_name:
print(f" Profile: {profile_name}")
else:
print(" Profile: default")
print(f" Profile: {profile_name}")
print(f" Home: {display}")
print()
@ -4094,6 +4212,8 @@ class HermesCLI:
self.agent.flush_memories(self.conversation_history)
except (Exception, KeyboardInterrupt):
pass
# Trigger memory extraction on the old session before session_id rotates.
self.agent.commit_memory_session(self.conversation_history)
self._notify_session_boundary("on_session_finalize")
elif self.agent:
# First session or empty history — still finalize the old session
@ -4492,6 +4612,34 @@ class HermesCLI:
self._restore_modal_input_snapshot()
self._invalidate(min_interval=0.0)
@staticmethod
def _compute_model_picker_viewport(
selected: int,
scroll_offset: int,
n: int,
term_rows: int,
reserved_below: int = 6,
panel_chrome: int = 6,
min_visible: int = 3,
) -> tuple[int, int]:
"""Resolve (scroll_offset, visible) for the /model picker viewport.
``reserved_below`` matches the approval / clarify panels input area,
status bar, and separators below the panel. ``panel_chrome`` covers
this panel's own borders + blanks + hint row. The remaining rows hold
the scrollable list, with the offset slid to keep ``selected`` on screen.
"""
max_visible = max(min_visible, term_rows - reserved_below - panel_chrome)
if n <= max_visible:
return 0, n
visible = max_visible
if selected < scroll_offset:
scroll_offset = selected
elif selected >= scroll_offset + visible:
scroll_offset = selected - visible + 1
scroll_offset = max(0, min(scroll_offset, n - visible))
return scroll_offset, visible
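# Worked example (24-row terminal, 36 models, defaults above):
#   max_visible = 24 - 6 - 6 = 12, so visible = 12
#   selected=20, scroll_offset=0 → offset slides to 20 - 12 + 1 = 9,
#   returning (9, 12) with the selection on the last visible row.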
def _apply_model_switch_result(self, result, persist_global: bool) -> None:
if not result.success:
_cprint(f"{result.error_message}")
@ -4582,16 +4730,19 @@ class HermesCLI:
self._close_model_picker()
return
provider_data = providers[selected]
model_list = []
try:
from hermes_cli.models import provider_model_ids
live = provider_model_ids(provider_data["slug"])
if live:
model_list = live
except Exception:
pass
# Use the curated model list from list_authenticated_providers()
# (same lists as `hermes model` and gateway pickers).
# Only fall back to the live provider catalog when the curated
# list is empty (e.g. user-defined endpoints with no curated list).
model_list = provider_data.get("models", [])
if not model_list:
try:
from hermes_cli.models import provider_model_ids
live = provider_model_ids(provider_data["slug"])
if live:
model_list = live
except Exception:
pass
state["stage"] = "model"
state["provider_data"] = provider_data
state["model_list"] = model_list
@ -4899,6 +5050,52 @@ class HermesCLI:
return "\n".join(p for p in parts if p)
return str(value)
def _handle_gquota_command(self, cmd_original: str) -> None:
"""Show Google Gemini Code Assist quota usage for the current OAuth account."""
try:
from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials
from agent.google_code_assist import retrieve_user_quota, CodeAssistError
except ImportError as exc:
self.console.print(f" [red]Gemini modules unavailable: {exc}[/]")
return
try:
access_token = get_valid_access_token()
except GoogleOAuthError as exc:
self.console.print(f" [yellow]{exc}[/]")
self.console.print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.")
return
creds = load_credentials()
project_id = (creds.project_id if creds else "") or ""
try:
buckets = retrieve_user_quota(access_token, project_id=project_id)
except CodeAssistError as exc:
self.console.print(f" [red]Quota lookup failed:[/] {exc}")
return
if not buckets:
self.console.print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]")
return
# Sort for stable display, group by model
buckets.sort(key=lambda b: (b.model_id, b.token_type))
self.console.print()
self.console.print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})")
self.console.print()
for b in buckets:
pct = max(0.0, min(1.0, b.remaining_fraction))
width = 20
filled = int(round(pct * width))
bar = "" * filled + "" * (width - filled)
pct_str = f"{int(pct * 100):3d}%"
header = b.model_id
if b.token_type:
header += f" [{b.token_type}]"
self.console.print(f" {header:40s} {bar} {pct_str}")
self.console.print()
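# e.g. remaining_fraction=0.42 → filled = round(0.42 * 20) = 8, so a bucket
# renders roughly "████████░░░░░░░░░░░░  42%" after its padded model header.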
def _handle_personality_command(self, cmd: str):
"""Handle the /personality command to set predefined personalities."""
parts = cmd.split(maxsplit=1)
@ -5408,6 +5605,8 @@ class HermesCLI:
self._handle_model_switch(cmd_original)
elif canonical == "provider":
self._show_model_and_providers()
elif canonical == "gquota":
self._handle_gquota_command(cmd_original)
elif canonical == "personality":
# Use original case (handler lowercases the personality name itself)
@ -5452,6 +5651,8 @@ class HermesCLI:
self._show_usage()
elif canonical == "insights":
self._show_insights(cmd_original)
elif canonical == "copy":
self._handle_copy_command(cmd_original)
elif canonical == "debug":
self._handle_debug_command()
elif canonical == "paste":
@ -5482,7 +5683,8 @@ class HermesCLI:
version = f" v{p['version']}" if p["version"] else ""
tools = f"{p['tools']} tools" if p["tools"] else ""
hooks = f"{p['hooks']} hooks" if p["hooks"] else ""
parts = [x for x in [tools, hooks] if x]
commands = f"{p['commands']} commands" if p.get("commands") else ""
parts = [x for x in [tools, hooks, commands] if x]
detail = f" ({', '.join(parts)})" if parts else ""
error = f"{p['error']}" if p["error"] else ""
print(f" {status} {p['name']}{version}{detail}{error}")
@ -5494,6 +5696,8 @@ class HermesCLI:
self._handle_snapshot_command(cmd_original)
elif canonical == "stop":
self._handle_stop_command()
elif canonical == "agents":
self._handle_agents_command()
elif canonical == "background":
self._handle_background_command(cmd_original)
elif canonical == "btw":
@ -5510,6 +5714,30 @@ class HermesCLI:
_cprint(f" Queued for the next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
else:
_cprint(f" Queued: {payload[:80]}{'...' if len(payload) > 80 else ''}")
elif canonical == "steer":
# Inject a message after the next tool call without interrupting.
# If the agent is actively running, push the text into the agent's
# pending_steer slot — the drain hook in _execute_tool_calls_*
# will append it to the next tool result's content. If no agent
# is running, fall back to queue semantics (same as /queue).
parts = cmd_original.split(None, 1)
payload = parts[1].strip() if len(parts) > 1 else ""
if not payload:
_cprint(" Usage: /steer <prompt>")
elif self._agent_running and self.agent is not None and hasattr(self.agent, "steer"):
try:
accepted = self.agent.steer(payload)
except Exception as exc:
_cprint(f" Steer failed: {exc}")
else:
if accepted:
_cprint(f" ⏩ Steer queued — arrives after the next tool call: {payload[:80]}{'...' if len(payload) > 80 else ''}")
else:
_cprint(" Steer rejected (empty payload).")
else:
# No active run — treat as a normal next-turn message.
self._pending_input.put(payload)
_cprint(f" No agent running; queued as next turn: {payload[:80]}{'...' if len(payload) > 80 else ''}")
elif canonical == "skin":
self._handle_skin_command(cmd_original)
elif canonical == "voice":
@ -5947,7 +6175,7 @@ class HermesCLI:
parts = cmd.strip().split(None, 1)
sub = parts[1].lower().strip() if len(parts) > 1 else "status"
_DEFAULT_CDP = "http://localhost:9222"
_DEFAULT_CDP = "http://127.0.0.1:9222"
current = os.environ.get("BROWSER_CDP_URL", "").strip()
if sub.startswith("connect"):
@ -6194,13 +6422,21 @@ class HermesCLI:
def _toggle_yolo(self):
"""Toggle YOLO mode — skip all dangerous command approval prompts."""
import os
from hermes_cli.colors import Colors as _Colors
current = bool(os.environ.get("HERMES_YOLO_MODE"))
if current:
os.environ.pop("HERMES_YOLO_MODE", None)
self.console.print(" ⚠ YOLO mode [bold red]OFF[/] — dangerous commands will require approval.")
_cprint(
f" ⚠ YOLO mode {_Colors.BOLD}{_Colors.RED}OFF{_Colors.RESET}"
" — dangerous commands will require approval."
)
else:
os.environ["HERMES_YOLO_MODE"] = "1"
self.console.print(" ⚡ YOLO mode [bold green]ON[/] — all commands auto-approved. Use with caution.")
_cprint(
f" ⚡ YOLO mode {_Colors.BOLD}{_Colors.GREEN}ON{_Colors.RESET}"
" — all commands auto-approved. Use with caution."
)
def _handle_reasoning_command(self, cmd: str):
"""Handle /reasoning — manage effort level and display toggle.
@ -6799,8 +7035,7 @@ class HermesCLI:
)
raise RuntimeError(
"Voice mode requires sounddevice and numpy.\n"
"Install with: pip install sounddevice numpy\n"
"Or: pip install hermes-agent[voice]"
f"Install with: {sys.executable} -m pip install sounddevice numpy"
)
if not reqs.get("stt_available", reqs.get("stt_key_set")):
raise RuntimeError(
@ -7076,8 +7311,7 @@ class HermesCLI:
_cprint(f" {_DIM}Then install/update the Termux:API Android app for microphone capture{_RST}")
_cprint(f" {_BOLD}Option 2: pkg install python-numpy portaudio && python -m pip install sounddevice{_RST}")
else:
_cprint(f"\n {_BOLD}Install: pip install {' '.join(reqs['missing_packages'])}{_RST}")
_cprint(f" {_DIM}Or: pip install hermes-agent[voice]{_RST}")
_cprint(f"\n {_BOLD}Install: {sys.executable} -m pip install {' '.join(reqs['missing_packages'])}{_RST}")
return
with self._voice_lock:
@ -7377,7 +7611,15 @@ class HermesCLI:
self._invalidate()
def _get_approval_display_fragments(self):
"""Render the dangerous-command approval panel for the prompt_toolkit UI."""
"""Render the dangerous-command approval panel for the prompt_toolkit UI.
Layout priority: title + command + choices must always render, even if
the terminal is short or the description is long. Description is placed
at the bottom of the panel and gets truncated to fit the remaining row
budget. This prevents HSplit from clipping approve/deny off-screen when
tirith findings produce multi-paragraph descriptions or when the user
runs in a compact terminal pane.
"""
state = self._approval_state
if not state:
return []
@ -7436,22 +7678,89 @@ class HermesCLI:
box_width = _panel_box_width(title, preview_lines)
inner_text_width = max(8, box_width - 2)
# Pre-wrap the mandatory content — command + choices must always render.
cmd_wrapped = _wrap_panel_text(cmd_display, inner_text_width)
# (choice_index, wrapped_line) so we can re-apply selected styling below
choice_wrapped: list[tuple[int, str]] = []
for i, choice in enumerate(choices):
label = choice_labels.get(choice, choice)
prefix = '▸ ' if i == selected else '  '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Budget vertical space so HSplit never clips the command or choices.
# Panel chrome (full layout with separators):
# top border + title + blank_after_title
# + blank_between_cmd_choices + bottom border = 5 rows.
# In tight terminals we collapse to:
# top border + title + bottom border = 3 rows (no blanks).
#
# reserved_below: rows consumed below the approval panel by the
# spinner/tool-progress line, status bar, input area, separators, and
# prompt symbol. Measured at ~6 rows during live PTY approval prompts;
# budget 6 so we don't overestimate the panel's room.
term_rows = shutil.get_terminal_size((100, 24)).lines
chrome_full = 5
chrome_tight = 3
reserved_below = 6
available = max(0, term_rows - reserved_below)
mandatory_full = chrome_full + len(cmd_wrapped) + len(choice_wrapped)
# If the full-chrome panel doesn't fit, drop the separator blanks.
# This keeps the command and every choice on-screen in compact terminals.
use_compact_chrome = mandatory_full > available
chrome_rows = chrome_tight if use_compact_chrome else chrome_full
# If the command itself is too long to leave room for choices (e.g. user
# hit "view" on a multi-hundred-character command), truncate it so the
# approve/deny buttons still render. Keep at least 1 row of command.
max_cmd_rows = max(1, available - chrome_rows - len(choice_wrapped))
if len(cmd_wrapped) > max_cmd_rows:
keep = max(1, max_cmd_rows - 1) if max_cmd_rows > 1 else 1
cmd_wrapped = cmd_wrapped[:keep] + ["… (command truncated — use /logs or /debug for full text)"]
# Allocate any remaining rows to description. The extra -1 in full mode
# accounts for the blank separator between choices and description.
mandatory_no_desc = chrome_rows + len(cmd_wrapped) + len(choice_wrapped)
desc_sep_cost = 0 if use_compact_chrome else 1
available_for_desc = available - mandatory_no_desc - desc_sep_cost
# Even on huge terminals, cap description height so the panel stays compact.
available_for_desc = max(0, min(available_for_desc, 10))
desc_wrapped = _wrap_panel_text(description, inner_text_width) if description else []
if available_for_desc < 1 or not desc_wrapped:
desc_wrapped = []
elif len(desc_wrapped) > available_for_desc:
keep = max(1, available_for_desc - 1)
desc_wrapped = desc_wrapped[:keep] + ["… (description truncated)"]
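# Worked example: term_rows=24 → available = 18; with 2 command rows and
# 4 choice rows the full-chrome panel needs 5 + 2 + 4 = 11 rows, leaving
# min(18 - 11 - 1, 10) = 6 rows for the description.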
# Render: title → command → choices → description (description last so
# any remaining overflow clips from the bottom of the least-critical
# content, never from the command or choices). Use compact chrome (no
# blank separators) when the terminal is tight.
lines = []
lines.append(('class:approval-border', '╭' + ('─' * box_width) + '\n'))
_append_panel_line(lines, 'class:approval-border', 'class:approval-title', title, box_width)
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for wrapped in _wrap_panel_text(description, inner_text_width):
_append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width)
for wrapped in _wrap_panel_text(cmd_display, inner_text_width):
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for wrapped in cmd_wrapped:
_append_panel_line(lines, 'class:approval-border', 'class:approval-cmd', wrapped, box_width)
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for i, choice in enumerate(choices):
label = choice_labels.get(choice, choice)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for i, wrapped in choice_wrapped:
style = 'class:approval-selected' if i == selected else 'class:approval-choice'
prefix = '▸ ' if i == selected else '  '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
_append_panel_line(lines, 'class:approval-border', style, wrapped, box_width)
_append_blank_panel_line(lines, 'class:approval-border', box_width)
_append_panel_line(lines, 'class:approval-border', style, wrapped, box_width)
if desc_wrapped:
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:approval-border', box_width)
for wrapped in desc_wrapped:
_append_panel_line(lines, 'class:approval-border', 'class:approval-desc', wrapped, box_width)
lines.append(('class:approval-border', '╰' + ('─' * box_width) + '\n'))
return lines
@ -7747,7 +8056,33 @@ class HermesCLI:
# Fallback for non-interactive mode (e.g., single-query)
agent_thread.join(0.1)
agent_thread.join() # Ensure agent thread completes
# Wait for the agent thread to finish. After an interrupt the
# agent may take a few seconds to clean up (kill subprocess, persist
# session). Poll instead of a blocking join so the process_loop
# stays responsive — if the user sent another interrupt or the
# agent gets stuck, we can break out instead of freezing forever.
if interrupt_msg is not None:
# Interrupt path: poll briefly, then move on. The agent
# thread is daemon — it dies on process exit regardless.
for _wait_tick in range(50): # 50 * 0.2s = 10s max
agent_thread.join(timeout=0.2)
if not agent_thread.is_alive():
break
# Check if user fired ANOTHER interrupt (Ctrl+C sets
# _should_exit which process_loop checks on next pass).
if getattr(self, '_should_exit', False):
break
if agent_thread.is_alive():
logger.warning(
"Agent thread still alive after interrupt "
"(thread %s). Daemon thread will be cleaned up "
"on exit.",
agent_thread.ident,
)
else:
# Normal completion: agent thread should be done already,
# but guard against edge cases.
agent_thread.join(timeout=30)
# Proactively clean up async clients whose event loop is dead.
# The agent thread may have created AsyncOpenAI clients bound
@ -7927,7 +8262,15 @@ class HermesCLI:
else:
print(f"\n⚡ Sending after interrupt: '{preview}'")
self._pending_input.put(combined)
# If a /steer was left over (agent finished before another tool
# batch could absorb it), deliver it as the next user turn.
_leftover_steer = result.get("pending_steer") if result else None
if _leftover_steer and hasattr(self, '_pending_input'):
preview = _leftover_steer[:60] + ("..." if len(_leftover_steer) > 60 else "")
print(f"\n⏩ Delivering leftover /steer as next turn: '{preview}'")
self._pending_input.put(_leftover_steer)
return response
except Exception as e:
@ -8345,6 +8688,7 @@ class HermesCLI:
# --- /model picker modal ---
if self._model_picker_state:
self._handle_model_picker_selection()
event.app.current_buffer.reset()
event.app.invalidate()
return
@ -8510,6 +8854,13 @@ class HermesCLI:
state["selected"] = min(max_idx, state.get("selected", 0) + 1)
event.app.invalidate()
@kb.add('escape', filter=Condition(lambda: bool(self._model_picker_state)), eager=True)
def model_picker_escape(event):
"""ESC closes the /model picker."""
self._close_model_picker()
event.app.current_buffer.reset()
event.app.invalidate()
# --- History navigation: up/down browse history in normal input mode ---
# The TextArea is multiline, so by default up/down only move the cursor.
# Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@ -9040,6 +9391,7 @@ class HermesCLI:
spinner_widget = Window(
content=FormattedTextControl(get_spinner_text),
height=get_spinner_height,
wrap_lines=True,
)
spacer = Window(
@ -9076,7 +9428,13 @@ class HermesCLI:
lines.append((border_style, "" + (" " * box_width) + "\n"))
def _get_clarify_display():
"""Build styled text for the clarify question/choices panel."""
"""Build styled text for the clarify question/choices panel.
Layout priority: choices + Other option must always render even if
the question is very long. The question is budgeted to leave enough
rows for the choices and trailing chrome; anything over the budget
is truncated with a marker.
"""
state = cli_ref._clarify_state
if not state:
return []
@ -9097,48 +9455,97 @@ class HermesCLI:
box_width = _panel_box_width("Hermes needs your input", preview_lines)
inner_text_width = max(8, box_width - 2)
# Pre-wrap choices + Other option — these are mandatory.
choice_wrapped: list[tuple[int, str]] = []
if choices:
for i, choice in enumerate(choices):
prefix = '▸ ' if i == selected and not cli_ref._clarify_freetext else '  '
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Trailing Other row(s)
other_idx = len(choices)
if selected == other_idx and not cli_ref._clarify_freetext:
other_label_mand = '▸ Other (type your answer)'
elif cli_ref._clarify_freetext:
other_label_mand = '  Other (type below)'
else:
other_label_mand = '  Other (type your answer)'
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
elif cli_ref._clarify_freetext:
# Freetext-only mode: the guidance line takes the place of choices.
other_wrapped = _wrap_panel_text(
"Type your answer in the prompt below, then press Enter.",
inner_text_width,
)
else:
other_wrapped = []
# Budget the question so mandatory rows always render.
# Chrome layouts:
# full : top border + blank_after_title + blank_after_question
# + blank_before_bottom + bottom border = 5 rows
# tight: top border + bottom border = 2 rows (drop all blanks)
#
# reserved_below matches the approval-panel budget (~6 rows for
# spinner/tool-progress + status + input + separators + prompt).
term_rows = shutil.get_terminal_size((100, 24)).lines
chrome_full = 5
chrome_tight = 2
reserved_below = 6
available = max(0, term_rows - reserved_below)
mandatory_full = chrome_full + len(choice_wrapped) + len(other_wrapped)
use_compact_chrome = mandatory_full > available
chrome_rows = chrome_tight if use_compact_chrome else chrome_full
max_question_rows = max(1, available - chrome_rows - len(choice_wrapped) - len(other_wrapped))
max_question_rows = min(max_question_rows, 12) # soft cap on huge terminals
question_wrapped = _wrap_panel_text(question, inner_text_width)
if len(question_wrapped) > max_question_rows:
keep = max(1, max_question_rows - 1)
question_wrapped = question_wrapped[:keep] + ["… (question truncated)"]
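# Worked example: term_rows=24 → available = 18; with 3 choice rows and one
# Other row the question gets max(1, 18 - 5 - 3 - 1) = 9 rows before the
# truncation marker kicks in.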
lines = []
# Box top border
lines.append(('class:clarify-border', '╭─ '))
lines.append(('class:clarify-title', 'Hermes needs your input'))
lines.append(('class:clarify-border', ' ' + ('─' * max(0, box_width - len("Hermes needs your input") - 3)) + '\n'))
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
# Question text
for wrapped in _wrap_panel_text(question, inner_text_width):
# Question text (bounded)
for wrapped in question_wrapped:
_append_panel_line(lines, 'class:clarify-border', 'class:clarify-question', wrapped, box_width)
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
if cli_ref._clarify_freetext and not choices:
guidance = "Type your answer in the prompt below, then press Enter."
for wrapped in _wrap_panel_text(guidance, inner_text_width):
for wrapped in other_wrapped:
_append_panel_line(lines, 'class:clarify-border', 'class:clarify-choice', wrapped, box_width)
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
if choices:
# Multiple-choice mode: show selectable options
for i, choice in enumerate(choices):
for i, wrapped in choice_wrapped:
style = 'class:clarify-selected' if i == selected and not cli_ref._clarify_freetext else 'class:clarify-choice'
prefix = '▸ ' if i == selected and not cli_ref._clarify_freetext else '  '
wrapped_lines = _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" ")
for wrapped in wrapped_lines:
_append_panel_line(lines, 'class:clarify-border', style, wrapped, box_width)
_append_panel_line(lines, 'class:clarify-border', style, wrapped, box_width)
# "Other" option (5th line, only shown when choices exist)
# "Other" option (trailing row(s), only shown when choices exist)
other_idx = len(choices)
if selected == other_idx and not cli_ref._clarify_freetext:
other_style = 'class:clarify-selected'
other_label = '▸ Other (type your answer)'
elif cli_ref._clarify_freetext:
other_style = 'class:clarify-active-other'
other_label = '  Other (type below)'
else:
other_style = 'class:clarify-choice'
other_label = '  Other (type your answer)'
for wrapped in _wrap_panel_text(other_label, inner_text_width, subsequent_indent=" "):
for wrapped in other_wrapped:
_append_panel_line(lines, 'class:clarify-border', other_style, wrapped, box_width)
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
if not use_compact_chrome:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
lines.append(('class:clarify-border', '╰' + ('─' * box_width) + '\n'))
return lines
@ -9255,6 +9662,22 @@ class HermesCLI:
box_width = _panel_box_width(title, [hint] + choices, min_width=46, max_width=84)
inner_text_width = max(8, box_width - 6)
selected = state.get("selected", 0)
# Scrolling viewport: the panel renders into a Window with no max
# height, so without limiting visible items the bottom border and
# any items past the available terminal rows get clipped on long
# provider catalogs (e.g. Ollama Cloud's 36+ models).
try:
from prompt_toolkit.application import get_app
term_rows = get_app().output.get_size().rows
except Exception:
term_rows = shutil.get_terminal_size((100, 24)).lines
scroll_offset, visible = HermesCLI._compute_model_picker_viewport(
selected, state.get("_scroll_offset", 0), len(choices), term_rows,
)
state["_scroll_offset"] = scroll_offset
lines = []
lines.append(('class:clarify-border', '╭─ '))
lines.append(('class:clarify-title', title))
@ -9262,8 +9685,8 @@ class HermesCLI:
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
_append_panel_line(lines, 'class:clarify-border', 'class:clarify-hint', hint, box_width)
_append_blank_panel_line(lines, 'class:clarify-border', box_width)
selected = state.get("selected", 0)
for idx, choice in enumerate(choices):
for idx in range(scroll_offset, scroll_offset + visible):
choice = choices[idx]
style = 'class:clarify-selected' if idx == selected else 'class:clarify-choice'
prefix = '▸ ' if idx == selected else '  '
for wrapped in _wrap_panel_text(prefix + choice, inner_text_width, subsequent_indent=' '):
@ -9668,8 +10091,36 @@ class HermesCLI:
# Register signal handlers for graceful shutdown on SSH disconnect / SIGTERM
def _signal_handler(signum, frame):
"""Handle SIGHUP/SIGTERM by triggering graceful cleanup."""
"""Handle SIGHUP/SIGTERM by triggering graceful cleanup.
Calls ``self.agent.interrupt()`` first so the agent daemon
thread's poll loop sees the per-thread interrupt and kills the
tool's subprocess group via ``_kill_process`` (os.killpg).
Without this, the main thread dies from KeyboardInterrupt and
the daemon thread is killed with it before it can run one
more poll iteration to clean up the subprocess, which was
spawned with ``os.setsid`` and therefore survives as an orphan
with PPID=1.
Grace window (``HERMES_SIGTERM_GRACE``, default 1.5 s) gives
the daemon time to: detect the interrupt (next 200 ms poll),
call _kill_process (SIGTERM + 1 s wait + SIGKILL if needed),
and return from _wait_for_process. ``time.sleep`` releases the
GIL so the daemon actually runs during the window.
"""
logger.debug("Received signal %s, triggering graceful shutdown", signum)
try:
if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
self.agent.interrupt(f"received signal {signum}")
import time as _t
try:
_grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
except (TypeError, ValueError):
_grace = 1.5
if _grace > 0:
_t.sleep(_grace)
except Exception:
pass # never block signal handling
raise KeyboardInterrupt()
try:
@ -9972,6 +10423,45 @@ def main(
# Register cleanup for single-query mode (interactive mode registers in run())
atexit.register(_run_cleanup)
# Also install signal handlers in single-query / `-q` mode. Interactive
# mode registers its own inside HermesCLI.run(), but `-q` runs
# cli.agent.run_conversation() below and AIAgent spawns worker threads
# for tools — so when SIGTERM arrives on the main thread, raising
# KeyboardInterrupt only unwinds the main thread, not the worker
# running _wait_for_process. Python then exits, the child subprocess
# (spawned with os.setsid, its own process group) is reparented to
# init and keeps running as an orphan.
#
# Fix: route SIGTERM/SIGHUP through agent.interrupt() which sets the
# per-thread interrupt flag the worker's poll loop checks every 200 ms.
# Give the worker a grace window to call _kill_process (SIGTERM to the
# process group, then SIGKILL after 1 s), then raise KeyboardInterrupt
# so main unwinds normally. HERMES_SIGTERM_GRACE overrides the 1.5 s
# default for debugging.
def _signal_handler_q(signum, frame):
logger.debug("Received signal %s in single-query mode", signum)
try:
_agent = getattr(cli, "agent", None)
if _agent is not None:
_agent.interrupt(f"received signal {signum}")
import time as _t
try:
_grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
except (TypeError, ValueError):
_grace = 1.5
if _grace > 0:
_t.sleep(_grace)
except Exception:
pass # never block signal handling
raise KeyboardInterrupt()
try:
import signal as _signal
_signal.signal(_signal.SIGTERM, _signal_handler_q)
if hasattr(_signal, "SIGHUP"):
_signal.signal(_signal.SIGHUP, _signal_handler_q)
except Exception:
pass # signal handler may fail in restricted environments
# Handle single query mode
if query or image:
@ -9999,6 +10489,11 @@ def main(
):
cli.agent.quiet_mode = True
cli.agent.suppress_status_output = True
# Suppress streaming display callbacks so stdout stays
# machine-readable (no styled "Hermes" box, no tool-gen
# status lines). The response is printed once below.
cli.agent.stream_delta_callback = None
cli.agent.tool_gen_callback = None
result = cli.agent.run_conversation(
user_message=effective_query,
conversation_history=cli.conversation_history,
@ -10006,7 +10501,8 @@ def main(
response = result.get("final_response", "") if isinstance(result, dict) else str(result)
if response:
print(response)
print(f"\nsession_id: {cli.session_id}")
# Session ID goes to stderr so piped stdout is clean.
print(f"\nsession_id: {cli.session_id}", file=sys.stderr)
# Ensure proper exit code for automation wrappers
sys.exit(1 if isinstance(result, dict) and result.get("failed") else 0)

View file

@ -501,6 +501,12 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
if schedule_changed:
updated_schedule = updated["schedule"]
# The API may pass schedule as a raw string (e.g. "every 10m")
# instead of a pre-parsed dict. Normalize it the same way
# create_job() does, so downstream code can call .get() safely.
if isinstance(updated_schedule, str):
updated_schedule = parse_schedule(updated_schedule)
updated["schedule"] = updated_schedule
updated["schedule_display"] = updates.get(
"schedule_display",
updated_schedule.get("display", updated.get("schedule_display")),

View file

@ -10,6 +10,7 @@ runs at a time if multiple processes overlap.
import asyncio
import concurrent.futures
import contextvars
import json
import logging
import os
@ -26,7 +27,7 @@ except ImportError:
except ImportError:
msvcrt = None
from pathlib import Path
from typing import Optional
from typing import List, Optional
# Add parent directory to path for imports BEFORE repo-level imports.
# Without this, standalone invocations (e.g. after `hermes update` reloads
@ -48,6 +49,33 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({
"qqbot",
})
# Platforms that support a configured cron/notification home target, mapped to
# the environment variable used by gateway setup/runtime config.
_HOME_TARGET_ENV_VARS = {
"matrix": "MATRIX_HOME_ROOM",
"telegram": "TELEGRAM_HOME_CHANNEL",
"discord": "DISCORD_HOME_CHANNEL",
"slack": "SLACK_HOME_CHANNEL",
"signal": "SIGNAL_HOME_CHANNEL",
"mattermost": "MATTERMOST_HOME_CHANNEL",
"sms": "SMS_HOME_CHANNEL",
"email": "EMAIL_HOME_ADDRESS",
"dingtalk": "DINGTALK_HOME_CHANNEL",
"feishu": "FEISHU_HOME_CHANNEL",
"wecom": "WECOM_HOME_CHANNEL",
"weixin": "WEIXIN_HOME_CHANNEL",
"bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
"qqbot": "QQBOT_HOME_CHANNEL",
}
# Legacy env var names kept for back-compat. Each entry is the current
# primary env var → the previous name. _get_home_target_chat_id falls
# back to the legacy name if the primary is unset, so users who set the
# old name before the rename keep working until they migrate.
_LEGACY_HOME_TARGET_ENV_VARS = {
"QQBOT_HOME_CHANNEL": "QQ_HOME_CHANNEL",
}
from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
# Sentinel: when a cron agent has nothing new to report, it can start its
@ -75,15 +103,28 @@ def _resolve_origin(job: dict) -> Optional[dict]:
return None
def _resolve_delivery_target(job: dict) -> Optional[dict]:
"""Resolve the concrete auto-delivery target for a cron job, if any."""
deliver = job.get("deliver", "local")
def _get_home_target_chat_id(platform_name: str) -> str:
"""Return the configured home target chat/room ID for a delivery platform."""
env_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower())
if not env_var:
return ""
value = os.getenv(env_var, "")
if not value:
legacy = _LEGACY_HOME_TARGET_ENV_VARS.get(env_var)
if legacy:
value = os.getenv(legacy, "")
return value
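# e.g. _get_home_target_chat_id("qqbot") reads QQBOT_HOME_CHANNEL first and
# falls back to the legacy QQ_HOME_CHANNEL when only the old name is set.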
def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]:
"""Resolve one concrete auto-delivery target for a cron job."""
origin = _resolve_origin(job)
if deliver == "local":
if deliver_value == "local":
return None
if deliver == "origin":
if deliver_value == "origin":
if origin:
return {
"platform": origin["platform"],
@ -92,8 +133,8 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
}
# Origin missing (e.g. job created via API/script) — try each
# platform's home channel as a fallback instead of silently dropping.
for platform_name in ("matrix", "telegram", "discord", "slack", "bluebubbles"):
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
for platform_name in _HOME_TARGET_ENV_VARS:
chat_id = _get_home_target_chat_id(platform_name)
if chat_id:
logger.info(
"Job '%s' has deliver=origin but no origin; falling back to %s home channel",
@ -107,8 +148,8 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
}
return None
if ":" in deliver:
platform_name, rest = deliver.split(":", 1)
if ":" in deliver_value:
platform_name, rest = deliver_value.split(":", 1)
platform_key = platform_name.lower()
from tools.send_message_tool import _parse_target_ref
@ -138,7 +179,7 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
"thread_id": thread_id,
}
platform_name = deliver
platform_name = deliver_value
if origin and origin.get("platform") == platform_name:
return {
"platform": platform_name,
@ -148,7 +189,7 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
if platform_name.lower() not in _KNOWN_DELIVERY_PLATFORMS:
return None
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
chat_id = _get_home_target_chat_id(platform_name)
if not chat_id:
return None
@ -159,6 +200,30 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
}
def _resolve_delivery_targets(job: dict) -> List[dict]:
"""Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
deliver = job.get("deliver", "local")
if deliver == "local":
return []
parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
seen = set()
targets = []
for part in parts:
target = _resolve_single_delivery_target(job, part)
if target:
key = (target["platform"].lower(), str(target["chat_id"]), target.get("thread_id"))
if key not in seen:
seen.add(key)
targets.append(target)
return targets
def _resolve_delivery_target(job: dict) -> Optional[dict]:
"""Resolve the concrete auto-delivery target for a cron job, if any."""
targets = _resolve_delivery_targets(job)
return targets[0] if targets else None
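# Example (IDs illustrative): deliver = "origin,discord:123456" yields the
# origin chat's platform/chat_id plus a Discord target for chat 123456;
# duplicate (platform, chat_id, thread_id) keys are collapsed, and
# _resolve_delivery_target() keeps returning only the first match.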
# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
_VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
@ -199,7 +264,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Optional[str]:
"""
Deliver job output to the configured target (origin chat, specific platform, etc.).
Deliver job output to the configured target(s) (origin chat, specific platform, etc.).
When ``adapters`` and ``loop`` are provided (gateway is running), tries to
use the live adapter first; this supports E2EE rooms (e.g. Matrix) where
@ -208,33 +273,14 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
Returns None on success, or an error string on failure.
"""
target = _resolve_delivery_target(job)
if not target:
targets = _resolve_delivery_targets(job)
if not targets:
if job.get("deliver", "local") != "local":
msg = f"no delivery target resolved for deliver={job.get('deliver', 'local')}"
logger.warning("Job '%s': %s", job["id"], msg)
return msg
return None # local-only jobs don't deliver — not a failure
platform_name = target["platform"]
chat_id = target["chat_id"]
thread_id = target.get("thread_id")
# Diagnostic: log thread_id for topic-aware delivery debugging
origin = job.get("origin") or {}
origin_thread = origin.get("thread_id")
if origin_thread and not thread_id:
logger.warning(
"Job '%s': origin has thread_id=%s but delivery target lost it "
"(deliver=%s, target=%s)",
job["id"], origin_thread, job.get("deliver", "local"), target,
)
elif thread_id:
logger.debug(
"Job '%s': delivering to %s:%s thread_id=%s",
job["id"], platform_name, chat_id, thread_id,
)
from tools.send_message_tool import _send_to_platform
from gateway.config import load_gateway_config, Platform
@ -257,24 +303,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
"bluebubbles": Platform.BLUEBUBBLES,
"qqbot": Platform.QQBOT,
}
platform = platform_map.get(platform_name.lower())
if not platform:
msg = f"unknown platform '{platform_name}'"
logger.warning("Job '%s': %s", job["id"], msg)
return msg
try:
config = load_gateway_config()
except Exception as e:
msg = f"failed to load gateway config: {e}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
pconfig = config.platforms.get(platform)
if not pconfig or not pconfig.enabled:
msg = f"platform '{platform_name}' not configured/enabled"
logger.warning("Job '%s': %s", job["id"], msg)
return msg
# Optionally wrap the content with a header/footer so the user knows this
# is a cron delivery. Wrapping is on by default; set cron.wrap_response: false
@ -288,11 +316,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
if wrap_response:
task_name = job.get("name", job["id"])
job_id = job.get("id", "")
delivery_content = (
f"Cronjob Response: {task_name}\n"
f"(job_id: {job_id})\n"
f"-------------\n\n"
f"{content}\n\n"
f"Note: The agent cannot see this message, and therefore cannot respond to it."
f"To stop or manage this job, send me a new message (e.g. \"stop reminder {task_name}\")."
)
else:
delivery_content = content
@ -301,67 +331,117 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
from gateway.platforms.base import BasePlatformAdapter
media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content)
# Prefer the live adapter when the gateway is running — this supports E2EE
# rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
runtime_adapter = (adapters or {}).get(platform)
if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
send_metadata = {"thread_id": thread_id} if thread_id else None
try:
# Send cleaned text (MEDIA tags stripped) — not the raw content
text_to_send = cleaned_delivery_content.strip()
adapter_ok = True
if text_to_send:
future = asyncio.run_coroutine_threadsafe(
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
send_result = future.result(timeout=60)
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, err,
)
adapter_ok = False # fall through to standalone path
try:
config = load_gateway_config()
except Exception as e:
msg = f"failed to load gateway config: {e}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
# Send extracted media files as native attachments via the live adapter
if adapter_ok and media_files:
_send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
delivery_errors = []
if adapter_ok:
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
return None
except Exception as e:
for target in targets:
platform_name = target["platform"]
chat_id = target["chat_id"]
thread_id = target.get("thread_id")
# Diagnostic: log thread_id for topic-aware delivery debugging
origin = job.get("origin") or {}
origin_thread = origin.get("thread_id")
if origin_thread and not thread_id:
logger.warning(
"Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, e,
"Job '%s': origin has thread_id=%s but delivery target lost it "
"(deliver=%s, target=%s)",
job["id"], origin_thread, job.get("deliver", "local"), target,
)
elif thread_id:
logger.debug(
"Job '%s': delivering to %s:%s thread_id=%s",
job["id"], platform_name, chat_id, thread_id,
)
# Standalone path: run the async send in a fresh event loop (safe from any thread)
coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
try:
result = asyncio.run(coro)
except RuntimeError:
# asyncio.run() checks for a running loop before awaiting the coroutine;
# when it raises, the original coro was never started — close it to
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
result = future.result(timeout=30)
except Exception as e:
msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
platform = platform_map.get(platform_name.lower())
if not platform:
msg = f"unknown platform '{platform_name}'"
logger.warning("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
if result and result.get("error"):
msg = f"delivery error: {result['error']}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
# Prefer the live adapter when the gateway is running — this supports E2EE
# rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
runtime_adapter = (adapters or {}).get(platform)
delivered = False
if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
send_metadata = {"thread_id": thread_id} if thread_id else None
try:
# Send cleaned text (MEDIA tags stripped) — not the raw content
text_to_send = cleaned_delivery_content.strip()
adapter_ok = True
if text_to_send:
future = asyncio.run_coroutine_threadsafe(
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
send_result = future.result(timeout=60)
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, err,
)
adapter_ok = False # fall through to standalone path
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
# Send extracted media files as native attachments via the live adapter
if adapter_ok and media_files:
_send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
if adapter_ok:
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
delivered = True
except Exception as e:
logger.warning(
"Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, e,
)
if not delivered:
pconfig = config.platforms.get(platform)
if not pconfig or not pconfig.enabled:
msg = f"platform '{platform_name}' not configured/enabled"
logger.warning("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
# Standalone path: run the async send in a fresh event loop (safe from any thread)
coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
try:
result = asyncio.run(coro)
except RuntimeError:
# asyncio.run() checks for a running loop before awaiting the coroutine;
# when it raises, the original coro was never started — close it to
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
result = future.result(timeout=30)
except Exception as e:
msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
logger.error("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
if result and result.get("error"):
msg = f"delivery error: {result['error']}"
logger.error("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
if delivery_errors:
return "; ".join(delivery_errors)
return None
@ -768,7 +848,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
_POLL_INTERVAL = 5.0
_cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
_cron_future = _cron_pool.submit(agent.run_conversation, prompt)
# Preserve scheduler-scoped ContextVar state (for example skill-declared
# env passthrough registrations) when the cron run hops into the worker
# thread used for inactivity timeout monitoring.
_cron_context = contextvars.copy_context()
_cron_future = _cron_pool.submit(_cron_context.run, agent.run_conversation, prompt)
_inactivity_timeout = False
try:
if _cron_inactivity_limit is None:
@ -830,6 +914,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
)
final_response = result.get("final_response", "") or ""
# Strip leaked placeholder text that upstream may inject on empty completions.
if final_response.strip() == "(No response generated)":
final_response = ""
# Use a separate variable for log display; keep final_response clean
# for delivery logic (empty response = no delivery).
logged_response = final_response if final_response else "(No response generated)"
@ -969,6 +1056,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
delivery_error = str(de)
logger.error("Delivery failed for job %s: %s", job["id"], de)
# Treat empty final_response as a soft failure so last_status
# is not "ok" — the agent ran but produced nothing useful.
# (issue #8585)
if success and not final_response:
success = False
error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
executed += 1

19
docker/entrypoint.sh Normal file → Executable file
View file

@ -1,13 +1,14 @@
#!/bin/bash
# Docker entrypoint: bootstrap config files into the mounted volume, then run hermes.
# Docker/Podman entrypoint: bootstrap config files into the mounted volume, then run hermes.
set -e
HERMES_HOME="/opt/data"
HERMES_HOME="${HERMES_HOME:-/opt/data}"
INSTALL_DIR="/opt/hermes"
# --- Privilege dropping via gosu ---
# When started as root (the default), optionally remap the hermes user/group
# to match host-side ownership, fix volume permissions, then re-exec as hermes.
# When started as root (the default for Docker, or fakeroot in rootless Podman),
# optionally remap the hermes user/group to match host-side ownership, fix volume
# permissions, then re-exec as hermes.
if [ "$(id -u)" = "0" ]; then
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then
echo "Changing hermes UID to $HERMES_UID"
@ -16,13 +17,19 @@ if [ "$(id -u)" = "0" ]; then
if [ -n "$HERMES_GID" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then
echo "Changing hermes GID to $HERMES_GID"
groupmod -g "$HERMES_GID" hermes
# -o allows non-unique GID (e.g. macOS GID 20 "staff" may already exist
# as "dialout" in the Debian-based container image)
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
fi
actual_hermes_uid=$(id -u hermes)
if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
chown -R hermes:hermes "$HERMES_HOME"
# In rootless Podman the container's "root" is mapped to an unprivileged
# host UID — chown will fail. That's fine: the volume is already owned
# by the mapped user on the host side.
chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
echo "Warning: chown failed (rootless container?) — continuing anyway"
fi
echo "Dropping root privileges"

View file

@ -0,0 +1,108 @@
# Ink Gateway TUI Migration — Post-mortem
Planned: 2026-04-01 · Delivered: 2026-04 · Status: shipped, classic (prompt_toolkit) CLI still present
## What Shipped
Three layers, same repo, Python runtime unchanged.
```
ui-tui (Node/TS) ──stdio JSON-RPC──▶ tui_gateway (Py) ──▶ AIAgent (run_agent.py)
```
### Backend — `tui_gateway/`
```
tui_gateway/
├── entry.py # subprocess entrypoint, stdio read/write loop
├── server.py # everything: sessions dict, @method handlers, _emit
├── render.py # stream renderer, diff rendering, message rendering
├── slash_worker.py # subprocess that runs hermes_cli slash commands
└── __init__.py
```
`server.py` owns the full runtime-control surface: session store (`_sessions: dict[str, dict]`), method registry (`@method("…")` decorator), event emitter (`_emit`), agent lifecycle (`_make_agent`, `_init_session`, `_wire_callbacks`), approval/sudo/clarify round-trips, and JSON-RPC dispatch.
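For orientation, that control surface reduces to a method registry plus a one-way event emitter over stdio. A minimal sketch of the shape (the handler body, framing details, and every name other than `@method`/`_emit` are illustrative, not the real implementation):

```python
import json
import sys

_METHODS = {}  # method name → handler, filled by the decorator

def method(name):
    """Register a JSON-RPC handler under *name* (mirrors server.py's @method)."""
    def register(fn):
        _METHODS[name] = fn
        return fn
    return register

def _emit(event, **payload):
    """Push a one-way event to the TUI as newline-delimited JSON on stdout."""
    sys.stdout.write(json.dumps({"event": event, **payload}) + "\n")
    sys.stdout.flush()

@method("session.interrupt")
def session_interrupt(params):
    _emit("status.update", text=f"interrupting {params.get('sid')}")
    return {"ok": True}

def dispatch(line):
    """Handle one request line, returning the JSON-RPC response line."""
    req = json.loads(line)
    result = _METHODS[req["method"]](req.get("params", {}))
    return json.dumps({"id": req.get("id"), "result": result})
```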
Protocol methods (`@method(...)` in `server.py`):
- session: `session.{create, resume, list, close, interrupt, usage, history, compress, branch, title, save, undo}`
- prompt: `prompt.{submit, background, btw}`
- tools: `tools.{list, show, configure}`
- slash: `slash.exec`, `command.{dispatch, resolve}`, `commands.catalog`, `complete.{path, slash}`
- approvals: `approval.respond`, `sudo.respond`, `clarify.respond`, `secret.respond`
- config/state: `config.{get, set, show}`, `model.options`, `reload.mcp`
- ops: `shell.exec`, `cli.exec`, `terminal.resize`, `input.detect_drop`, `clipboard.paste`, `paste.collapse`, `image.attach`, `process.stop`
- misc: `agents.list`, `skills.manage`, `plugins.list`, `cron.manage`, `insights.get`, `rollback.{list, diff, restore}`, `browser.manage`
Protocol events (`_emit(…)` → handled in `ui-tui/src/app/createGatewayEventHandler.ts`):
- lifecycle: `gateway.{ready, stderr}`, `session.info`, `skin.changed`
- stream: `message.{start, delta, complete}`, `thinking.delta`, `reasoning.{delta, available}`, `status.update`
- tools: `tool.{start, progress, complete, generating}`, `subagent.{start, thinking, tool, progress, complete}`
- interactive: `approval.request`, `sudo.request`, `clarify.request`, `secret.request`
- async: `background.complete`, `btw.complete`, `error`
### Frontend — `ui-tui/src/`
```
src/
├── entry.tsx # node bootstrap: bootBanner → spawn python → dynamic-import Ink → render(<App/>)
├── app.tsx # <GatewayProvider> wraps <AppLayout>
├── bootBanner.ts # raw-ANSI banner to stdout in ~2ms, pre-React
├── gatewayClient.ts # JSON-RPC client over child_process stdio
├── gatewayTypes.ts # typed RPC responses + GatewayEvent union
├── theme.ts # DEFAULT_THEME + fromSkin
├── app/ # hooks + stores — the orchestration layer
│ ├── uiStore.ts # nanostore: sid, info, busy, usage, theme, status…
│ ├── turnStore.ts # nanostore: per-turn activity / reasoning / tools
│ ├── turnController.ts # imperative singleton for stream-time operations
│ ├── overlayStore.ts # nanostore: modal/overlay state
│ ├── useMainApp.ts # top-level composition hook
│ ├── useSessionLifecycle.ts # session.create/resume/close/reset
│ ├── useSubmission.ts # shell/slash/prompt dispatch + interpolation
│ ├── useConfigSync.ts # config.get + mtime poll
│ ├── useComposerState.ts # input buffer, paste snippets, editor mode
│ ├── useInputHandlers.ts # key bindings
│ ├── createGatewayEventHandler.ts # event-stream dispatcher
│ ├── createSlashHandler.ts # slash command router (registry + python fallback)
│ └── slash/commands/ # core.ts, ops.ts, session.ts — TS-owned slash commands
├── components/ # AppLayout, AppChrome, AppOverlays, MessageLine, Thinking, Markdown, pickers, prompts, Banner, SessionPanel
├── config/ # env, limits, timing constants
├── content/ # charms, faces, fortunes, hotkeys, placeholders, verbs
├── domain/ # details, messages, paths, roles, slash, usage, viewport
├── protocol/ # interpolation, paste regex
├── hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
└── lib/ # history, messages, osc52, rpc, text
```
### CLI entry points — `hermes_cli/main.py`
- `hermes --tui``node dist/entry.js` (auto-builds when `.ts`/`.tsx` newer than `dist/entry.js`)
- `hermes --tui --dev``tsx src/entry.tsx` (skip build)
- `HERMES_TUI_DIR=…` → external prebuilt dist (nix, distro packaging)
## Diverged From Original Plan
| Plan | Reality | Why |
|---|---|---|
| `tui_gateway/{controller,session_state,events,protocol}.py` | all collapsed into `server.py` | no second consumer ever emerged, keeping one file cheaper than four |
| `ui-tui/src/main.tsx` | split into `entry.tsx` (bootstrap) + `app.tsx` (shell) | boot banner + early python spawn wanted a pre-React moment |
| `ui-tui/src/state/store.ts` | three nanostores (`uiStore`, `turnStore`, `overlayStore`) | separate lifetimes: ui persists, turn resets per reply, overlay is modal |
| `approval.requested` / `sudo.requested` / `clarify.requested` | `*.request` (no `-ed`) | cosmetic |
| `session.cancel` | dropped | `session.interrupt` covers it |
| `HERMES_EXPERIMENTAL_TUI=1`, `display.experimental_tui: true`, `/tui on/off/status` | none shipped | `--tui` went from opt-in to first-class without an experimental phase |
## Post-migration Additions (not in original plan)
- **Async `session.create`** — returns sid in ~1ms, agent builds on a background thread, `session.info` broadcasts when ready; `_wait_agent()` gates every agent-touching handler via `_sess` (a minimal sketch follows this list)
- **`bootBanner`** — raw-ANSI logo painted to stdout at T≈2ms, before Ink loads; `<AlternateScreen>` wipes it seamlessly when React mounts
- **Selection uniform bg**`theme.color.selectionBg` wired via `useSelection().setSelectionBgColor`; replaces SGR-inverse per-cell swap that fragmented over amber/gold fg
- **Slash command registry** — TS-owned commands in `app/slash/commands/{core,ops,session}.ts`, everything else falls through to `slash.exec` (python worker)
- **Turn store + controller split** — imperative singleton (`turnController`) holds refs/timers, nanostore (`turnStore`) holds render-visible state
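A minimal sketch of that async-create gating pattern, using the `_sessions` / `_wait_agent` names from the notes above (the bodies are illustrative stand-ins, not the real implementation):
```python
import threading
import uuid

_sessions: dict[str, dict] = {}

def create_session() -> str:
    """Return a sid immediately; build the agent on a background thread."""
    sid = uuid.uuid4().hex[:8]
    _sessions[sid] = {"agent": None, "ready": threading.Event()}
    threading.Thread(target=_build_agent, args=(sid,), daemon=True).start()
    return sid

def _build_agent(sid: str) -> None:
    _sessions[sid]["agent"] = object()  # stand-in for AIAgent construction
    _sessions[sid]["ready"].set()       # session.info broadcast happens here

def _wait_agent(sid: str, timeout: float = 30.0):
    """Gate agent-touching handlers until the background build finishes."""
    sess = _sessions[sid]
    if not sess["ready"].wait(timeout):
        raise TimeoutError(f"agent for session {sid} never became ready")
    return sess["agent"]
```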
## What's Still Open
- **Classic CLI not deleted.** `cli.py` still has ~80 `prompt_toolkit` references; classic REPL is still the default when `--tui` is absent. The original plan's "Cut 4 · prompt_toolkit removal later" hasn't happened.
- **No config-file opt-in.** `HERMES_EXPERIMENTAL_TUI` and `display.experimental_tui` were never built; only the CLI flag exists. Fine for now — if we want "default to TUI", a single line in `main.py` flips it.

View file

@ -6,6 +6,11 @@
# All fields are optional — missing values inherit from the default skin.
# Activate with: /skin <name> or display.skin: <name> in config.yaml
#
# Keys are marked:
# (both) — applies to both the classic CLI and the TUI
# (classic) — classic CLI only (see hermes --tui in user-guide/tui.md)
# (tui) — TUI only
#
# See hermes_cli/skin_engine.py for the full schema reference.
# ============================================================================
@ -14,43 +19,47 @@ name: example
description: An example custom skin — copy and modify this template
# ── Colors ──────────────────────────────────────────────────────────────────
# Hex color values for Rich markup. These control the CLI's visual palette.
# Hex color values. These control the visual palette.
colors:
# Banner panel (the startup welcome box)
# Banner panel (the startup welcome box) — (both)
banner_border: "#CD7F32" # Panel border
banner_title: "#FFD700" # Panel title text
banner_accent: "#FFBF00" # Section headers (Available Tools, Skills, etc.)
banner_dim: "#B8860B" # Dim/muted text (separators, model info)
banner_text: "#FFF8DC" # Body text (tool names, skill names)
# UI elements
ui_accent: "#FFBF00" # General accent color
# UI elements — (both)
ui_accent: "#FFBF00" # General accent (falls back to banner_accent)
ui_label: "#4dd0e1" # Labels
ui_ok: "#4caf50" # Success indicators
ui_error: "#ef5350" # Error indicators
ui_warn: "#ffa726" # Warning indicators
# Input area
prompt: "#FFF8DC" # Prompt text color
input_rule: "#CD7F32" # Horizontal rule around input
prompt: "#FFF8DC" # Prompt text / `` glyph color (both)
input_rule: "#CD7F32" # Horizontal rule above input (classic)
# Response box
response_border: "#FFD700" # Response box border (ANSI color)
# Response box — (classic)
response_border: "#FFD700" # Response box border
# Session display
session_label: "#DAA520" # Session label
session_border: "#8B8682" # Session ID dim color
# Session display — (both)
session_label: "#DAA520" # "Session: " label
session_border: "#8B8682" # Session ID text
# TUI surfaces
status_bar_bg: "#1a1a2e" # Status / usage bar background
voice_status_bg: "#1a1a2e" # Voice-mode badge background
completion_menu_bg: "#1a1a2e" # Completion list background
completion_menu_current_bg: "#333355" # Active completion row background
completion_menu_meta_bg: "#1a1a2e" # Completion meta column background
completion_menu_meta_current_bg: "#333355" # Active completion meta background
# TUI / CLI surfaces — (classic: status bar, voice badge, completion meta)
status_bar_bg: "#1a1a2e" # Status / usage bar background (classic)
voice_status_bg: "#1a1a2e" # Voice-mode badge background (classic)
completion_menu_bg: "#1a1a2e" # Completion list background (both)
completion_menu_current_bg: "#333355" # Active completion row background (both)
completion_menu_meta_bg: "#1a1a2e" # Completion meta column bg (classic)
completion_menu_meta_current_bg: "#333355" # Active meta bg (classic)
# Drag-to-select background — (tui)
selection_bg: "#3a3a55" # Uniform selection highlight in the TUI
# ── Spinner ─────────────────────────────────────────────────────────────────
# Customize the animated spinner shown during API calls and tool execution.
# (classic) — the TUI uses its own animated indicators; spinner config here
# is only read by the classic prompt_toolkit CLI.
spinner:
# Faces shown while waiting for the API response
waiting_faces:
@ -78,17 +87,17 @@ spinner:
# - ["⟪▲", "▲⟫"]
# ── Branding ────────────────────────────────────────────────────────────────
# Text strings used throughout the CLI interface.
# Text strings used throughout the interface.
branding:
agent_name: "Hermes Agent" # Banner title, about display
welcome: "Welcome! Type your message or /help for commands."
goodbye: "Goodbye! ⚕" # Exit message
response_label: " ⚕ Hermes " # Response box header label
prompt_symbol: " " # Input prompt symbol
help_header: "(^_^)? Available Commands" # /help header text
agent_name: "Hermes Agent" # (both) Banner title, about display
welcome: "Welcome! Type your message or /help for commands." # (both)
goodbye: "Goodbye! ⚕" # (both) Exit message
response_label: " ⚕ Hermes " # (classic) Response box header label
prompt_symbol: " " # (both) Input prompt glyph
help_header: "(^_^)? Available Commands" # (both) /help overlay title
# ── Tool Output ─────────────────────────────────────────────────────────────
# Character used as the prefix for tool output lines.
# Character used as the prefix for tool output lines. (both)
# Default is "┊" (thin dotted vertical line). Some alternatives:
# "╎" (light triple dash vertical)
# "▏" (left one-eighth block)

flake.lock generated
View file

@ -36,6 +36,26 @@
"type": "github"
}
},
"npm-lockfile-fix": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1775903712,
"narHash": "sha256-2GV79U6iVH4gKAPWYrxUReB0S41ty/Y3dBLquU8AlaA=",
"owner": "jeslie0",
"repo": "npm-lockfile-fix",
"rev": "c6093acb0c0548e0f9b8b3d82918823721930fe8",
"type": "github"
},
"original": {
"owner": "jeslie0",
"repo": "npm-lockfile-fix",
"type": "github"
}
},
"pyproject-build-systems": {
"inputs": {
"nixpkgs": [
@ -124,6 +144,7 @@
"inputs": {
"flake-parts": "flake-parts",
"nixpkgs": "nixpkgs",
"npm-lockfile-fix": "npm-lockfile-fix",
"pyproject-build-systems": "pyproject-build-systems",
"pyproject-nix": "pyproject-nix_2",
"uv2nix": "uv2nix_2"

View file

@ -19,11 +19,20 @@
url = "github:pyproject-nix/build-system-pkgs";
inputs.nixpkgs.follows = "nixpkgs";
};
npm-lockfile-fix = {
url = "github:jeslie0/npm-lockfile-fix";
inputs.nixpkgs.follows = "nixpkgs";
};
};
outputs = inputs:
outputs =
inputs:
inputs.flake-parts.lib.mkFlake { inherit inputs; } {
systems = [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" ];
systems = [
"x86_64-linux"
"aarch64-linux"
"aarch64-darwin"
];
imports = [
./nix/packages.nix

View file

@ -100,7 +100,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
def _build_discord(adapter) -> List[Dict[str, str]]:
"""Enumerate all text channels the Discord bot can see."""
"""Enumerate all text channels and forum channels the Discord bot can see."""
channels = []
client = getattr(adapter, "_client", None)
if not client:
@ -119,6 +119,15 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
"guild": guild.name,
"type": "channel",
})
# Forum channels (type 15) — creating a message auto-spawns a thread post.
forums = getattr(guild, "forum_channels", None) or []
for ch in forums:
channels.append({
"id": str(ch.id),
"name": ch.name,
"guild": guild.name,
"type": "forum",
})
# Also including DM-capable users we've interacted with is not
# feasible via guild enumeration; those come from sessions.
@ -191,6 +200,15 @@ def load_directory() -> Dict[str, Any]:
return {"updated_at": None, "platforms": {}}
def lookup_channel_type(platform_name: str, chat_id: str) -> Optional[str]:
"""Return the channel ``type`` string (e.g. ``"channel"``, ``"forum"``) for *chat_id*, or *None* if unknown."""
directory = load_directory()
for ch in directory.get("platforms", {}).get(platform_name, []):
if ch.get("id") == chat_id:
return ch.get("type")
return None
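Since `lookup_channel_type` just scans the cached directory, callers can branch on the result cheaply; a short illustration (the import path is an assumption, and the ID is made up):
```python
from gateway.channel_directory import lookup_channel_type  # hypothetical path

if lookup_channel_type("discord", "1234567890") == "forum":
    # Forum channels auto-spawn a thread post on message creation (see
    # above), so the sender may want a thread title, not a bare message.
    ...
```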
def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
"""
Resolve a human-friendly channel name to a numeric ID.

View file

@ -258,6 +258,13 @@ class GatewayConfig:
# Streaming configuration
streaming: StreamingConfig = field(default_factory=StreamingConfig)
# Session store pruning: drop SessionEntry records older than this many
# days from the in-memory dict and sessions.json. Keeps the store from
# growing unbounded in gateways serving many chats/threads/users over
# months. Pruning is invisible to users — if they resume, they get a
# fresh session exactly as if the reset policy had fired. 0 = disabled.
session_store_max_age_days: int = 90
def get_connected_platforms(self) -> List[Platform]:
"""Return list of platforms that are enabled and configured."""
connected = []
@ -307,6 +314,14 @@ class GatewayConfig:
# QQBot uses extra dict for app credentials
elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
connected.append(platform)
# DingTalk uses client_id/client_secret from config.extra or env vars
elif platform == Platform.DINGTALK and (
config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
) and (
config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")
):
connected.append(platform)
return connected
def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
@ -357,6 +372,7 @@ class GatewayConfig:
"thread_sessions_per_user": self.thread_sessions_per_user,
"unauthorized_dm_behavior": self.unauthorized_dm_behavior,
"streaming": self.streaming.to_dict(),
"session_store_max_age_days": self.session_store_max_age_days,
}
@classmethod
@ -404,6 +420,13 @@ class GatewayConfig:
"pair",
)
try:
session_store_max_age_days = int(data.get("session_store_max_age_days", 90))
if session_store_max_age_days < 0:
session_store_max_age_days = 0
except (TypeError, ValueError):
session_store_max_age_days = 90
return cls(
platforms=platforms,
default_reset_policy=default_policy,
@ -418,6 +441,7 @@ class GatewayConfig:
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
unauthorized_dm_behavior=unauthorized_dm_behavior,
streaming=StreamingConfig.from_dict(data.get("streaming", {})),
session_store_max_age_days=session_store_max_age_days,
)
def get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str:
@ -554,6 +578,12 @@ def load_gateway_config() -> GatewayConfig:
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
channel_prompts = platform_cfg["channel_prompts"]
if isinstance(channel_prompts, dict):
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
else:
bridged["channel_prompts"] = channel_prompts
if not bridged:
continue
plat_data = platforms_data.setdefault(plat.value, {})
@ -611,6 +641,20 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(ntc, list):
ntc = ",".join(str(v) for v in ntc)
os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc)
# allow_mentions: granular control over what the bot can ping.
# Safe defaults (no @everyone/roles) are applied in the adapter;
# these YAML keys only override when set and let users opt back
# into unsafe modes (e.g. roles=true) if they actually want it.
allow_mentions_cfg = discord_cfg.get("allow_mentions")
if isinstance(allow_mentions_cfg, dict):
for yaml_key, env_key in (
("everyone", "DISCORD_ALLOW_MENTION_EVERYONE"),
("roles", "DISCORD_ALLOW_MENTION_ROLES"),
("users", "DISCORD_ALLOW_MENTION_USERS"),
("replied_user", "DISCORD_ALLOW_MENTION_REPLIED_USER"),
):
if yaml_key in allow_mentions_cfg and not os.getenv(env_key):
os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower()
# Telegram settings → env vars (env vars take precedence)
telegram_cfg = yaml_cfg.get("telegram", {})
@ -632,6 +676,18 @@ def load_gateway_config() -> GatewayConfig:
os.environ["TELEGRAM_IGNORED_THREADS"] = str(ignored_threads)
if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"):
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
if "disable_link_previews" in telegram_cfg:
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[Platform.TELEGRAM.value] = plat_data
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
extra["disable_link_previews"] = telegram_cfg["disable_link_previews"]
whatsapp_cfg = yaml_cfg.get("whatsapp", {})
if isinstance(whatsapp_cfg, dict):
@ -645,6 +701,24 @@ def load_gateway_config() -> GatewayConfig:
frc = ",".join(str(v) for v in frc)
os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
# DingTalk settings → env vars (env vars take precedence)
dingtalk_cfg = yaml_cfg.get("dingtalk", {})
if isinstance(dingtalk_cfg, dict):
if "require_mention" in dingtalk_cfg and not os.getenv("DINGTALK_REQUIRE_MENTION"):
os.environ["DINGTALK_REQUIRE_MENTION"] = str(dingtalk_cfg["require_mention"]).lower()
if "mention_patterns" in dingtalk_cfg and not os.getenv("DINGTALK_MENTION_PATTERNS"):
os.environ["DINGTALK_MENTION_PATTERNS"] = json.dumps(dingtalk_cfg["mention_patterns"])
frc = dingtalk_cfg.get("free_response_chats")
if frc is not None and not os.getenv("DINGTALK_FREE_RESPONSE_CHATS"):
if isinstance(frc, list):
frc = ",".join(str(v) for v in frc)
os.environ["DINGTALK_FREE_RESPONSE_CHATS"] = str(frc)
allowed = dingtalk_cfg.get("allowed_users")
if allowed is not None and not os.getenv("DINGTALK_ALLOWED_USERS"):
if isinstance(allowed, list):
allowed = ",".join(str(v) for v in allowed)
os.environ["DINGTALK_ALLOWED_USERS"] = str(allowed)
# Matrix settings → env vars (env vars take precedence)
matrix_cfg = yaml_cfg.get("matrix", {})
if isinstance(matrix_cfg, dict):
@ -988,6 +1062,25 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
if webhook_secret:
config.platforms[Platform.WEBHOOK].extra["secret"] = webhook_secret
# DingTalk
dingtalk_client_id = os.getenv("DINGTALK_CLIENT_ID")
dingtalk_client_secret = os.getenv("DINGTALK_CLIENT_SECRET")
if dingtalk_client_id and dingtalk_client_secret:
if Platform.DINGTALK not in config.platforms:
config.platforms[Platform.DINGTALK] = PlatformConfig()
config.platforms[Platform.DINGTALK].enabled = True
config.platforms[Platform.DINGTALK].extra.update({
"client_id": dingtalk_client_id,
"client_secret": dingtalk_client_secret,
})
dingtalk_home = os.getenv("DINGTALK_HOME_CHANNEL")
if dingtalk_home:
config.platforms[Platform.DINGTALK].home_channel = HomeChannel(
platform=Platform.DINGTALK,
chat_id=dingtalk_home,
name=os.getenv("DINGTALK_HOME_CHANNEL_NAME", "Home"),
)
# Feishu / Lark
feishu_app_id = os.getenv("FEISHU_APP_ID")
feishu_app_secret = os.getenv("FEISHU_APP_SECRET")
@ -1136,12 +1229,24 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
qq_group_allowed = os.getenv("QQ_GROUP_ALLOWED_USERS", "").strip()
if qq_group_allowed:
extra["group_allow_from"] = qq_group_allowed
qq_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
qq_home = os.getenv("QQBOT_HOME_CHANNEL", "").strip()
qq_home_name_env = "QQBOT_HOME_CHANNEL_NAME"
if not qq_home:
# Back-compat: accept the pre-rename name and log a one-time warning.
legacy_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
if legacy_home:
qq_home = legacy_home
qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
import logging
logging.getLogger(__name__).warning(
"QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
"in your .env for consistency with the platform key."
)
if qq_home:
config.platforms[Platform.QQBOT].home_channel = HomeChannel(
platform=Platform.QQBOT,
chat_id=qq_home,
name=os.getenv("QQ_HOME_CHANNEL_NAME", "Home"),
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
)
# Session settings
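A sketch of the prune pass that `session_store_max_age_days` implies; the real store is the in-memory dict plus `sessions.json`, and the timestamp field name here is an assumption:
```python
import time

def prune_sessions(entries: dict[str, dict], max_age_days: int) -> dict[str, dict]:
    """Drop entries older than max_age_days; 0 disables pruning."""
    if max_age_days <= 0:
        return entries
    cutoff = time.time() - max_age_days * 86400
    return {
        key: entry
        for key, entry in entries.items()
        if entry.get("last_active_ts", 0.0) >= cutoff  # illustrative field
    }
```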

View file

@ -515,6 +515,8 @@ class APIServerAdapter(BasePlatformAdapter):
session_id: Optional[str] = None,
stream_delta_callback=None,
tool_progress_callback=None,
tool_start_callback=None,
tool_complete_callback=None,
) -> Any:
"""
Create an AIAgent instance using the gateway's runtime config.
@ -553,6 +555,8 @@ class APIServerAdapter(BasePlatformAdapter):
platform="api_server",
stream_delta_callback=stream_delta_callback,
tool_progress_callback=tool_progress_callback,
tool_start_callback=tool_start_callback,
tool_complete_callback=tool_complete_callback,
session_db=self._ensure_session_db(),
fallback_model=fallback_model,
)
@ -898,7 +902,7 @@ class APIServerAdapter(BasePlatformAdapter):
return time.monotonic()
# Stream content chunks as they arrive from the agent
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
while True:
try:
delta = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
@ -965,6 +969,427 @@ class APIServerAdapter(BasePlatformAdapter):
return response
async def _write_sse_responses(
self,
request: "web.Request",
response_id: str,
model: str,
created_at: int,
stream_q,
agent_task,
agent_ref,
conversation_history: List[Dict[str, str]],
user_message: str,
instructions: Optional[str],
conversation: Optional[str],
store: bool,
session_id: str,
) -> "web.StreamResponse":
"""Write an SSE stream for POST /v1/responses (OpenAI Responses API).
Emits spec-compliant event types as the agent runs:
- ``response.created``: initial envelope (status=in_progress)
- ``response.output_text.delta`` / ``response.output_text.done``:
streamed assistant text
- ``response.output_item.added`` / ``response.output_item.done``
with ``item.type == "function_call"``: fired when the agent invokes a
tool (both events fire; the ``done`` event carries the finalized
``arguments`` string)
- ``response.output_item.added`` with
``item.type == "function_call_output"``: tool result carrying
``{call_id, output, status}``
- ``response.completed``: terminal event carrying the full
response object with all output items + usage (same payload
shape as the non-streaming path for parity)
- ``response.failed``: terminal event on agent error
If the client disconnects mid-stream, ``agent.interrupt()`` is
called so the agent stops issuing upstream LLM calls, then the
asyncio task is cancelled. When ``store=True`` the full response
is persisted to the ResponseStore in a ``finally`` block so GET
/v1/responses/{id} and ``previous_response_id`` chaining work the
same as the batch path.
"""
import queue as _q
sse_headers = {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
"X-Accel-Buffering": "no",
}
origin = request.headers.get("Origin", "")
cors = self._cors_headers_for_origin(origin) if origin else None
if cors:
sse_headers.update(cors)
if session_id:
sse_headers["X-Hermes-Session-Id"] = session_id
response = web.StreamResponse(status=200, headers=sse_headers)
await response.prepare(request)
# State accumulated during the stream
final_text_parts: List[str] = []
# Track open function_call items by name so we can emit a matching
# ``done`` event when the tool completes. Order preserved.
pending_tool_calls: List[Dict[str, Any]] = []
# Output items we've emitted so far (used to build the terminal
# response.completed payload). Kept in the order they appeared.
emitted_items: List[Dict[str, Any]] = []
# Monotonic counter for output_index (spec requires it).
output_index = 0
# Monotonic counter for call_id generation if the agent doesn't
# provide one (it doesn't, from tool_progress_callback).
call_counter = 0
# Canonical Responses SSE events include a monotonically increasing
# sequence_number. Add it server-side for every emitted event so
# clients that validate the OpenAI event schema can parse our stream.
sequence_number = 0
# Track the assistant message item id + content index for text
# delta events — the spec ties deltas to a specific item.
message_item_id = f"msg_{uuid.uuid4().hex[:24]}"
message_output_index: Optional[int] = None
message_opened = False
async def _write_event(event_type: str, data: Dict[str, Any]) -> None:
nonlocal sequence_number
if "sequence_number" not in data:
data["sequence_number"] = sequence_number
sequence_number += 1
payload = f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
await response.write(payload.encode())
def _envelope(status: str) -> Dict[str, Any]:
env: Dict[str, Any] = {
"id": response_id,
"object": "response",
"status": status,
"created_at": created_at,
"model": model,
}
return env
final_response_text = ""
agent_error: Optional[str] = None
usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
try:
# response.created — initial envelope, status=in_progress
created_env = _envelope("in_progress")
created_env["output"] = []
await _write_event("response.created", {
"type": "response.created",
"response": created_env,
})
last_activity = time.monotonic()
async def _open_message_item() -> None:
"""Emit response.output_item.added for the assistant message
the first time any text delta arrives."""
nonlocal message_opened, message_output_index, output_index
if message_opened:
return
message_opened = True
message_output_index = output_index
output_index += 1
item = {
"id": message_item_id,
"type": "message",
"status": "in_progress",
"role": "assistant",
"content": [],
}
await _write_event("response.output_item.added", {
"type": "response.output_item.added",
"output_index": message_output_index,
"item": item,
})
async def _emit_text_delta(delta_text: str) -> None:
await _open_message_item()
final_text_parts.append(delta_text)
await _write_event("response.output_text.delta", {
"type": "response.output_text.delta",
"item_id": message_item_id,
"output_index": message_output_index,
"content_index": 0,
"delta": delta_text,
"logprobs": [],
})
async def _emit_tool_started(payload: Dict[str, Any]) -> str:
"""Emit response.output_item.added for a function_call.
Returns the call_id so the matching completion event can
reference it. Prefer the real ``tool_call_id`` from the
agent when available; fall back to a generated call id for
safety in tests or older code paths.
"""
nonlocal output_index, call_counter
call_counter += 1
call_id = payload.get("tool_call_id") or f"call_{response_id[5:]}_{call_counter}"
args = payload.get("arguments", {})
if isinstance(args, dict):
arguments_str = json.dumps(args)
else:
arguments_str = str(args)
item = {
"id": f"fc_{uuid.uuid4().hex[:24]}",
"type": "function_call",
"status": "in_progress",
"name": payload.get("name", ""),
"call_id": call_id,
"arguments": arguments_str,
}
idx = output_index
output_index += 1
pending_tool_calls.append({
"call_id": call_id,
"name": payload.get("name", ""),
"arguments": arguments_str,
"item_id": item["id"],
"output_index": idx,
})
emitted_items.append({
"type": "function_call",
"name": payload.get("name", ""),
"arguments": arguments_str,
"call_id": call_id,
})
await _write_event("response.output_item.added", {
"type": "response.output_item.added",
"output_index": idx,
"item": item,
})
return call_id
async def _emit_tool_completed(payload: Dict[str, Any]) -> None:
"""Emit response.output_item.done (function_call) followed
by response.output_item.added (function_call_output)."""
nonlocal output_index
call_id = payload.get("tool_call_id")
result = payload.get("result", "")
pending = None
if call_id:
for i, p in enumerate(pending_tool_calls):
if p["call_id"] == call_id:
pending = pending_tool_calls.pop(i)
break
if pending is None:
# Completion without a matching start — skip to avoid
# emitting orphaned done events.
return
# function_call done
done_item = {
"id": pending["item_id"],
"type": "function_call",
"status": "completed",
"name": pending["name"],
"call_id": pending["call_id"],
"arguments": pending["arguments"],
}
await _write_event("response.output_item.done", {
"type": "response.output_item.done",
"output_index": pending["output_index"],
"item": done_item,
})
# function_call_output added (result)
result_str = result if isinstance(result, str) else json.dumps(result)
output_parts = [{"type": "input_text", "text": result_str}]
output_item = {
"id": f"fco_{uuid.uuid4().hex[:24]}",
"type": "function_call_output",
"call_id": pending["call_id"],
"output": output_parts,
"status": "completed",
}
idx = output_index
output_index += 1
emitted_items.append({
"type": "function_call_output",
"call_id": pending["call_id"],
"output": output_parts,
})
await _write_event("response.output_item.added", {
"type": "response.output_item.added",
"output_index": idx,
"item": output_item,
})
await _write_event("response.output_item.done", {
"type": "response.output_item.done",
"output_index": idx,
"item": output_item,
})
# Main drain loop — thread-safe queue fed by agent callbacks.
async def _dispatch(it) -> None:
"""Route a queue item to the correct SSE emitter.
Plain strings are text deltas. Tagged tuples with
``__tool_started__`` / ``__tool_completed__`` prefixes
are tool lifecycle events.
"""
if isinstance(it, tuple) and len(it) == 2 and isinstance(it[0], str):
tag, payload = it
if tag == "__tool_started__":
await _emit_tool_started(payload)
elif tag == "__tool_completed__":
await _emit_tool_completed(payload)
# Unknown tags are silently ignored (forward-compat).
elif isinstance(it, str):
await _emit_text_delta(it)
# Other types (non-string, non-tuple) are silently dropped.
loop = asyncio.get_running_loop()
while True:
try:
item = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
except _q.Empty:
if agent_task.done():
# Drain remaining
while True:
try:
item = stream_q.get_nowait()
if item is None:
break
await _dispatch(item)
last_activity = time.monotonic()
except _q.Empty:
break
break
if time.monotonic() - last_activity >= CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS:
await response.write(b": keepalive\n\n")
last_activity = time.monotonic()
continue
if item is None: # EOS sentinel
break
await _dispatch(item)
last_activity = time.monotonic()
# Pick up agent result + usage from the completed task
try:
result, agent_usage = await agent_task
usage = agent_usage or usage
# If the agent produced a final_response but no text
# deltas were streamed (e.g. some providers only emit
# the full response at the end), emit a single fallback
# delta so Responses clients still receive a live text part.
agent_final = result.get("final_response", "") if isinstance(result, dict) else ""
if agent_final and not final_text_parts:
await _emit_text_delta(agent_final)
if agent_final and not final_response_text:
final_response_text = agent_final
if isinstance(result, dict) and result.get("error") and not final_response_text:
agent_error = result["error"]
except Exception as e: # noqa: BLE001
logger.error("Error running agent for streaming responses: %s", e, exc_info=True)
agent_error = str(e)
# Close the message item if it was opened
final_response_text = "".join(final_text_parts) or final_response_text
if message_opened:
await _write_event("response.output_text.done", {
"type": "response.output_text.done",
"item_id": message_item_id,
"output_index": message_output_index,
"content_index": 0,
"text": final_response_text,
"logprobs": [],
})
msg_done_item = {
"id": message_item_id,
"type": "message",
"status": "completed",
"role": "assistant",
"content": [
{"type": "output_text", "text": final_response_text}
],
}
await _write_event("response.output_item.done", {
"type": "response.output_item.done",
"output_index": message_output_index,
"item": msg_done_item,
})
# Always append a final message item in the completed
# response envelope so clients that only parse the terminal
# payload still see the assistant text. This mirrors the
# shape produced by _extract_output_items in the batch path.
final_items: List[Dict[str, Any]] = list(emitted_items)
final_items.append({
"type": "message",
"role": "assistant",
"content": [
{"type": "output_text", "text": final_response_text or (agent_error or "")}
],
})
if agent_error:
failed_env = _envelope("failed")
failed_env["output"] = final_items
failed_env["error"] = {"message": agent_error, "type": "server_error"}
failed_env["usage"] = {
"input_tokens": usage.get("input_tokens", 0),
"output_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
}
await _write_event("response.failed", {
"type": "response.failed",
"response": failed_env,
})
else:
completed_env = _envelope("completed")
completed_env["output"] = final_items
completed_env["usage"] = {
"input_tokens": usage.get("input_tokens", 0),
"output_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
}
await _write_event("response.completed", {
"type": "response.completed",
"response": completed_env,
})
# Persist for future chaining / GET retrieval, mirroring
# the batch path behavior.
if store:
full_history = list(conversation_history)
full_history.append({"role": "user", "content": user_message})
if isinstance(result, dict) and result.get("messages"):
full_history.extend(result["messages"])
else:
full_history.append({"role": "assistant", "content": final_response_text})
self._response_store.put(response_id, {
"response": completed_env,
"conversation_history": full_history,
"instructions": instructions,
"session_id": session_id,
})
if conversation:
self._response_store.set_conversation(conversation, response_id)
except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
# Client disconnected — interrupt the agent so it stops
# making upstream LLM calls, then cancel the task.
agent = agent_ref[0] if agent_ref else None
if agent is not None:
try:
agent.interrupt("SSE client disconnected")
except Exception:
pass
if not agent_task.done():
agent_task.cancel()
try:
await agent_task
except (asyncio.CancelledError, Exception):
pass
logger.info("SSE client disconnected; interrupted agent task %s", response_id)
return response
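From the client side, the stream above is ordinary SSE. A sketch of consuming it with `requests` (host, port, and request body fields are assumptions; the event names are the ones emitted above):
```python
import json
import requests

with requests.post(
    "http://localhost:8080/v1/responses",  # hypothetical host/port
    json={"model": "hermes", "input": "hi", "stream": True},
    stream=True,
) as resp:
    event = None
    for raw in resp.iter_lines(decode_unicode=True):
        if raw.startswith("event: "):
            event = raw[len("event: "):]
        elif raw.startswith("data: "):  # ": keepalive" lines fall through
            data = json.loads(raw[len("data: "):])
            if event == "response.output_text.delta":
                print(data["delta"], end="", flush=True)
            elif event in ("response.completed", "response.failed"):
                break
```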
async def _handle_responses(self, request: "web.Request") -> "web.Response":
"""POST /v1/responses — OpenAI Responses API format."""
auth_err = self._check_auth(request)
@ -1035,11 +1460,13 @@ class APIServerAdapter(BasePlatformAdapter):
if previous_response_id:
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
stored_session_id = None
if not conversation_history and previous_response_id:
stored = self._response_store.get(previous_response_id)
if stored is None:
return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404)
conversation_history = list(stored.get("conversation_history", []))
stored_session_id = stored.get("session_id")
# If no instructions provided, carry forward from previous
if instructions is None:
instructions = stored.get("instructions")
@ -1057,8 +1484,83 @@ class APIServerAdapter(BasePlatformAdapter):
if body.get("truncation") == "auto" and len(conversation_history) > 100:
conversation_history = conversation_history[-100:]
# Run the agent (with Idempotency-Key support)
session_id = str(uuid.uuid4())
# Reuse session from previous_response_id chain so the dashboard
# groups the entire conversation under one session entry.
session_id = stored_session_id or str(uuid.uuid4())
stream = bool(body.get("stream", False))
if stream:
# Streaming branch — emit OpenAI Responses SSE events as the
# agent runs so frontends can render text deltas and tool
# calls in real time. See _write_sse_responses for details.
import queue as _q
_stream_q: _q.Queue = _q.Queue()
def _on_delta(delta):
# None from the agent is a CLI box-close signal, not EOS.
# Forwarding would kill the SSE stream prematurely; the
# SSE writer detects completion via agent_task.done().
if delta is not None:
_stream_q.put(delta)
def _on_tool_progress(event_type, name, preview, args, **kwargs):
"""Queue non-start tool progress events if needed in future.
The structured Responses stream uses ``tool_start_callback``
and ``tool_complete_callback`` for exact call-id correlation,
so progress events are currently ignored here.
"""
return
def _on_tool_start(tool_call_id, function_name, function_args):
"""Queue a started tool for live function_call streaming."""
_stream_q.put(("__tool_started__", {
"tool_call_id": tool_call_id,
"name": function_name,
"arguments": function_args or {},
}))
def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
"""Queue a completed tool result for live function_call_output streaming."""
_stream_q.put(("__tool_completed__", {
"tool_call_id": tool_call_id,
"name": function_name,
"arguments": function_args or {},
"result": function_result,
}))
agent_ref = [None]
agent_task = asyncio.ensure_future(self._run_agent(
user_message=user_message,
conversation_history=conversation_history,
ephemeral_system_prompt=instructions,
session_id=session_id,
stream_delta_callback=_on_delta,
tool_progress_callback=_on_tool_progress,
tool_start_callback=_on_tool_start,
tool_complete_callback=_on_tool_complete,
agent_ref=agent_ref,
))
response_id = f"resp_{uuid.uuid4().hex[:28]}"
model_name = body.get("model", self._model_name)
created_at = int(time.time())
return await self._write_sse_responses(
request=request,
response_id=response_id,
model=model_name,
created_at=created_at,
stream_q=_stream_q,
agent_task=agent_task,
agent_ref=agent_ref,
conversation_history=conversation_history,
user_message=user_message,
instructions=instructions,
conversation=conversation,
store=store,
session_id=session_id,
)
async def _compute_response():
return await self._run_agent(
@ -1133,6 +1635,7 @@ class APIServerAdapter(BasePlatformAdapter):
"response": response_data,
"conversation_history": full_history,
"instructions": instructions,
"session_id": session_id,
})
# Update conversation mapping so the next request with the same
# conversation name automatically chains to this response
@ -1486,6 +1989,8 @@ class APIServerAdapter(BasePlatformAdapter):
session_id: Optional[str] = None,
stream_delta_callback=None,
tool_progress_callback=None,
tool_start_callback=None,
tool_complete_callback=None,
agent_ref: Optional[list] = None,
) -> tuple:
"""
@ -1499,7 +2004,7 @@ class APIServerAdapter(BasePlatformAdapter):
callers (e.g. the SSE writer) to call ``agent.interrupt()`` from
another thread to stop in-progress LLM calls.
"""
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
def _run():
agent = self._create_agent(
@ -1507,6 +2012,8 @@ class APIServerAdapter(BasePlatformAdapter):
session_id=session_id,
stream_delta_callback=stream_delta_callback,
tool_progress_callback=tool_progress_callback,
tool_start_callback=tool_start_callback,
tool_complete_callback=tool_complete_callback,
)
if agent_ref is not None:
agent_ref[0] = agent
@ -1643,10 +2150,12 @@ class APIServerAdapter(BasePlatformAdapter):
if previous_response_id:
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
stored_session_id = None
if not conversation_history and previous_response_id:
stored = self._response_store.get(previous_response_id)
if stored:
conversation_history = list(stored.get("conversation_history", []))
stored_session_id = stored.get("session_id")
if instructions is None:
instructions = stored.get("instructions")
@ -1665,7 +2174,7 @@ class APIServerAdapter(BasePlatformAdapter):
)
conversation_history.append({"role": msg["role"], "content": str(content)})
session_id = body.get("session_id") or run_id
session_id = body.get("session_id") or stored_session_id or run_id
ephemeral_system_prompt = instructions
async def _run_and_close():

View file

@ -669,6 +669,15 @@ class MessageEvent:
# Original platform data
raw_message: Any = None
message_id: Optional[str] = None
# Platform-specific update identifier. For Telegram this is the
# ``update_id`` from the PTB Update wrapper; other platforms currently
# ignore it. Used by ``/restart`` to record the triggering update so the
# new gateway can advance the Telegram offset past it and avoid processing
# the same ``/restart`` twice if PTB's graceful-shutdown ACK times out
# ("Error while calling `get_updates` one more time to mark all fetched
# updates" in gateway.log).
platform_update_id: Optional[int] = None
# Media attachments
# media_urls: local file paths (for vision tool access)
@ -682,6 +691,10 @@ class MessageEvent:
# Auto-loaded skill(s) for topic/channel bindings (e.g., Telegram DM Topics,
# Discord channel_skill_bindings). A single name or ordered list.
auto_skill: Optional[str | list[str]] = None
# Per-channel ephemeral system prompt (e.g. Discord channel_prompts).
# Applied at API call time and never persisted to transcript history.
channel_prompt: Optional[str] = None
# Internal flag — set for synthetic events (e.g. background process
# completion notifications) that must bypass user authorization checks.
@ -730,25 +743,56 @@ def merge_pending_message_event(
pending_messages: Dict[str, MessageEvent],
session_key: str,
event: MessageEvent,
*,
merge_text: bool = False,
) -> None:
"""Store or merge a pending event for a session.
Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
events. Merge those into the existing queued event so the next turn sees
the whole burst, while non-photo follow-ups still replace the pending
event normally.
the whole burst.
When ``merge_text`` is enabled, rapid follow-up TEXT events are appended
instead of replacing the pending turn. This is used for Telegram bursty
follow-ups so a multi-part user thought is not silently truncated to only
the last queued fragment.
"""
existing = pending_messages.get(session_key)
if (
existing
and getattr(existing, "message_type", None) == MessageType.PHOTO
and event.message_type == MessageType.PHOTO
):
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
if existing:
existing_is_photo = getattr(existing, "message_type", None) == MessageType.PHOTO
incoming_is_photo = event.message_type == MessageType.PHOTO
existing_has_media = bool(existing.media_urls)
incoming_has_media = bool(event.media_urls)
if existing_is_photo and incoming_is_photo:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
if existing_has_media or incoming_has_media:
if incoming_has_media:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
if existing.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
else:
existing.text = event.text
if existing_is_photo or incoming_is_photo:
existing.message_type = MessageType.PHOTO
return
if (
merge_text
and getattr(existing, "message_type", None) == MessageType.TEXT
and event.message_type == MessageType.TEXT
):
if event.text:
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
return
pending_messages[session_key] = event
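A self-contained model of the merge rules above, with a stand-in event type instead of the real `MessageEvent` (the media-mixing and caption-merging branches are omitted for brevity):
```python
from dataclasses import dataclass, field

@dataclass
class Ev:
    message_type: str  # "TEXT" or "PHOTO"
    text: str = ""
    media_urls: list = field(default_factory=list)

pending: dict[str, Ev] = {}

def merge(key: str, ev: Ev, merge_text: bool = False) -> None:
    ex = pending.get(key)
    if ex and ex.message_type == "PHOTO" and ev.message_type == "PHOTO":
        ex.media_urls += ev.media_urls  # photo burst: accumulate media
        return
    if ex and merge_text and ex.message_type == ev.message_type == "TEXT":
        ex.text = f"{ex.text}\n{ev.text}" if ex.text else ev.text
        return
    pending[key] = ev  # default: the newest event replaces the pending one

merge("s1", Ev("TEXT", "part one"))
merge("s1", Ev("TEXT", "part two"), merge_text=True)
assert pending["s1"].text == "part one\npart two"
```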
@ -776,6 +820,36 @@ _RETRYABLE_ERROR_PATTERNS = (
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
def resolve_channel_prompt(
config_extra: dict,
channel_id: str,
parent_id: str | None = None,
) -> str | None:
"""Resolve a per-channel ephemeral prompt from platform config.
Looks up ``channel_prompts`` in the adapter's ``config.extra`` dict.
Prefers an exact match on *channel_id*; falls back to *parent_id*
(useful for forum threads / child channels inheriting a parent prompt).
Returns the prompt string, or None if no match is found. Blank/whitespace-
only prompts are treated as absent.
"""
prompts = config_extra.get("channel_prompts") or {}
if not isinstance(prompts, dict):
return None
for key in (channel_id, parent_id):
if not key:
continue
prompt = prompts.get(key)
if prompt is None:
continue
prompt = str(prompt).strip()
if prompt:
return prompt
return None
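`resolve_channel_prompt` is pure over its inputs, so it is easy to exercise directly (import path assumed):
```python
from gateway.platforms.base import resolve_channel_prompt  # hypothetical path

extra = {"channel_prompts": {"111": "Answer in one line.", "222": "   "}}

assert resolve_channel_prompt(extra, "999") is None            # no match
assert resolve_channel_prompt(extra, "999", "111") == "Answer in one line."
assert resolve_channel_prompt(extra, "222") is None            # blank prompt
```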
class BasePlatformAdapter(ABC):
"""
Base class for platform adapters.
@ -805,6 +879,11 @@ class BasePlatformAdapter(ABC):
# Gateway shutdown cancels these so an old gateway instance doesn't keep
# working on a task after --replace or manual restarts.
self._background_tasks: set[asyncio.Task] = set()
# One-shot callbacks to fire after the main response is delivered.
# Keyed by session_key. GatewayRunner uses this to defer
# background-review notifications ("💾 Skill created") until the
# primary reply has been sent.
self._post_delivery_callbacks: Dict[str, Callable] = {}
self._expected_cancelled_tasks: set[asyncio.Task] = set()
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
# Chats where auto-TTS on voice input is disabled (set by /voice off)
@ -975,16 +1054,40 @@ class BasePlatformAdapter(ABC):
"""
pass
# Default: the adapter treats ``finalize=True`` on edit_message as a
# no-op and is happy to have the stream consumer skip redundant final
# edits. Subclasses that *require* an explicit finalize call to close
# out the message lifecycle (e.g. rich card / AI assistant surfaces
# such as DingTalk AI Cards) override this to True (class attribute or
# property) so the stream consumer knows not to short-circuit.
REQUIRES_EDIT_FINALIZE: bool = False
async def edit_message(
self,
chat_id: str,
message_id: str,
content: str,
*,
finalize: bool = False,
) -> SendResult:
"""
Edit a previously sent message. Optional platforms that don't
support editing return success=False and callers fall back to
sending a new message.
``finalize`` signals that this is the last edit in a streaming
sequence. Most platforms (Telegram, Slack, Discord, Matrix,
etc.) treat it as a no-op because their edit APIs have no notion
of message lifecycle state an edit is an edit. Platforms that
render streaming updates with a distinct "in progress" state and
require explicit closure (e.g. rich card / AI assistant surfaces
such as DingTalk AI Cards) use it to finalize the message and
transition the UI out of the streaming indicator those should
also set ``REQUIRES_EDIT_FINALIZE = True`` so callers route a
final edit through even when content is unchanged. Callers
should set ``finalize=True`` on the final edit of a streamed
response (typically when ``got_done`` fires in the stream
consumer) and leave it ``False`` on intermediate edits.
"""
return SendResult(success=False, error="Not supported")
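How a stream consumer would honor that contract; everything except `edit_message` and `REQUIRES_EDIT_FINALIZE` is an illustrative name:
```python
async def close_stream(adapter, chat_id: str, message_id: str,
                       final_text: str, last_sent: str) -> None:
    """Final edit of a streamed reply, per the finalize contract above."""
    if final_text != last_sent or adapter.REQUIRES_EDIT_FINALIZE:
        # Platforms like DingTalk AI Cards need the finalize even when the
        # content is unchanged, to leave the "in progress" card state.
        await adapter.edit_message(chat_id, message_id, final_text,
                                   finalize=True)
```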
@ -1221,7 +1324,7 @@ class BasePlatformAdapter(ABC):
path = path[1:-1].strip()
path = path.lstrip("`\"'").rstrip("`\"',.;:)}]")
if path:
media.append((path, has_voice_tag))
media.append((os.path.expanduser(path), has_voice_tag))
# Remove MEDIA tags from content (including surrounding quote/backtick wrappers)
if media:
@ -1509,7 +1612,9 @@ class BasePlatformAdapter(ABC):
# session lifecycle and its cleanup races with the running task
# (see PR #4926).
cmd = event.get_command()
if cmd in ("approve", "deny", "status", "stop", "new", "reset", "background", "restart"):
from hermes_cli.commands import should_bypass_active_session
if should_bypass_active_session(cmd):
logger.debug(
"[%s] Command '/%s' bypassing active-session guard for %s",
self.name, cmd, session_key,
@ -1624,6 +1729,21 @@ class BasePlatformAdapter(ABC):
# streaming already delivered the text (already_sent=True) or
# when the message was queued behind an active agent. Log at
# DEBUG to avoid noisy warnings for expected behavior.
#
# Suppress stale response when the session was interrupted by a
# new message that hasn't been consumed yet. The pending message
# is processed by the pending-message handler below (#8221/#2483).
if (
response
and interrupt_event.is_set()
and session_key in self._pending_messages
):
logger.info(
"[%s] Suppressing stale response for interrupted session %s",
self.name,
session_key,
)
response = None
if not response:
logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
if response:
@ -1845,6 +1965,14 @@ class BasePlatformAdapter(ABC):
except Exception:
pass # Last resort — don't let error reporting crash the handler
finally:
# Fire any one-shot post-delivery callback registered for this
# session (e.g. deferred background-review notifications).
_post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
if callable(_post_cb):
try:
_post_cb()
except Exception:
pass
# Stop typing indicator
typing_task.cancel()
try:
@ -1898,6 +2026,7 @@ class BasePlatformAdapter(ABC):
chat_topic: Optional[str] = None,
user_id_alt: Optional[str] = None,
chat_id_alt: Optional[str] = None,
is_bot: bool = False,
) -> SessionSource:
"""Helper to build a SessionSource for this platform."""
# Normalize empty topic to None
@ -1914,6 +2043,7 @@ class BasePlatformAdapter(ABC):
chat_topic=chat_topic.strip() if chat_topic else None,
user_id_alt=user_id_alt,
chat_id_alt=chat_id_alt,
is_bot=is_bot,
)
@abstractmethod

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -1073,6 +1073,13 @@ class FeishuAdapter(BasePlatformAdapter):
self._webhook_rate_counts: Dict[str, tuple[int, float]] = {} # rate_key → (count, window_start)
self._webhook_anomaly_counts: Dict[str, tuple[int, str, float]] = {} # ip → (count, last_status, first_seen)
self._card_action_tokens: Dict[str, float] = {} # token → first_seen_time
# Inbound events that arrived before the adapter loop was ready
# (e.g. during startup/restart or network-flap reconnect). A single
# drainer thread replays them as soon as the loop becomes available.
self._pending_inbound_events: List[Any] = []
self._pending_inbound_lock = threading.Lock()
self._pending_drain_scheduled = False
self._pending_inbound_max_depth = 1000 # cap queue; drop oldest beyond
self._chat_locks: Dict[str, asyncio.Lock] = {} # chat_id → lock (per-chat serial processing)
self._sent_message_ids_to_chat: Dict[str, str] = {} # message_id → chat_id (for reaction routing)
self._sent_message_id_order: List[str] = [] # LRU order for _sent_message_ids_to_chat
@ -1219,6 +1226,12 @@ class FeishuAdapter(BasePlatformAdapter):
.register_p2_card_action_trigger(self._on_card_action_trigger)
.register_p2_im_chat_member_bot_added_v1(self._on_bot_added_to_chat)
.register_p2_im_chat_member_bot_deleted_v1(self._on_bot_removed_from_chat)
.register_p2_im_chat_access_event_bot_p2p_chat_entered_v1(self._on_p2p_chat_entered)
.register_p2_im_message_recalled_v1(self._on_message_recalled)
.register_p2_customized_event(
"drive.notice.comment_add_v1",
self._on_drive_comment_event,
)
.build()
)
@ -1757,10 +1770,22 @@ class FeishuAdapter(BasePlatformAdapter):
# =========================================================================
def _on_message_event(self, data: Any) -> None:
"""Normalize Feishu inbound events into MessageEvent."""
"""Normalize Feishu inbound events into MessageEvent.
Called by the lark_oapi SDK's event dispatcher on a background thread.
If the adapter loop is not currently accepting callbacks (brief window
during startup/restart or network-flap reconnect), the event is queued
for replay instead of dropped.
"""
loop = self._loop
if loop is None or bool(getattr(loop, "is_closed", lambda: False)()):
logger.warning("[Feishu] Dropping inbound message before adapter loop is ready")
if not self._loop_accepts_callbacks(loop):
start_drainer = self._enqueue_pending_inbound_event(data)
if start_drainer:
threading.Thread(
target=self._drain_pending_inbound_events,
name="feishu-pending-inbound-drainer",
daemon=True,
).start()
return
future = asyncio.run_coroutine_threadsafe(
self._handle_message_event_data(data),
@ -1768,6 +1793,124 @@ class FeishuAdapter(BasePlatformAdapter):
)
future.add_done_callback(self._log_background_failure)
def _enqueue_pending_inbound_event(self, data: Any) -> bool:
"""Append an event to the pending-inbound queue.
Returns True if the caller should spawn a drainer thread (no drainer
currently scheduled), False if a drainer is already running and will
pick up the new event on its next pass.
"""
with self._pending_inbound_lock:
if len(self._pending_inbound_events) >= self._pending_inbound_max_depth:
# Queue full — drop the oldest to make room. This happens only
# if the loop stays unavailable for an extended period AND the
# WS keeps firing callbacks. Still better than silent drops.
dropped = self._pending_inbound_events.pop(0)
try:
event = getattr(dropped, "event", None)
message = getattr(event, "message", None)
message_id = str(getattr(message, "message_id", "") or "unknown")
except Exception:
message_id = "unknown"
logger.error(
"[Feishu] Pending-inbound queue full (%d); dropped oldest event %s",
self._pending_inbound_max_depth,
message_id,
)
self._pending_inbound_events.append(data)
depth = len(self._pending_inbound_events)
should_start = not self._pending_drain_scheduled
if should_start:
self._pending_drain_scheduled = True
logger.warning(
"[Feishu] Queued inbound event for replay (loop not ready, queue depth=%d)",
depth,
)
return should_start
def _drain_pending_inbound_events(self) -> None:
"""Replay queued inbound events once the adapter loop is ready.
Runs in a dedicated daemon thread. Polls ``_running`` and
``_loop_accepts_callbacks`` until events can be dispatched or the
adapter shuts down. A single drainer handles the entire queue;
concurrent ``_on_message_event`` calls just append.
"""
poll_interval = 0.25
max_wait_seconds = 120.0 # safety cap: drop queue after 2 minutes
waited = 0.0
try:
while True:
if not getattr(self, "_running", True):
# Adapter shutting down — drop queued events rather than
# holding them against a closed loop.
with self._pending_inbound_lock:
dropped = len(self._pending_inbound_events)
self._pending_inbound_events.clear()
if dropped:
logger.warning(
"[Feishu] Dropped %d queued inbound event(s) during shutdown",
dropped,
)
return
loop = self._loop
if self._loop_accepts_callbacks(loop):
with self._pending_inbound_lock:
batch = self._pending_inbound_events[:]
self._pending_inbound_events.clear()
if not batch:
# Queue emptied between check and grab; done.
with self._pending_inbound_lock:
if not self._pending_inbound_events:
return
continue
dispatched = 0
requeue: List[Any] = []
for event in batch:
try:
fut = asyncio.run_coroutine_threadsafe(
self._handle_message_event_data(event),
loop,
)
fut.add_done_callback(self._log_background_failure)
dispatched += 1
except RuntimeError:
# Loop closed between check and submit — requeue
# and poll again.
requeue.append(event)
if requeue:
with self._pending_inbound_lock:
self._pending_inbound_events[:0] = requeue
if dispatched:
logger.info(
"[Feishu] Replayed %d queued inbound event(s)",
dispatched,
)
if not requeue:
# Successfully drained; check if more arrived while
# we were dispatching and exit if not.
with self._pending_inbound_lock:
if not self._pending_inbound_events:
return
# More events queued or requeue pending — loop again.
continue
if waited >= max_wait_seconds:
with self._pending_inbound_lock:
dropped = len(self._pending_inbound_events)
self._pending_inbound_events.clear()
logger.error(
"[Feishu] Adapter loop unavailable for %.0fs; "
"dropped %d queued inbound event(s)",
max_wait_seconds,
dropped,
)
return
time.sleep(poll_interval)
waited += poll_interval
finally:
with self._pending_inbound_lock:
self._pending_drain_scheduled = False
async def _handle_message_event_data(self, data: Any) -> None:
"""Shared inbound message handling for websocket and webhook transports."""
event = getattr(data, "event", None)
@ -1820,6 +1963,31 @@ class FeishuAdapter(BasePlatformAdapter):
logger.info("[Feishu] Bot removed from chat: %s", chat_id)
self._chat_info_cache.pop(chat_id, None)
def _on_p2p_chat_entered(self, data: Any) -> None:
logger.debug("[Feishu] User entered P2P chat with bot")
def _on_message_recalled(self, data: Any) -> None:
logger.debug("[Feishu] Message recalled by user")
def _on_drive_comment_event(self, data: Any) -> None:
"""Handle drive document comment notification (drive.notice.comment_add_v1).
Delegates to :mod:`gateway.platforms.feishu_comment` for parsing,
logging, and reaction. Scheduling follows the same
``run_coroutine_threadsafe`` pattern used by ``_on_message_event``.
"""
from gateway.platforms.feishu_comment import handle_drive_comment_event
loop = self._loop
if not self._loop_accepts_callbacks(loop):
logger.warning("[Feishu] Dropping drive comment event before adapter loop is ready")
return
future = asyncio.run_coroutine_threadsafe(
handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id),
loop,
)
future.add_done_callback(self._log_background_failure)
def _on_reaction_event(self, event_type: str, data: Any) -> None:
"""Route user reactions on bot messages as synthetic text events."""
event = getattr(data, "event", None)
@ -2445,6 +2613,8 @@ class FeishuAdapter(BasePlatformAdapter):
self._on_reaction_event(event_type, data)
elif event_type == "card.action.trigger":
self._on_card_action_trigger(data)
elif event_type == "drive.notice.comment_add_v1":
self._on_drive_comment_event(data)
else:
logger.debug("[Feishu] Ignoring webhook event type: %s", event_type or "unknown")
return web.json_response({"code": 0, "msg": "ok"})
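The thread-to-loop handoff that `_on_message_event` and `_on_drive_comment_event` both rely on, in miniature (SDK callback name and payload are made up):
```python
import asyncio
import threading
import time

loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()

async def handle(data):
    print("handled on the adapter loop:", data)

def sdk_callback(data):  # invoked on the SDK's background thread
    fut = asyncio.run_coroutine_threadsafe(handle(data), loop)
    fut.add_done_callback(lambda f: f.exception())  # surface failures

sdk_callback({"msg": "hi"})
time.sleep(0.1)  # give the loop a moment before the demo exits
```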

File diff suppressed because it is too large

View file

@ -0,0 +1,429 @@
"""
Feishu document comment access-control rules.
3-tier rule resolution: exact doc > wildcard "*" > top-level > code defaults.
Each field (enabled/policy/allow_from) falls back independently.
Config: ~/.hermes/feishu_comment_rules.json (mtime-cached, hot-reload).
Pairing store: ~/.hermes/feishu_comment_pairing.json.
"""
from __future__ import annotations
import json
import logging
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
#
# Uses the canonical ``get_hermes_home()`` helper (HERMES_HOME-aware and
# profile-safe). Resolved at import time; this module is lazy-imported by
# the Feishu comment event handler, which runs long after profile overrides
# have been applied, so freezing paths here is safe.
RULES_FILE = get_hermes_home() / "feishu_comment_rules.json"
PAIRING_FILE = get_hermes_home() / "feishu_comment_pairing.json"
# ---------------------------------------------------------------------------
# Data models
# ---------------------------------------------------------------------------
_VALID_POLICIES = ("allowlist", "pairing")
@dataclass(frozen=True)
class CommentDocumentRule:
"""Per-document rule. ``None`` means 'inherit from lower tier'."""
enabled: Optional[bool] = None
policy: Optional[str] = None
allow_from: Optional[frozenset] = None
@dataclass(frozen=True)
class CommentsConfig:
"""Top-level comment access config."""
enabled: bool = True
policy: str = "pairing"
allow_from: frozenset = field(default_factory=frozenset)
documents: Dict[str, CommentDocumentRule] = field(default_factory=dict)
@dataclass(frozen=True)
class ResolvedCommentRule:
"""Fully resolved rule after field-by-field fallback."""
enabled: bool
policy: str
allow_from: frozenset
match_source: str # e.g. "exact:docx:xxx" | "wildcard" | "top" | "default"
# ---------------------------------------------------------------------------
# Mtime-cached file loading
# ---------------------------------------------------------------------------
class _MtimeCache:
"""Generic mtime-based file cache. ``stat()`` per access, re-read only on change."""
def __init__(self, path: Path):
self._path = path
self._mtime: float = 0.0
self._data: Optional[dict] = None
def load(self) -> dict:
try:
st = self._path.stat()
mtime = st.st_mtime
except FileNotFoundError:
self._mtime = 0.0
self._data = {}
return {}
if mtime == self._mtime and self._data is not None:
return self._data
try:
with open(self._path, "r", encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
data = {}
except (json.JSONDecodeError, OSError):
logger.warning("[Feishu-Rules] Failed to read %s, using empty config", self._path)
data = {}
self._mtime = mtime
self._data = data
return data
_rules_cache = _MtimeCache(RULES_FILE)
_pairing_cache = _MtimeCache(PAIRING_FILE)
# ---------------------------------------------------------------------------
# Config parsing
# ---------------------------------------------------------------------------
def _parse_frozenset(raw: Any) -> Optional[frozenset]:
"""Parse a list of strings into a frozenset; return None if key absent."""
if raw is None:
return None
if isinstance(raw, (list, tuple)):
return frozenset(str(u).strip() for u in raw if str(u).strip())
return None
def _parse_document_rule(raw: dict) -> CommentDocumentRule:
enabled = raw.get("enabled")
if enabled is not None:
enabled = bool(enabled)
policy = raw.get("policy")
if policy is not None:
policy = str(policy).strip().lower()
if policy not in _VALID_POLICIES:
policy = None
allow_from = _parse_frozenset(raw.get("allow_from"))
return CommentDocumentRule(enabled=enabled, policy=policy, allow_from=allow_from)
def load_config() -> CommentsConfig:
"""Load comment rules from disk (mtime-cached)."""
raw = _rules_cache.load()
if not raw:
return CommentsConfig()
documents: Dict[str, CommentDocumentRule] = {}
raw_docs = raw.get("documents", {})
if isinstance(raw_docs, dict):
for key, rule_raw in raw_docs.items():
if isinstance(rule_raw, dict):
documents[str(key)] = _parse_document_rule(rule_raw)
policy = str(raw.get("policy", "pairing")).strip().lower()
if policy not in _VALID_POLICIES:
policy = "pairing"
return CommentsConfig(
enabled=raw.get("enabled", True),
policy=policy,
allow_from=_parse_frozenset(raw.get("allow_from")) or frozenset(),
documents=documents,
)
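# Shape sketch of the rules file this parser accepts (hypothetical tokens;
# any field may be omitted and falls back per the tier rules below):
#
#   {
#     "enabled": true,
#     "policy": "pairing",
#     "allow_from": ["ou_admin_xxx"],
#     "documents": {
#       "docx:doccnXXXX": {"policy": "allowlist", "allow_from": ["ou_a"]},
#       "wiki:wikcnYYYY": {"enabled": false},
#       "*": {"policy": "pairing"}
#     }
#   }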
# ---------------------------------------------------------------------------
# Rule resolution (§8.4 field-by-field fallback)
# ---------------------------------------------------------------------------
def has_wiki_keys(cfg: CommentsConfig) -> bool:
"""Check if any document rule key starts with 'wiki:'."""
return any(k.startswith("wiki:") for k in cfg.documents)
def resolve_rule(
cfg: CommentsConfig,
file_type: str,
file_token: str,
wiki_token: str = "",
) -> ResolvedCommentRule:
"""Resolve effective rule: exact doc → wiki key → wildcard → top-level → defaults."""
exact_key = f"{file_type}:{file_token}"
exact = cfg.documents.get(exact_key)
exact_src = f"exact:{exact_key}"
if exact is None and wiki_token:
wiki_key = f"wiki:{wiki_token}"
exact = cfg.documents.get(wiki_key)
exact_src = f"exact:{wiki_key}"
wildcard = cfg.documents.get("*")
layers = []
if exact is not None:
layers.append((exact, exact_src))
if wildcard is not None:
layers.append((wildcard, "wildcard"))
def _pick(field_name: str):
for layer, source in layers:
val = getattr(layer, field_name)
if val is not None:
return val, source
return getattr(cfg, field_name), "top"
enabled, en_src = _pick("enabled")
policy, pol_src = _pick("policy")
allow_from, _ = _pick("allow_from")
# match_source = highest-priority tier that contributed any field
priority_order = {"exact": 0, "wildcard": 1, "top": 2}
best_src = min(
[en_src, pol_src],
key=lambda s: priority_order.get(s.split(":")[0], 3),
)
return ResolvedCommentRule(
enabled=enabled,
policy=policy,
allow_from=allow_from,
match_source=best_src,
)
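# Worked sketch of the field-by-field fallback (hypothetical tokens): each
# field is picked from the highest tier that sets it, independently.
def _demo_resolve_fallback() -> None:
    cfg = CommentsConfig(
        enabled=True,
        policy="allowlist",
        allow_from=frozenset({"ou_admin"}),
        documents={
            "docx:AAA": CommentDocumentRule(policy="pairing"),
            "*": CommentDocumentRule(enabled=False),
        },
    )
    rule = resolve_rule(cfg, "docx", "AAA")
    assert rule.policy == "pairing"                    # from the exact layer
    assert rule.enabled is False                       # from the wildcard layer
    assert rule.allow_from == frozenset({"ou_admin"})  # from the top level
    assert rule.match_source == "exact:docx:AAA"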
# ---------------------------------------------------------------------------
# Pairing store
# ---------------------------------------------------------------------------
def _load_pairing_approved() -> set:
"""Return set of approved user open_ids (mtime-cached)."""
data = _pairing_cache.load()
approved = data.get("approved", {})
if isinstance(approved, dict):
return set(approved.keys())
if isinstance(approved, list):
return set(str(u) for u in approved if u)
return set()
def _save_pairing(data: dict) -> None:
PAIRING_FILE.parent.mkdir(parents=True, exist_ok=True)
tmp = PAIRING_FILE.with_suffix(".tmp")
with open(tmp, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, ensure_ascii=False)
tmp.replace(PAIRING_FILE)
# Invalidate cache so next load picks up change
_pairing_cache._mtime = 0.0
_pairing_cache._data = None
def pairing_add(user_open_id: str) -> bool:
"""Add a user to the pairing-approved list. Returns True if newly added."""
data = _pairing_cache.load()
approved = data.get("approved", {})
if not isinstance(approved, dict):
approved = {}
if user_open_id in approved:
return False
approved[user_open_id] = {"approved_at": time.time()}
data["approved"] = approved
_save_pairing(data)
return True
def pairing_remove(user_open_id: str) -> bool:
"""Remove a user from the pairing-approved list. Returns True if removed."""
data = _pairing_cache.load()
approved = data.get("approved", {})
if not isinstance(approved, dict):
return False
if user_open_id not in approved:
return False
del approved[user_open_id]
data["approved"] = approved
_save_pairing(data)
return True
def pairing_list() -> Dict[str, Any]:
"""Return the approved dict {user_open_id: {approved_at: ...}}."""
data = _pairing_cache.load()
approved = data.get("approved", {})
return dict(approved) if isinstance(approved, dict) else {}
# ---------------------------------------------------------------------------
# Access check (public API for feishu_comment.py)
# ---------------------------------------------------------------------------
def is_user_allowed(rule: ResolvedCommentRule, user_open_id: str) -> bool:
"""Check if user passes the resolved rule's policy gate."""
if user_open_id in rule.allow_from:
return True
if rule.policy == "pairing":
return user_open_id in _load_pairing_approved()
return False
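# Minimal end-to-end sketch (hypothetical IDs) of the check the comment event
# handler is expected to perform before acting on a drive comment:
def _demo_access_check(file_type: str, file_token: str, user_open_id: str) -> bool:
    cfg = load_config()
    rule = resolve_rule(cfg, file_type, file_token)
    return rule.enabled and is_user_allowed(rule, user_open_id)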
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def _print_status() -> None:
cfg = load_config()
print(f"Rules file: {RULES_FILE}")
print(f" exists: {RULES_FILE.exists()}")
print(f"Pairing file: {PAIRING_FILE}")
print(f" exists: {PAIRING_FILE.exists()}")
print()
print(f"Top-level:")
print(f" enabled: {cfg.enabled}")
print(f" policy: {cfg.policy}")
print(f" allow_from: {sorted(cfg.allow_from) if cfg.allow_from else '[]'}")
print()
if cfg.documents:
print(f"Document rules ({len(cfg.documents)}):")
for key, rule in sorted(cfg.documents.items()):
parts = []
if rule.enabled is not None:
parts.append(f"enabled={rule.enabled}")
if rule.policy is not None:
parts.append(f"policy={rule.policy}")
if rule.allow_from is not None:
parts.append(f"allow_from={sorted(rule.allow_from)}")
print(f" [{key}] {', '.join(parts) if parts else '(empty — inherits all)'}")
else:
print("Document rules: (none)")
print()
approved = pairing_list()
print(f"Pairing approved ({len(approved)}):")
for uid, meta in sorted(approved.items()):
ts = meta.get("approved_at", 0)
print(f" {uid} (approved_at={ts})")
def _do_check(doc_key: str, user_open_id: str) -> None:
cfg = load_config()
parts = doc_key.split(":", 1)
if len(parts) != 2:
print(f"Error: doc_key must be 'fileType:fileToken', got '{doc_key}'")
return
file_type, file_token = parts
rule = resolve_rule(cfg, file_type, file_token)
allowed = is_user_allowed(rule, user_open_id)
print(f"Document: {doc_key}")
print(f"User: {user_open_id}")
print(f"Resolved rule:")
print(f" enabled: {rule.enabled}")
print(f" policy: {rule.policy}")
print(f" allow_from: {sorted(rule.allow_from) if rule.allow_from else '[]'}")
print(f" match_source: {rule.match_source}")
print(f"Result: {'ALLOWED' if allowed else 'DENIED'}")
def _main() -> int:
import sys
try:
from hermes_cli.env_loader import load_hermes_dotenv
load_hermes_dotenv()
except Exception:
pass
usage = (
"Usage: python -m gateway.platforms.feishu_comment_rules <command> [args]\n"
"\n"
"Commands:\n"
" status Show rules config and pairing state\n"
" check <fileType:token> <user> Simulate access check\n"
" pairing add <user_open_id> Add user to pairing-approved list\n"
" pairing remove <user_open_id> Remove user from pairing-approved list\n"
" pairing list List pairing-approved users\n"
"\n"
f"Rules config file: {RULES_FILE}\n"
" Edit this JSON file directly to configure policies and document rules.\n"
" Changes take effect on the next comment event (no restart needed).\n"
)
args = sys.argv[1:]
if not args:
print(usage)
return 1
cmd = args[0]
if cmd == "status":
_print_status()
elif cmd == "check":
if len(args) < 3:
print("Usage: check <fileType:fileToken> <user_open_id>")
return 1
_do_check(args[1], args[2])
elif cmd == "pairing":
if len(args) < 2:
print("Usage: pairing <add|remove|list> [args]")
return 1
sub = args[1]
if sub == "add":
if len(args) < 3:
print("Usage: pairing add <user_open_id>")
return 1
if pairing_add(args[2]):
print(f"Added: {args[2]}")
else:
print(f"Already approved: {args[2]}")
elif sub == "remove":
if len(args) < 3:
print("Usage: pairing remove <user_open_id>")
return 1
if pairing_remove(args[2]):
print(f"Removed: {args[2]}")
else:
print(f"Not in approved list: {args[2]}")
elif sub == "list":
approved = pairing_list()
if not approved:
print("(no approved users)")
for uid, meta in sorted(approved.items()):
print(f" {uid} approved_at={meta.get('approved_at', '?')}")
else:
print(f"Unknown pairing subcommand: {sub}")
return 1
else:
print(f"Unknown command: {cmd}\n")
print(usage)
return 1
return 0
if __name__ == "__main__":
import sys
sys.exit(_main())


@ -49,7 +49,10 @@ class MessageDeduplicator:
return False
now = time.time()
if msg_id in self._seen:
return True
if now - self._seen[msg_id] < self._ttl:
return True
# Entry has expired — remove it and treat as new
del self._seen[msg_id]
self._seen[msg_id] = now
if len(self._seen) > self._max_size:
cutoff = now - self._ttl

File diff suppressed because it is too large.


@ -718,6 +718,12 @@ class MattermostAdapter(BasePlatformAdapter):
thread_id=thread_id,
)
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent(
text=message_text,
message_type=msg_type,
@ -726,6 +732,7 @@ class MattermostAdapter(BasePlatformAdapter):
message_id=post_id,
media_urls=media_urls if media_urls else None,
media_types=media_types if media_types else None,
channel_prompt=_channel_prompt,
)
await self.handle_message(msg_event)


@ -0,0 +1,57 @@
"""
QQBot platform package.
Re-exports the main adapter symbols from ``adapter.py`` (the original
``qqbot.py``) so that **all existing import paths remain unchanged**::
from gateway.platforms.qqbot import QQAdapter # works
from gateway.platforms.qqbot import check_qq_requirements # works
New modules:
- ``constants``: shared constants (API URLs, timeouts, message types)
- ``utils``: User-Agent builder, config helpers
- ``crypto``: AES-256-GCM key generation and decryption
- ``onboard``: QR-code scan-to-configure flow
"""
# -- Adapter (original qqbot.py) ------------------------------------------
from .adapter import ( # noqa: F401
QQAdapter,
QQCloseError,
check_qq_requirements,
_coerce_list,
_ssrf_redirect_guard,
)
# -- Onboard (QR-code scan-to-configure) -----------------------------------
from .onboard import ( # noqa: F401
BindStatus,
create_bind_task,
poll_bind_result,
build_connect_url,
)
from .crypto import decrypt_secret, generate_bind_key # noqa: F401
# -- Utils -----------------------------------------------------------------
from .utils import build_user_agent, get_api_headers, coerce_list # noqa: F401
__all__ = [
# adapter
"QQAdapter",
"QQCloseError",
"check_qq_requirements",
"_coerce_list",
"_ssrf_redirect_guard",
# onboard
"BindStatus",
"create_bind_task",
"poll_bind_result",
"build_connect_url",
# crypto
"decrypt_secret",
"generate_bind_key",
# utils
"build_user_agent",
"get_api_headers",
"coerce_list",
]


@ -0,0 +1,74 @@
"""QQBot package-level constants shared across adapter, onboard, and other modules."""
from __future__ import annotations
import os
# ---------------------------------------------------------------------------
# QQBot adapter version — bump on functional changes to the adapter package.
# ---------------------------------------------------------------------------
QQBOT_VERSION = "1.1.0"
# ---------------------------------------------------------------------------
# API endpoints
# ---------------------------------------------------------------------------
# The portal domain is configurable via QQ_PORTAL_HOST for corporate proxies
# or test environments. Default: q.qq.com (production).
PORTAL_HOST = os.getenv("QQ_PORTAL_HOST", "q.qq.com")
API_BASE = "https://api.sgroup.qq.com"
TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken"
GATEWAY_URL_PATH = "/gateway"
# QR-code onboard endpoints (on the portal host)
ONBOARD_CREATE_PATH = "/lite/create_bind_task"
ONBOARD_POLL_PATH = "/lite/poll_bind_result"
QR_URL_TEMPLATE = (
"https://q.qq.com/qqbot/openclaw/connect.html"
"?task_id={task_id}&_wv=2&source=hermes"
)
# ---------------------------------------------------------------------------
# Timeouts & retry
# ---------------------------------------------------------------------------
DEFAULT_API_TIMEOUT = 30.0
FILE_UPLOAD_TIMEOUT = 120.0
CONNECT_TIMEOUT_SECONDS = 20.0
RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
MAX_RECONNECT_ATTEMPTS = 100
RATE_LIMIT_DELAY = 60 # seconds
QUICK_DISCONNECT_THRESHOLD = 5.0 # seconds
MAX_QUICK_DISCONNECT_COUNT = 3
ONBOARD_POLL_INTERVAL = 2.0 # seconds between poll_bind_result calls
ONBOARD_API_TIMEOUT = 10.0
# ---------------------------------------------------------------------------
# Message limits
# ---------------------------------------------------------------------------
MAX_MESSAGE_LENGTH = 4000
DEDUP_WINDOW_SECONDS = 300
DEDUP_MAX_SIZE = 1000
# ---------------------------------------------------------------------------
# QQ Bot message types
# ---------------------------------------------------------------------------
MSG_TYPE_TEXT = 0
MSG_TYPE_MARKDOWN = 2
MSG_TYPE_MEDIA = 7
MSG_TYPE_INPUT_NOTIFY = 6
# ---------------------------------------------------------------------------
# QQ Bot file media types
# ---------------------------------------------------------------------------
MEDIA_TYPE_IMAGE = 1
MEDIA_TYPE_VIDEO = 2
MEDIA_TYPE_VOICE = 3
MEDIA_TYPE_FILE = 4


@ -0,0 +1,45 @@
"""AES-256-GCM utilities for QQBot scan-to-configure credential decryption."""
from __future__ import annotations
import base64
import os
def generate_bind_key() -> str:
"""Generate a 256-bit random AES key and return it as base64.
The key is passed to ``create_bind_task`` so the server can encrypt
the bot's *client_secret* before returning it. Only this CLI holds
the key, ensuring the secret never travels in plaintext.
"""
return base64.b64encode(os.urandom(32)).decode()
def decrypt_secret(encrypted_base64: str, key_base64: str) -> str:
"""Decrypt a base64-encoded AES-256-GCM ciphertext.
Ciphertext layout (after base64-decoding)::
IV (12 bytes) || ciphertext (N bytes) || AuthTag (16 bytes)
Args:
encrypted_base64: The ``bot_encrypt_secret`` value from
``poll_bind_result``.
key_base64: The base64 AES key generated by
:func:`generate_bind_key`.
Returns:
The decrypted *client_secret* as a UTF-8 string.
"""
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
key = base64.b64decode(key_base64)
raw = base64.b64decode(encrypted_base64)
iv = raw[:12]
ciphertext_with_tag = raw[12:] # AESGCM expects ciphertext + tag concatenated
aesgcm = AESGCM(key)
plaintext = aesgcm.decrypt(iv, ciphertext_with_tag, None)
return plaintext.decode("utf-8")
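# Round-trip sketch using the documented IV || ciphertext || tag layout.
# Purely illustrative: in the real flow only the server encrypts.
def _demo_roundtrip() -> None:
    from cryptography.hazmat.primitives.ciphers.aead import AESGCM
    key_b64 = generate_bind_key()
    key = base64.b64decode(key_b64)
    iv = os.urandom(12)
    ct_with_tag = AESGCM(key).encrypt(iv, b"secret-123", None)
    blob = base64.b64encode(iv + ct_with_tag).decode()
    assert decrypt_secret(blob, key_b64) == "secret-123"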


@ -0,0 +1,124 @@
"""
QQBot scan-to-configure (QR code onboard) module.
Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
generate a QR-code URL and poll for scan completion. On success the caller
receives the bot's *app_id*, *client_secret* (decrypted locally), and the
scanner's *user_openid* — enough to fully configure the QQBot gateway.
Reference: https://bot.q.qq.com/wiki/develop/api-v2/
"""
from __future__ import annotations
import logging
from enum import IntEnum
from typing import Tuple
from urllib.parse import quote
from .constants import (
ONBOARD_API_TIMEOUT,
ONBOARD_CREATE_PATH,
ONBOARD_POLL_PATH,
PORTAL_HOST,
QR_URL_TEMPLATE,
)
from .crypto import generate_bind_key
from .utils import get_api_headers
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Bind status
# ---------------------------------------------------------------------------
class BindStatus(IntEnum):
"""Status codes returned by ``poll_bind_result``."""
NONE = 0
PENDING = 1
COMPLETED = 2
EXPIRED = 3
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def create_bind_task(
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[str, str]:
"""Create a bind task and return *(task_id, aes_key_base64)*.
The AES key is generated locally and sent to the server so it can
encrypt the bot credentials before returning them.
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
import httpx
url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
key = generate_bind_key()
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"key": key}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
if data.get("retcode") != 0:
raise RuntimeError(data.get("msg", "create_bind_task failed"))
task_id = data.get("data", {}).get("task_id")
if not task_id:
raise RuntimeError("create_bind_task: missing task_id in response")
logger.debug("create_bind_task ok: task_id=%s", task_id)
return task_id, key
async def poll_bind_result(
task_id: str,
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[BindStatus, str, str, str]:
"""Poll the bind result for *task_id*.
Returns:
A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.
* ``bot_encrypt_secret`` is AES-256-GCM encrypted; decrypt it with
:func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
key from :func:`create_bind_task`.
* ``user_openid`` is the OpenID of the person who scanned the code
(available when ``status == COMPLETED``).
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
import httpx
url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
if data.get("retcode") != 0:
raise RuntimeError(data.get("msg", "poll_bind_result failed"))
d = data.get("data", {})
return (
BindStatus(d.get("status", 0)),
str(d.get("bot_appid", "")),
d.get("bot_encrypt_secret", ""),
d.get("user_openid", ""),
)
def build_connect_url(task_id: str) -> str:
"""Build the QR-code target URL for a given *task_id*."""
return QR_URL_TEMPLATE.format(task_id=quote(task_id))
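# Hedged usage sketch: one way a CLI could drive the flow end to end.
# QR rendering and the overall timeout policy are assumed to live in the
# caller; names prefixed _demo are illustrative only.
async def _demo_onboard_flow(max_wait: float = 120.0):
    import asyncio
    import time
    from .crypto import decrypt_secret

    task_id, key = await create_bind_task()
    print("Render as QR code:", build_connect_url(task_id))
    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        status, appid, enc_secret, user_openid = await poll_bind_result(task_id)
        if status == BindStatus.COMPLETED:
            return appid, decrypt_secret(enc_secret, key), user_openid
        if status == BindStatus.EXPIRED:
            raise RuntimeError("bind task expired; restart the flow")
        await asyncio.sleep(ONBOARD_POLL_INTERVAL)
    raise TimeoutError("QR code was not scanned in time")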


@ -0,0 +1,71 @@
"""QQBot shared utilities — User-Agent, HTTP helpers, config coercion."""
from __future__ import annotations
import platform
import sys
from typing import Any, Dict, List
from .constants import QQBOT_VERSION
# ---------------------------------------------------------------------------
# User-Agent
# ---------------------------------------------------------------------------
def _get_hermes_version() -> str:
"""Return the hermes-agent package version, or 'dev' if unavailable."""
try:
from importlib.metadata import version
return version("hermes-agent")
except Exception:
return "dev"
def build_user_agent() -> str:
"""Build a descriptive User-Agent string.
Format::
QQBotAdapter/<qqbot_version> (Python/<py_version>; <os>; Hermes/<hermes_version>)
Example::
QQBotAdapter/1.0.0 (Python/3.11.15; darwin; Hermes/0.9.0)
"""
py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
os_name = platform.system().lower()
hermes_version = _get_hermes_version()
return f"QQBotAdapter/{QQBOT_VERSION} (Python/{py_version}; {os_name}; Hermes/{hermes_version})"
def get_api_headers() -> Dict[str, str]:
"""Return standard HTTP headers for QQBot API requests.
Includes ``Content-Type``, ``Accept``, and a dynamic ``User-Agent``.
``q.qq.com`` requires ``Accept: application/json``; without it,
the server returns a JavaScript anti-bot challenge page.
"""
return {
"Content-Type": "application/json",
"Accept": "application/json",
"User-Agent": build_user_agent(),
}
# ---------------------------------------------------------------------------
# Config helpers
# ---------------------------------------------------------------------------
def coerce_list(value: Any) -> List[str]:
"""Coerce config values into a trimmed string list.
Accepts comma-separated strings, lists, tuples, sets, or single values.
"""
if value is None:
return []
if isinstance(value, str):
return [item.strip() for item in value.split(",") if item.strip()]
if isinstance(value, (list, tuple, set)):
return [str(item).strip() for item in value if str(item).strip()]
return [str(value).strip()] if str(value).strip() else []
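# Behaviour sketch (hypothetical inputs):
#   coerce_list("a, b , ,c")   -> ["a", "b", "c"]
#   coerce_list(["a", 1, " "]) -> ["a", "1"]
#   coerce_list(None)          -> []
#   coerce_list(42)            -> ["42"]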


@ -160,6 +160,14 @@ class SignalAdapter(BasePlatformAdapter):
self._sse_task: Optional[asyncio.Task] = None
self._health_monitor_task: Optional[asyncio.Task] = None
self._typing_tasks: Dict[str, asyncio.Task] = {}
# Per-chat typing-indicator backoff. When signal-cli reports
# NETWORK_FAILURE (recipient offline / unroutable), base.py's
# _keep_typing refresh loop would otherwise hammer sendTyping every
# ~2s indefinitely, producing WARNING-level log spam and pointless
# RPC traffic. We track consecutive failures per chat and skip the
# RPC during a cooldown window instead.
self._typing_failures: Dict[str, int] = {}
self._typing_skip_until: Dict[str, float] = {}
self._running = False
self._last_sse_activity = 0.0
self._sse_response: Optional[httpx.Response] = None
@ -548,8 +556,22 @@ class SignalAdapter(BasePlatformAdapter):
# JSON-RPC Communication
# ------------------------------------------------------------------
async def _rpc(self, method: str, params: dict, rpc_id: str = None) -> Any:
"""Send a JSON-RPC 2.0 request to signal-cli daemon."""
async def _rpc(
self,
method: str,
params: dict,
rpc_id: str = None,
*,
log_failures: bool = True,
) -> Any:
"""Send a JSON-RPC 2.0 request to signal-cli daemon.
When ``log_failures=False``, error and exception paths log at DEBUG
instead of WARNING. This is used by the typing-indicator path to silence
repeated NETWORK_FAILURE spam for unreachable recipients while
still preserving visibility for the first occurrence and for
unrelated RPCs.
"""
if not self.client:
logger.warning("Signal: RPC called but client not connected")
return None
@ -574,13 +596,19 @@ class SignalAdapter(BasePlatformAdapter):
data = resp.json()
if "error" in data:
logger.warning("Signal RPC error (%s): %s", method, data["error"])
if log_failures:
logger.warning("Signal RPC error (%s): %s", method, data["error"])
else:
logger.debug("Signal RPC error (%s): %s", method, data["error"])
return None
return data.get("result")
except Exception as e:
logger.warning("Signal RPC %s failed: %s", method, e)
if log_failures:
logger.warning("Signal RPC %s failed: %s", method, e)
else:
logger.debug("Signal RPC %s failed: %s", method, e)
return None
# ------------------------------------------------------------------
@ -627,7 +655,28 @@ class SignalAdapter(BasePlatformAdapter):
self._recent_sent_timestamps.pop()
async def send_typing(self, chat_id: str, metadata=None) -> None:
"""Send a typing indicator."""
"""Send a typing indicator.
base.py's ``_keep_typing`` refresh loop calls this every ~2s while
the agent is processing. If signal-cli returns NETWORK_FAILURE for
this recipient (offline, unroutable, group membership lost, etc.)
the unmitigated behaviour is: a WARNING log every 2 seconds for as
long as the agent keeps running. Instead we:
- silence the WARNING after the first consecutive failure (subsequent
attempts log at DEBUG) so transport issues are still visible once
but don't flood the log,
- skip the RPC entirely during an exponential cooldown window once
three consecutive failures have happened, so we stop hammering
signal-cli with requests it can't deliver.
A successful sendTyping clears the counters.
"""
now = time.monotonic()
skip_until = self._typing_skip_until.get(chat_id, 0.0)
if now < skip_until:
return
params: Dict[str, Any] = {
"account": self.account,
}
@ -637,7 +686,26 @@ class SignalAdapter(BasePlatformAdapter):
else:
params["recipient"] = [chat_id]
await self._rpc("sendTyping", params, rpc_id="typing")
fails = self._typing_failures.get(chat_id, 0)
result = await self._rpc(
"sendTyping",
params,
rpc_id="typing",
log_failures=(fails == 0),
)
if result is None:
fails += 1
self._typing_failures[chat_id] = fails
# After 3 consecutive failures, back off exponentially (16s,
# 32s, 60s cap) to stop spamming signal-cli for a recipient
# that clearly isn't reachable right now.
if fails >= 3:
backoff = min(60.0, 16.0 * (2 ** (fails - 3)))
self._typing_skip_until[chat_id] = now + backoff
else:
self._typing_failures.pop(chat_id, None)
self._typing_skip_until.pop(chat_id, None)
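    # Cooldown schedule sketch implied by the constants above:
    #   fails=3 -> min(60, 16 * 2**0) = 16s
    #   fails=4 -> min(60, 16 * 2**1) = 32s
    #   fails=5 -> min(60, 16 * 2**2) = 60s (capped from here on)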
async def send_image(
self,
@ -789,6 +857,10 @@ class SignalAdapter(BasePlatformAdapter):
await task
except asyncio.CancelledError:
pass
# Reset per-chat typing backoff state so the next agent turn starts
# fresh rather than inheriting a cooldown from a prior conversation.
self._typing_failures.pop(chat_id, None)
self._typing_skip_until.pop(chat_id, None)
async def stop_typing(self, chat_id: str) -> None:
"""Public interface for stopping typing — called by base adapter's


@ -366,6 +366,20 @@ class SlackAdapter(BasePlatformAdapter):
# in an assistant-enabled context. Falls back to reactions.
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)
def _dm_top_level_threads_as_sessions(self) -> bool:
"""Whether top-level Slack DMs get per-message session threads.
Defaults to ``True`` so each visible DM reply thread is isolated as its
own Hermes session, matching the per-thread behavior channels already
have. Set ``platforms.slack.extra.dm_top_level_threads_as_sessions``
to ``false`` in config.yaml to revert to the legacy behavior where all
top-level DMs share one continuous session.
"""
raw = self.config.extra.get("dm_top_level_threads_as_sessions")
if raw is None:
return True # default: each DM thread is its own session
return str(raw).strip().lower() in ("1", "true", "yes", "on")
def _resolve_thread_ts(
self,
reply_to: Optional[str] = None,
@ -996,10 +1010,14 @@ class SlackAdapter(BasePlatformAdapter):
# Build thread_ts for session keying.
# In channels: fall back to ts so each top-level @mention starts a
# new thread/session (the bot always replies in a thread).
# In DMs: only use the real thread_ts — top-level DMs should share
# one continuous session, threaded DMs get their own session.
# In DMs: fall back to ts so each top-level DM reply thread gets
# its own session key (matching channel behavior). Set
# dm_top_level_threads_as_sessions: false in config to revert to
# legacy single-session-per-DM-channel behavior.
if is_dm:
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts") # None for top-level DMs
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts")
if not thread_ts and self._dm_top_level_threads_as_sessions():
thread_ts = ts
else:
thread_ts = event.get("thread_ts") or ts # ts fallback for channels
@ -1167,6 +1185,12 @@ class SlackAdapter(BasePlatformAdapter):
thread_id=thread_ts,
)
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent(
text=text,
message_type=msg_type,
@ -1176,6 +1200,7 @@ class SlackAdapter(BasePlatformAdapter):
media_urls=media_urls,
media_types=media_types,
reply_to_message_id=thread_ts if thread_ts != ts else None,
channel_prompt=_channel_prompt,
)
# Only react when bot is directly addressed (DM or @mention).


@ -11,6 +11,7 @@ import asyncio
import json
import logging
import os
import html as _html
import re
from typing import Dict, List, Optional, Any
@ -18,6 +19,10 @@ logger = logging.getLogger(__name__)
try:
from telegram import Update, Bot, Message, InlineKeyboardButton, InlineKeyboardMarkup
try:
from telegram import LinkPreviewOptions
except ImportError:
LinkPreviewOptions = None
from telegram.ext import (
Application,
CommandHandler,
@ -36,6 +41,7 @@ except ImportError:
Message = Any
InlineKeyboardButton = Any
InlineKeyboardMarkup = Any
LinkPreviewOptions = None
Application = Any
CommandHandler = Any
CallbackQueryHandler = Any
@ -112,6 +118,84 @@ def _strip_mdv2(text: str) -> str:
return cleaned
# ---------------------------------------------------------------------------
# Markdown table → code block conversion
# ---------------------------------------------------------------------------
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
# so pipe tables render as noisy backslash-pipe text with no alignment.
# Wrapping the table in a fenced code block makes Telegram render it as
# monospace preformatted text with columns intact.
# Matches a GFM table delimiter row: optional outer pipes, cells containing
# only dashes (with optional leading/trailing colons for alignment) separated
# by '|'. Requires at least one internal '|' so lone '---' horizontal rules
# are NOT matched.
_TABLE_SEPARATOR_RE = re.compile(
r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$'
)
def _is_table_row(line: str) -> bool:
"""Return True if *line* could plausibly be a table data row."""
stripped = line.strip()
return bool(stripped) and '|' in stripped
def _wrap_markdown_tables(text: str) -> str:
"""Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
Detected by a row containing '|' immediately followed by a delimiter
row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
non-blank lines are consumed as the table body and included in the
wrapped block. Tables inside existing fenced code blocks are left
alone.
"""
if '|' not in text or '-' not in text:
return text
lines = text.split('\n')
out: list[str] = []
in_fence = False
i = 0
while i < len(lines):
line = lines[i]
stripped = line.lstrip()
# Track existing fenced code blocks — never touch content inside.
if stripped.startswith('```'):
in_fence = not in_fence
out.append(line)
i += 1
continue
if in_fence:
out.append(line)
i += 1
continue
# Look for a header row (contains '|') immediately followed by a
# delimiter row.
if (
'|' in line
and i + 1 < len(lines)
and _TABLE_SEPARATOR_RE.match(lines[i + 1])
):
table_block = [line, lines[i + 1]]
j = i + 2
while j < len(lines) and _is_table_row(lines[j]):
table_block.append(lines[j])
j += 1
out.append('```')
out.extend(table_block)
out.append('```')
i = j
continue
out.append(line)
i += 1
return '\n'.join(out)
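# Transformation sketch on a hypothetical two-column table:
def _demo_wrap_tables() -> None:
    src = "intro\n| name | qty |\n| ---- | --- |\n| tea  | 2   |\nafter"
    assert _wrap_markdown_tables(src).split('\n') == [
        'intro',
        '```',
        '| name | qty |',
        '| ---- | --- |',
        '| tea  | 2   |',
        '```',
        'after',
    ]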
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
@ -129,6 +213,7 @@ class TelegramAdapter(BasePlatformAdapter):
# When a chunk is near this limit, a continuation is almost certain.
_SPLIT_THRESHOLD = 4000
MEDIA_GROUP_WAIT_SECONDS = 0.8
_GENERAL_TOPIC_THREAD_ID = "1"
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.TELEGRAM)
@ -137,6 +222,7 @@ class TelegramAdapter(BasePlatformAdapter):
self._webhook_mode: bool = False
self._mention_patterns = self._compile_mention_patterns()
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
self._disable_link_previews: bool = self._coerce_bool_extra("disable_link_previews", False)
# Buffer rapid/album photo updates so Telegram image bursts are handled
# as a single MessageEvent instead of self-interrupting multiple turns.
self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
@ -163,6 +249,38 @@ class TelegramAdapter(BasePlatformAdapter):
# Approval button state: message_id → session_key
self._approval_state: Dict[int, str] = {}
@staticmethod
def _is_callback_user_authorized(user_id: str) -> bool:
"""Return whether a Telegram inline-button caller may perform gated actions."""
allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
if not allowed_csv:
return True
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
return "*" in allowed_ids or user_id in allowed_ids
@classmethod
def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
if not metadata:
return None
thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
return str(thread_id) if thread_id is not None else None
@classmethod
def _message_thread_id_for_send(cls, thread_id: Optional[str]) -> Optional[int]:
if not thread_id or str(thread_id) == cls._GENERAL_TOPIC_THREAD_ID:
return None
return int(thread_id)
@classmethod
def _message_thread_id_for_typing(cls, thread_id: Optional[str]) -> Optional[int]:
if not thread_id:
return None
return int(thread_id)
@staticmethod
def _is_thread_not_found_error(error: Exception) -> bool:
return "thread not found" in str(error).lower()
def _fallback_ips(self) -> list[str]:
"""Return validated fallback IPs from config (populated by _apply_env_overrides)."""
configured = self.config.extra.get("fallback_ips", []) if getattr(self.config, "extra", None) else []
@ -193,6 +311,26 @@ class TelegramAdapter(BasePlatformAdapter):
pass
return isinstance(error, OSError)
def _coerce_bool_extra(self, key: str, default: bool = False) -> bool:
value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
if value is None:
return default
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in ("true", "1", "yes", "on"):
return True
if lowered in ("false", "0", "no", "off"):
return False
return default
return bool(value)
def _link_preview_kwargs(self) -> Dict[str, Any]:
if not getattr(self, "_disable_link_previews", False):
return {}
if LinkPreviewOptions is not None:
return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
return {"disable_web_page_preview": True}
async def _handle_polling_network_error(self, error: Exception) -> None:
"""Reconnect polling after a transient network interruption.
@ -540,7 +678,7 @@ class TelegramAdapter(BasePlatformAdapter):
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
}
proxy_url = resolve_proxy_url()
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
fallback_ips = self._fallback_ips()
if not fallback_ips:
@ -606,14 +744,14 @@ class TelegramAdapter(BasePlatformAdapter):
from telegram.error import NetworkError, TimedOut
except ImportError:
NetworkError = TimedOut = OSError # type: ignore[misc,assignment]
_max_connect = 3
_max_connect = 8
for _attempt in range(_max_connect):
try:
await self._app.initialize()
break
except (NetworkError, TimedOut, OSError) as init_err:
if _attempt < _max_connect - 1:
wait = 2 ** _attempt
wait = min(2 ** _attempt, 15)
logger.warning(
"[%s] Connect attempt %d/%d failed: %s — retrying in %ds",
self.name, _attempt + 1, _max_connect, init_err, wait,
@ -814,7 +952,7 @@ class TelegramAdapter(BasePlatformAdapter):
]
message_ids = []
thread_id = metadata.get("thread_id") if metadata else None
thread_id = self._metadata_thread_id(metadata)
try:
from telegram.error import NetworkError as _NetErr
@ -834,7 +972,7 @@ class TelegramAdapter(BasePlatformAdapter):
for i, chunk in enumerate(chunks):
should_thread = self._should_thread_reply(reply_to, i)
reply_to_id = int(reply_to) if should_thread else None
effective_thread_id = int(thread_id) if thread_id else None
effective_thread_id = self._message_thread_id_for_send(thread_id)
msg = None
for _send_attempt in range(3):
@ -847,6 +985,7 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
except Exception as md_error:
# Markdown parsing failed, try plain text
@ -859,6 +998,7 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=None,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
else:
raise
@ -869,8 +1009,7 @@ class TelegramAdapter(BasePlatformAdapter):
# (not transient network issues). Detect and handle
# specific cases instead of blindly retrying.
if _BadReq and isinstance(send_err, _BadReq):
err_lower = str(send_err).lower()
if "thread not found" in err_lower and effective_thread_id is not None:
if self._is_thread_not_found_error(send_err) and effective_thread_id is not None:
# Thread doesn't exist — retry without
# message_thread_id so the message still
# reaches the chat.
@ -880,6 +1019,7 @@ class TelegramAdapter(BasePlatformAdapter):
)
effective_thread_id = None
continue
err_lower = str(send_err).lower()
if "message to be replied not found" in err_lower and reply_to_id is not None:
# Original message was deleted before we
# could reply — clear reply target and retry
@ -1046,6 +1186,7 @@ class TelegramAdapter(BasePlatformAdapter):
text=text,
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
**self._link_preview_kwargs(),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1068,15 +1209,13 @@ class TelegramAdapter(BasePlatformAdapter):
try:
cmd_preview = command[:3800] + "..." if len(command) > 3800 else command
text = (
f"⚠️ *Command Approval Required*\n\n"
f"`{cmd_preview}`\n\n"
f"Reason: {description}"
f"⚠️ <b>Command Approval Required</b>\n\n"
f"<pre>{_html.escape(cmd_preview)}</pre>\n\n"
f"Reason: {_html.escape(description)}"
)
# Resolve thread context for thread replies
thread_id = None
if metadata:
thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
thread_id = self._metadata_thread_id(metadata)
# We'll use the message_id as part of callback_data to look up session_key
# Send a placeholder first, then update — or use a counter.
@ -1100,11 +1239,13 @@ class TelegramAdapter(BasePlatformAdapter):
kwargs: Dict[str, Any] = {
"chat_id": int(chat_id),
"text": text,
"parse_mode": ParseMode.MARKDOWN,
"parse_mode": ParseMode.HTML,
"reply_markup": keyboard,
**self._link_preview_kwargs(),
}
if thread_id:
kwargs["message_thread_id"] = int(thread_id)
message_thread_id = self._message_thread_id_for_send(thread_id)
if message_thread_id is not None:
kwargs["message_thread_id"] = message_thread_id
msg = await self._bot.send_message(**kwargs)
@ -1172,6 +1313,7 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
message_thread_id=int(thread_id) if thread_id else None,
**self._link_preview_kwargs(),
)
# Store picker state keyed by chat_id
@ -1440,12 +1582,9 @@ class TelegramAdapter(BasePlatformAdapter):
# Only authorized users may click approval buttons.
caller_id = str(getattr(query.from_user, "id", ""))
allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
if allowed_csv:
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
if "*" not in allowed_ids and caller_id not in allowed_ids:
await query.answer(text="⛔ You are not authorized to approve commands.")
return
if not self._is_callback_user_authorized(caller_id):
await query.answer(text="⛔ You are not authorized to approve commands.")
return
session_key = self._approval_state.pop(approval_id, None)
if not session_key:
@ -1490,6 +1629,10 @@ class TelegramAdapter(BasePlatformAdapter):
if not data.startswith("update_prompt:"):
return
answer = data.split(":", 1)[1] # "y" or "n"
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(caller_id):
await query.answer(text="⛔ You are not authorized to answer update prompts.")
return
await query.answer(text=f"Sent '{answer}' to the update process.")
# Edit the message to show the choice and remove buttons
label = "Yes" if answer == "y" else "No"
@ -1535,23 +1678,23 @@ class TelegramAdapter(BasePlatformAdapter):
with open(audio_path, "rb") as audio_file:
# .ogg files -> send as voice (round playable bubble)
if audio_path.endswith((".ogg", ".opus")):
_voice_thread = metadata.get("thread_id") if metadata else None
_voice_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_voice(
chat_id=int(chat_id),
voice=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_voice_thread) if _voice_thread else None,
message_thread_id=self._message_thread_id_for_send(_voice_thread),
)
else:
# .mp3 and others -> send as audio file
_audio_thread = metadata.get("thread_id") if metadata else None
_audio_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_audio(
chat_id=int(chat_id),
audio=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_audio_thread) if _audio_thread else None,
message_thread_id=self._message_thread_id_for_send(_audio_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1581,14 +1724,14 @@ class TelegramAdapter(BasePlatformAdapter):
if not os.path.exists(image_path):
return SendResult(success=False, error=f"Image file not found: {image_path}")
_thread = metadata.get("thread_id") if metadata else None
_thread = self._metadata_thread_id(metadata)
with open(image_path, "rb") as image_file:
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_thread) if _thread else None,
message_thread_id=self._message_thread_id_for_send(_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1619,7 +1762,7 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error=f"File not found: {file_path}")
display_name = file_name or os.path.basename(file_path)
_thread = metadata.get("thread_id") if metadata else None
_thread = self._metadata_thread_id(metadata)
with open(file_path, "rb") as f:
msg = await self._bot.send_document(
@ -1628,7 +1771,7 @@ class TelegramAdapter(BasePlatformAdapter):
filename=display_name,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_thread) if _thread else None,
message_thread_id=self._message_thread_id_for_send(_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1652,14 +1795,14 @@ class TelegramAdapter(BasePlatformAdapter):
if not os.path.exists(video_path):
return SendResult(success=False, error=f"Video file not found: {video_path}")
_thread = metadata.get("thread_id") if metadata else None
_thread = self._metadata_thread_id(metadata)
with open(video_path, "rb") as f:
msg = await self._bot.send_video(
chat_id=int(chat_id),
video=f,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_thread) if _thread else None,
message_thread_id=self._message_thread_id_for_send(_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1689,13 +1832,13 @@ class TelegramAdapter(BasePlatformAdapter):
try:
# Telegram can send photos directly from URLs (up to ~5MB)
_photo_thread = metadata.get("thread_id") if metadata else None
_photo_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_url,
caption=caption[:1024] if caption else None, # Telegram caption limit
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_photo_thread) if _photo_thread else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1718,6 +1861,7 @@ class TelegramAdapter(BasePlatformAdapter):
photo=image_data,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e2:
@ -1743,13 +1887,13 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
_anim_thread = metadata.get("thread_id") if metadata else None
_anim_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_animation(
chat_id=int(chat_id),
animation=animation_url,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_anim_thread) if _anim_thread else None,
message_thread_id=self._message_thread_id_for_send(_anim_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1766,12 +1910,23 @@ class TelegramAdapter(BasePlatformAdapter):
"""Send typing indicator."""
if self._bot:
try:
_typing_thread = metadata.get("thread_id") if metadata else None
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=int(_typing_thread) if _typing_thread else None,
)
_typing_thread = self._metadata_thread_id(metadata)
message_thread_id = self._message_thread_id_for_typing(_typing_thread)
try:
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=message_thread_id,
)
except Exception as e:
if message_thread_id is not None and self._is_thread_not_found_error(e):
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=None,
)
else:
raise
except Exception as e:
# Typing failures are non-fatal; log at debug level only.
logger.debug(
@ -1839,6 +1994,12 @@ class TelegramAdapter(BasePlatformAdapter):
text = content
# 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
# render tables natively, but fenced code blocks render as
# monospace preformatted text with columns intact. The wrapped
# tables then flow through step (1) below as protected regions.
text = _wrap_markdown_tables(text)
# 1) Protect fenced code blocks (``` ... ```)
# Per MarkdownV2 spec, \ and ` inside pre/code must be escaped.
def _protect_fenced(m):
@ -2165,7 +2326,7 @@ class TelegramAdapter(BasePlatformAdapter):
if not self._should_process_message(update.message):
return
event = self._build_message_event(update.message, MessageType.TEXT)
event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
event.text = self._clean_bot_trigger_text(event.text)
self._enqueue_text_event(event)
@ -2176,7 +2337,7 @@ class TelegramAdapter(BasePlatformAdapter):
if not self._should_process_message(update.message, is_command=True):
return
event = self._build_message_event(update.message, MessageType.COMMAND)
event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
await self.handle_message(event)
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@ -2212,7 +2373,7 @@ class TelegramAdapter(BasePlatformAdapter):
parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
event = self._build_message_event(msg, MessageType.LOCATION)
event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id)
event.text = "\n".join(parts)
await self.handle_message(event)
@ -2363,7 +2524,7 @@ class TelegramAdapter(BasePlatformAdapter):
else:
msg_type = MessageType.DOCUMENT
event = self._build_message_event(msg, msg_type)
event = self._build_message_event(msg, msg_type, update_id=update.update_id)
# Add caption as text
if msg.caption:
@ -2702,8 +2863,19 @@ class TelegramAdapter(BasePlatformAdapter):
self.name, cache_key, thread_id,
)
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
"""Build a MessageEvent from a Telegram message."""
def _build_message_event(
self,
message: Message,
msg_type: MessageType,
update_id: Optional[int] = None,
) -> MessageEvent:
"""Build a MessageEvent from a Telegram message.
``update_id`` is the ``Update.update_id`` from PTB; passing it through
lets ``/restart`` record the triggering offset so the new gateway
process can advance past it (prevents ``/restart`` being re-delivered
when PTB's graceful-shutdown ACK fails).
"""
chat = message.chat
user = message.from_user
@ -2716,7 +2888,9 @@ class TelegramAdapter(BasePlatformAdapter):
# Resolve DM topic name and skill binding
thread_id_raw = message.message_thread_id
thread_id_str = str(thread_id_raw) if thread_id_raw else None
thread_id_str = str(thread_id_raw) if thread_id_raw is not None else None
if chat_type == "group" and thread_id_str is None and getattr(chat, "is_forum", False):
thread_id_str = self._GENERAL_TOPIC_THREAD_ID
chat_topic = None
topic_skill = None
@ -2765,15 +2939,26 @@ class TelegramAdapter(BasePlatformAdapter):
reply_to_id = str(message.reply_to_message.message_id)
reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None
# Per-channel/topic ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_chat_id_str = str(chat.id)
_channel_prompt = resolve_channel_prompt(
self.config.extra,
thread_id_str or _chat_id_str,
_chat_id_str if thread_id_str else None,
)
return MessageEvent(
text=message.text or "",
message_type=msg_type,
source=source,
raw_message=message,
message_id=str(message.message_id),
platform_update_id=update_id,
reply_to_message_id=reply_to_id,
reply_to_text=reply_to_text,
auto_skill=topic_skill,
channel_prompt=_channel_prompt,
timestamp=message.date,
)


@ -46,7 +46,7 @@ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
def _resolve_proxy_url() -> str | None:
# Delegate to shared implementation (env vars + macOS system proxy detection)
from gateway.platforms.base import resolve_proxy_url
return resolve_proxy_url()
return resolve_proxy_url("TELEGRAM_PROXY")
class TelegramFallbackTransport(httpx.AsyncBaseTransport):


@ -180,6 +180,8 @@ class WeComAdapter(BasePlatformAdapter):
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
self._pending_text_batches: Dict[str, MessageEvent] = {}
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
self._device_id = uuid.uuid4().hex
self._last_chat_req_ids: Dict[str, str] = {}
# ------------------------------------------------------------------
# Connection lifecycle
@ -277,7 +279,11 @@ class WeComAdapter(BasePlatformAdapter):
{
"cmd": APP_CMD_SUBSCRIBE,
"headers": {"req_id": req_id},
"body": {"bot_id": self._bot_id, "secret": self._secret},
"body": {
"bot_id": self._bot_id,
"secret": self._secret,
"device_id": self._device_id,
},
}
)
@ -496,6 +502,11 @@ class WeComAdapter(BasePlatformAdapter):
logger.debug("[%s] DM sender %s blocked by policy", self.name, sender_id)
return
# Cache the inbound req_id after policy checks so proactive sends to
# this chat can fall back to APP_CMD_RESPONSE (required for groups —
# WeCom AI Bots cannot initiate APP_CMD_SEND in group chats).
self._remember_chat_req_id(chat_id, self._payload_req_id(payload))
text, reply_text = self._extract_text(body)
media_urls, media_types = await self._extract_media(body)
message_type = self._derive_message_type(body, text, media_types)
@ -847,6 +858,23 @@ class WeComAdapter(BasePlatformAdapter):
while len(self._reply_req_ids) > DEDUP_MAX_SIZE:
self._reply_req_ids.pop(next(iter(self._reply_req_ids)))
def _remember_chat_req_id(self, chat_id: str, req_id: str) -> None:
"""Cache the most recent inbound req_id per chat.
Used as a fallback reply target when we need to send into a group
without an explicit ``reply_to``; WeCom AI Bots are blocked from
APP_CMD_SEND in groups and must use APP_CMD_RESPONSE bound to some
prior req_id. Bounded like _reply_req_ids so long-running gateways
don't leak memory across many chats.
"""
normalized_chat_id = str(chat_id or "").strip()
normalized_req_id = str(req_id or "").strip()
if not normalized_chat_id or not normalized_req_id:
return
self._last_chat_req_ids[normalized_chat_id] = normalized_req_id
while len(self._last_chat_req_ids) > DEDUP_MAX_SIZE:
self._last_chat_req_ids.pop(next(iter(self._last_chat_req_ids)))
def _reply_req_id_for_message(self, reply_to: Optional[str]) -> Optional[str]:
normalized = str(reply_to or "").strip()
if not normalized or normalized.startswith("quote:"):
@ -1163,19 +1191,15 @@ class WeComAdapter(BasePlatformAdapter):
self._raise_for_wecom_error(response, "send media message")
return response
async def _send_reply_stream(self, reply_req_id: str, content: str) -> Dict[str, Any]:
async def _send_reply_markdown(self, reply_req_id: str, content: str) -> Dict[str, Any]:
response = await self._send_reply_request(
reply_req_id,
{
"msgtype": "stream",
"stream": {
"id": self._new_req_id("stream"),
"finish": True,
"content": content[:self.MAX_MESSAGE_LENGTH],
},
"msgtype": "markdown",
"markdown": {"content": content[:self.MAX_MESSAGE_LENGTH]},
},
)
self._raise_for_wecom_error(response, "send reply stream")
self._raise_for_wecom_error(response, "send reply markdown")
return response
async def _send_reply_media_message(
@ -1235,6 +1259,9 @@ class WeComAdapter(BasePlatformAdapter):
return SendResult(success=False, error=prepared["reject_reason"])
reply_req_id = self._reply_req_id_for_message(reply_to)
if not reply_req_id and chat_id in self._last_chat_req_ids:
reply_req_id = self._last_chat_req_ids[chat_id]
try:
upload_result = await self._upload_media_bytes(
prepared["data"],
@ -1302,8 +1329,12 @@ class WeComAdapter(BasePlatformAdapter):
try:
reply_req_id = self._reply_req_id_for_message(reply_to)
if not reply_req_id and chat_id in self._last_chat_req_ids:
reply_req_id = self._last_chat_req_ids[chat_id]
if reply_req_id:
response = await self._send_reply_stream(reply_req_id, content)
response = await self._send_reply_markdown(reply_req_id, content)
else:
response = await self._send_request(
APP_CMD_SEND,


@ -258,6 +258,20 @@ class WecomCallbackAdapter(BasePlatformAdapter):
)
event = self._build_event(app, decrypted)
if event is not None:
# Deduplicate: WeCom retries callbacks on timeout,
# producing duplicate inbound messages (#10305).
if event.message_id:
now = time.time()
if event.message_id in self._seen_messages:
if now - self._seen_messages[event.message_id] < MESSAGE_DEDUP_TTL_SECONDS:
logger.debug("[WecomCallback] Duplicate MsgId %s, skipping", event.message_id)
return web.Response(text="success", content_type="text/plain")
del self._seen_messages[event.message_id]
self._seen_messages[event.message_id] = now
# Prune expired entries when cache grows large
if len(self._seen_messages) > 2000:
cutoff = now - MESSAGE_DEDUP_TTL_SECONDS
self._seen_messages = {k: v for k, v in self._seen_messages.items() if v > cutoff}
# Record which app this user belongs to.
if event.source and event.source.user_id:
map_key = self._user_app_key(


@ -28,7 +28,7 @@ import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import quote
from urllib.parse import quote, urlparse
logger = logging.getLogger(__name__)
@ -96,6 +96,28 @@ MEDIA_VIDEO = 2
MEDIA_FILE = 3
MEDIA_VOICE = 4
_LIVE_ADAPTERS: Dict[str, Any] = {}
def _make_ssl_connector() -> Optional["aiohttp.TCPConnector"]:
"""Return a TCPConnector with a certifi CA bundle, or None if certifi is unavailable.
Tencent's iLink server (``ilinkai.weixin.qq.com``) is not verifiable against
some system CA stores (notably Homebrew's OpenSSL on macOS Apple Silicon).
When ``certifi`` is installed, use its Mozilla CA bundle to guarantee
verification. Otherwise fall back to aiohttp's default (which honors
``SSL_CERT_FILE`` env var via ``trust_env=True``).
"""
try:
import ssl
import certifi
except ImportError:
return None
if not AIOHTTP_AVAILABLE:
return None
ssl_ctx = ssl.create_default_context(cafile=certifi.where())
return aiohttp.TCPConnector(ssl=ssl_ctx)
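# Editorial sketch (not part of the diff): exercising the certifi-backed
# connector pattern above end-to-end. Assumes aiohttp and certifi are
# installed; the URL is illustrative.
import asyncio
import ssl

import aiohttp
import certifi


async def fetch_with_certifi(url: str) -> int:
    # Same construction as _make_ssl_connector: certifi's Mozilla bundle
    # backs the TLS context; trust_env keeps proxy/SSL_CERT_FILE working.
    ssl_ctx = ssl.create_default_context(cafile=certifi.where())
    connector = aiohttp.TCPConnector(ssl=ssl_ctx)
    async with aiohttp.ClientSession(trust_env=True, connector=connector) as session:
        async with session.get(url) as resp:
            return resp.status


if __name__ == "__main__":
    print(asyncio.run(fetch_with_certifi("https://example.com")))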
ITEM_TEXT = 1
ITEM_IMAGE = 2
ITEM_VOICE = 3
@ -398,7 +420,12 @@ async def _send_message(
text: str,
context_token: Optional[str],
client_id: str,
) -> None:
) -> Dict[str, Any]:
"""Send a text message via iLink sendmessage API.
Returns the raw API response dict (may contain error codes like
``errcode: -14`` for session expiry that the caller can inspect).
"""
if not text or not text.strip():
raise ValueError("_send_message: text must not be empty")
message: Dict[str, Any] = {
@ -411,7 +438,7 @@ async def _send_message(
}
if context_token:
message["context_token"] = context_token
await _api_post(
return await _api_post(
session,
base_url=base_url,
endpoint=EP_SEND_MESSAGE,
@ -533,6 +560,39 @@ async def _download_bytes(
return await response.read()
_WEIXIN_CDN_ALLOWLIST: frozenset[str] = frozenset(
{
"novac2c.cdn.weixin.qq.com",
"ilinkai.weixin.qq.com",
"wx.qlogo.cn",
"thirdwx.qlogo.cn",
"res.wx.qq.com",
"mmbiz.qpic.cn",
"mmbiz.qlogo.cn",
}
)
def _assert_weixin_cdn_url(url: str) -> None:
"""Raise ValueError if *url* does not point at a known WeChat CDN host."""
try:
parsed = urlparse(url)
scheme = parsed.scheme.lower()
host = parsed.hostname or ""
except Exception as exc: # noqa: BLE001
raise ValueError(f"Unparseable media URL: {url!r}") from exc
if scheme not in ("http", "https"):
raise ValueError(
f"Media URL has disallowed scheme {scheme!r}; only http/https are permitted."
)
if host not in _WEIXIN_CDN_ALLOWLIST:
raise ValueError(
f"Media URL host {host!r} is not in the WeChat CDN allowlist. "
"Refusing to fetch to prevent SSRF."
)
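# Editorial sketch (not part of the diff): the guard above in miniature,
# with a two-host subset of the allowlist. Demonstrates both the pass and
# the SSRF-rejection paths; hosts and URLs are illustrative.
from urllib.parse import urlparse

_DEMO_ALLOWED = {"wx.qlogo.cn", "mmbiz.qpic.cn"}


def _demo_assert_cdn_url(url: str) -> None:
    parsed = urlparse(url)
    if parsed.scheme.lower() not in ("http", "https"):
        raise ValueError(f"disallowed scheme: {parsed.scheme!r}")
    if (parsed.hostname or "") not in _DEMO_ALLOWED:
        raise ValueError(f"host not allowlisted: {parsed.hostname!r}")


_demo_assert_cdn_url("https://wx.qlogo.cn/avatar/123")  # passes silently
try:
    _demo_assert_cdn_url("http://169.254.169.254/latest/meta-data/")
except ValueError as exc:
    print(f"blocked: {exc}")  # cloud metadata endpoint rejected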
def _media_reference(item: Dict[str, Any], key: str) -> Dict[str, Any]:
return (item.get(key) or {}).get("media") or {}
@ -553,6 +613,7 @@ async def _download_and_decrypt_media(
timeout_seconds=timeout_seconds,
)
elif full_url:
_assert_weixin_cdn_url(full_url)
raw = await _download_bytes(session, url=full_url, timeout_seconds=timeout_seconds)
else:
raise RuntimeError("media item had neither encrypt_query_param nor full_url")
@ -623,42 +684,31 @@ def _rewrite_table_block_for_weixin(lines: List[str]) -> str:
def _normalize_markdown_blocks(content: str) -> str:
lines = content.splitlines()
result: List[str] = []
i = 0
in_code_block = False
blank_run = 0
while i < len(lines):
line = lines[i].rstrip()
fence_match = _FENCE_RE.match(line.strip())
if fence_match:
for raw_line in lines:
line = raw_line.rstrip()
if _FENCE_RE.match(line.strip()):
in_code_block = not in_code_block
result.append(line)
i += 1
blank_run = 0
continue
if in_code_block:
result.append(line)
i += 1
continue
if (
i + 1 < len(lines)
and "|" in lines[i]
and _TABLE_RULE_RE.match(lines[i + 1].rstrip())
):
table_lines = [lines[i].rstrip(), lines[i + 1].rstrip()]
i += 2
while i < len(lines) and "|" in lines[i]:
table_lines.append(lines[i].rstrip())
i += 1
result.append(_rewrite_table_block_for_weixin(table_lines))
if not line.strip():
blank_run += 1
if blank_run <= 1:
result.append("")
continue
result.append(_MARKDOWN_LINK_RE.sub(r"\1 (\2)", _rewrite_headers_for_weixin(line)))
i += 1
blank_run = 0
result.append(line)
normalized = "\n".join(item.rstrip() for item in result)
normalized = re.sub(r"\n{3,}", "\n\n", normalized)
return normalized.strip()
return "\n".join(result).strip()
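# Editorial sketch (not part of the diff): the blank-run-collapsing loop
# above, reduced to a standalone helper so its behavior is easy to see.
# Fences are detected with a literal ``` prefix instead of _FENCE_RE.
def _demo_collapse_blanks(text: str) -> str:
    out, in_fence, blanks = [], False, 0
    for raw in text.splitlines():
        line = raw.rstrip()
        if line.strip().startswith("```"):
            in_fence = not in_fence
            out.append(line)
            blanks = 0
            continue
        if in_fence:
            out.append(line)  # code blocks pass through verbatim
            continue
        if not line.strip():
            blanks += 1
            if blanks <= 1:
                out.append("")  # keep at most one blank line per run
            continue
        blanks = 0
        out.append(line)
    return "\n".join(out).strip()


assert _demo_collapse_blanks("a\n\n\n\nb") == "a\n\nb"
assert _demo_collapse_blanks("```\n\n\n```") == "```\n\n\n```"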
def _split_markdown_blocks(content: str) -> List[str]:
@ -704,8 +754,8 @@ def _split_delivery_units_for_weixin(content: str) -> List[str]:
Weixin can render Markdown, but chat readability is better when top-level
line breaks become separate messages. Keep fenced code blocks intact and
attach indented continuation lines to the previous top-level line so
transformed tables/lists do not get torn apart.
attach indented continuation lines to the previous top-level line so nested
list items do not get torn apart.
"""
units: List[str] = []
@ -747,7 +797,9 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool:
return False
if line.startswith((" ", "\t")):
return False
if stripped.startswith((">", "-", "*", "•")):
if stripped.startswith((">", "-", "*", "•", "#", "|")):
return False
if _TABLE_RULE_RE.match(stripped):
return False
if re.match(r"^\*\*[^*]+\*\*$", stripped):
return False
@ -757,10 +809,12 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool:
def _looks_like_heading_line_for_weixin(line: str) -> bool:
"""Return True when a short line behaves like a plain-text heading."""
"""Return True when a short line behaves like a heading."""
stripped = line.strip()
if not stripped:
return False
if _HEADER_RE.match(stripped):
return True
return len(stripped) <= 24 and stripped.endswith((":", "："))
@ -935,7 +989,7 @@ async def qr_login(
if not AIOHTTP_AVAILABLE:
raise RuntimeError("aiohttp is required for Weixin QR login")
async with aiohttp.ClientSession(trust_env=True) as session:
async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session:
try:
qr_resp = await _api_get(
session,
@ -953,6 +1007,10 @@ async def qr_login(
logger.error("weixin: QR response missing qrcode")
return None
# qrcode_url is the full scannable liteapp URL; qrcode_value is just the hex token
# WeChat needs to scan the full URL, not the raw hex string
qr_scan_data = qrcode_url if qrcode_url else qrcode_value
print("\n请使用微信扫描以下二维码:")
if qrcode_url:
print(qrcode_url)
@ -960,11 +1018,11 @@ async def qr_login(
import qrcode
qr = qrcode.QRCode()
qr.add_data(qrcode_url or qrcode_value)
qr.add_data(qr_scan_data)
qr.make(fit=True)
qr.print_ascii(invert=True)
except Exception:
print("(终端二维码渲染失败,请直接打开上面的二维码链接)")
except Exception as _qr_exc:
print(f"(终端二维码渲染失败: {_qr_exc},请直接打开上面的二维码链接)")
deadline = time.time() + timeout_seconds
current_base_url = ILINK_BASE_URL
@ -1010,8 +1068,17 @@ async def qr_login(
)
qrcode_value = str(qr_resp.get("qrcode") or "")
qrcode_url = str(qr_resp.get("qrcode_img_content") or "")
qr_scan_data = qrcode_url if qrcode_url else qrcode_value
if qrcode_url:
print(qrcode_url)
try:
import qrcode as _qrcode
qr = _qrcode.QRCode()
qr.add_data(qr_scan_data)
qr.make(fit=True)
qr.print_ascii(invert=True)
except Exception:
pass
except Exception as exc:
logger.error("weixin: QR refresh failed: %s", exc)
return None
@ -1059,7 +1126,8 @@ class WeixinAdapter(BasePlatformAdapter):
self._hermes_home = hermes_home
self._token_store = ContextTokenStore(hermes_home)
self._typing_cache = TypingTicketCache()
self._session: Optional[aiohttp.ClientSession] = None
self._poll_session: Optional[aiohttp.ClientSession] = None
self._send_session: Optional[aiohttp.ClientSession] = None
self._poll_task: Optional[asyncio.Task] = None
self._dedup = MessageDeduplicator(ttl_seconds=MESSAGE_DEDUP_TTL_SECONDS)
@ -1134,14 +1202,17 @@ class WeixinAdapter(BasePlatformAdapter):
except Exception as exc:
logger.debug("[%s] Token lock unavailable (non-fatal): %s", self.name, exc)
self._session = aiohttp.ClientSession(trust_env=True)
self._poll_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector())
self._send_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector())
self._token_store.restore(self._account_id)
self._poll_task = asyncio.create_task(self._poll_loop(), name="weixin-poll")
self._mark_connected()
_LIVE_ADAPTERS[self._token] = self
logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
return True
async def disconnect(self) -> None:
_LIVE_ADAPTERS.pop(self._token, None)
self._running = False
if self._poll_task and not self._poll_task.done():
self._poll_task.cancel()
@ -1150,15 +1221,18 @@ class WeixinAdapter(BasePlatformAdapter):
except asyncio.CancelledError:
pass
self._poll_task = None
if self._session and not self._session.closed:
await self._session.close()
self._session = None
if self._poll_session and not self._poll_session.closed:
await self._poll_session.close()
self._poll_session = None
if self._send_session and not self._send_session.closed:
await self._send_session.close()
self._send_session = None
self._release_platform_lock()
self._mark_disconnected()
logger.info("[%s] Disconnected", self.name)
async def _poll_loop(self) -> None:
assert self._session is not None
assert self._poll_session is not None
sync_buf = _load_sync_buf(self._hermes_home, self._account_id)
timeout_ms = LONG_POLL_TIMEOUT_MS
consecutive_failures = 0
@ -1166,7 +1240,7 @@ class WeixinAdapter(BasePlatformAdapter):
while self._running:
try:
response = await _get_updates(
self._session,
self._poll_session,
base_url=self._base_url,
token=self._token,
sync_buf=sync_buf,
@ -1223,7 +1297,7 @@ class WeixinAdapter(BasePlatformAdapter):
logger.error("[%s] unhandled inbound error from=%s: %s", self.name, _safe_id(message.get("from_user_id")), exc, exc_info=True)
async def _process_message(self, message: Dict[str, Any]) -> None:
assert self._session is not None
assert self._poll_session is not None
sender_id = str(message.get("from_user_id") or "").strip()
if not sender_id:
return
@ -1316,7 +1390,7 @@ class WeixinAdapter(BasePlatformAdapter):
media = _media_reference(item, "image_item")
try:
data = await _download_and_decrypt_media(
self._session,
self._poll_session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=(item.get("image_item") or {}).get("aeskey")
@ -1334,7 +1408,7 @@ class WeixinAdapter(BasePlatformAdapter):
media = _media_reference(item, "video_item")
try:
data = await _download_and_decrypt_media(
self._session,
self._poll_session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=media.get("aes_key"),
@ -1353,7 +1427,7 @@ class WeixinAdapter(BasePlatformAdapter):
mime = _mime_from_filename(filename)
try:
data = await _download_and_decrypt_media(
self._session,
self._poll_session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=media.get("aes_key"),
@ -1372,7 +1446,7 @@ class WeixinAdapter(BasePlatformAdapter):
return None
try:
data = await _download_and_decrypt_media(
self._session,
self._poll_session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=media.get("aes_key"),
@ -1385,13 +1459,13 @@ class WeixinAdapter(BasePlatformAdapter):
return None
async def _maybe_fetch_typing_ticket(self, user_id: str, context_token: Optional[str]) -> None:
if not self._session or not self._token:
if not self._poll_session or not self._token:
return
if self._typing_cache.get(user_id):
return
try:
response = await _get_config(
self._session,
self._poll_session,
base_url=self._base_url,
token=self._token,
user_id=user_id,
@ -1416,12 +1490,19 @@ class WeixinAdapter(BasePlatformAdapter):
context_token: Optional[str],
client_id: str,
) -> None:
"""Send a single text chunk with per-chunk retry and backoff."""
"""Send a single text chunk with per-chunk retry and backoff.
On session-expired errors (errcode -14), automatically retries
*without* ``context_token``; iLink accepts tokenless sends as a
degraded fallback, which keeps cron-initiated push messages working
even when no user message has refreshed the session recently.
"""
last_error: Optional[Exception] = None
retried_without_token = False
for attempt in range(self._send_chunk_retries + 1):
try:
await _send_message(
self._session,
resp = await _send_message(
self._send_session,
base_url=self._base_url,
token=self._token,
to=chat_id,
@ -1429,6 +1510,31 @@ class WeixinAdapter(BasePlatformAdapter):
context_token=context_token,
client_id=client_id,
)
# Check iLink response for session-expired error
if resp and isinstance(resp, dict):
ret = resp.get("ret")
errcode = resp.get("errcode")
if (ret is not None and ret not in (0,)) or (errcode is not None and errcode not in (0,)):
is_session_expired = (
ret == SESSION_EXPIRED_ERRCODE
or errcode == SESSION_EXPIRED_ERRCODE
)
# Session expired — strip token and retry once
if is_session_expired and not retried_without_token and context_token:
retried_without_token = True
context_token = None
self._token_store._cache.pop(
self._token_store._key(self._account_id, chat_id), None
)
logger.warning(
"[%s] session expired for %s; retrying without context_token",
self.name, _safe_id(chat_id),
)
continue
errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
raise RuntimeError(
f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
)
return
except Exception as exc:
last_error = exc
@ -1456,12 +1562,48 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
if not self._session or not self._token:
if not self._send_session or not self._token:
return SendResult(success=False, error="Not connected")
context_token = self._token_store.get(self._account_id, chat_id)
last_message_id: Optional[str] = None
# Extract MEDIA: tags and bare local file paths before text delivery.
media_files, cleaned_content = self.extract_media(content)
_, image_cleaned = self.extract_images(cleaned_content)
local_files, final_content = self.extract_local_files(image_cleaned)
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
async def _deliver_media(path: str, is_voice: bool = False) -> None:
ext = Path(path).suffix.lower()
if is_voice or ext in _AUDIO_EXTS:
await self.send_voice(chat_id=chat_id, audio_path=path, metadata=metadata)
elif ext in _VIDEO_EXTS:
await self.send_video(chat_id=chat_id, video_path=path, metadata=metadata)
elif ext in _IMAGE_EXTS:
await self.send_image_file(chat_id=chat_id, image_path=path, metadata=metadata)
else:
await self.send_document(chat_id=chat_id, file_path=path, metadata=metadata)
try:
chunks = [c for c in self._split_text(self.format_message(content)) if c and c.strip()]
# Deliver extracted MEDIA: attachments first.
for media_path, is_voice in media_files:
try:
await _deliver_media(media_path, is_voice)
except Exception as exc:
logger.warning("[%s] media delivery failed for %s: %s", self.name, media_path, exc)
# Deliver bare local file paths.
for file_path in local_files:
try:
await _deliver_media(file_path, is_voice=False)
except Exception as exc:
logger.warning("[%s] local file delivery failed for %s: %s", self.name, file_path, exc)
# Deliver text content.
chunks = [c for c in self._split_text(self.format_message(final_content)) if c and c.strip()]
for idx, chunk in enumerate(chunks):
client_id = f"hermes-weixin-{uuid.uuid4().hex}"
await self._send_text_chunk(
@ -1479,14 +1621,14 @@ class WeixinAdapter(BasePlatformAdapter):
return SendResult(success=False, error=str(exc))
async def send_typing(self, chat_id: str, metadata: Optional[Dict[str, Any]] = None) -> None:
if not self._session or not self._token:
if not self._send_session or not self._token:
return
typing_ticket = self._typing_cache.get(chat_id)
if not typing_ticket:
return
try:
await _send_typing(
self._session,
self._send_session,
base_url=self._base_url,
token=self._token,
to_user_id=chat_id,
@ -1497,14 +1639,14 @@ class WeixinAdapter(BasePlatformAdapter):
logger.debug("[%s] typing start failed for %s: %s", self.name, _safe_id(chat_id), exc)
async def stop_typing(self, chat_id: str) -> None:
if not self._session or not self._token:
if not self._send_session or not self._token:
return
typing_ticket = self._typing_cache.get(chat_id)
if not typing_ticket:
return
try:
await _send_typing(
self._session,
self._send_session,
base_url=self._base_url,
token=self._token,
to_user_id=chat_id,
@ -1542,24 +1684,35 @@ class WeixinAdapter(BasePlatformAdapter):
async def send_image_file(
self,
chat_id: str,
path: str,
caption: str = "",
image_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
return await self.send_document(chat_id, file_path=path, caption=caption, metadata=metadata)
del reply_to, kwargs
return await self.send_document(
chat_id=chat_id,
file_path=image_path,
caption=caption,
metadata=metadata,
)
async def send_document(
self,
chat_id: str,
file_path: str,
caption: str = "",
caption: Optional[str] = None,
file_name: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
if not self._session or not self._token:
del file_name, reply_to, metadata, kwargs
if not self._send_session or not self._token:
return SendResult(success=False, error="Not connected")
try:
message_id = await self._send_file(chat_id, file_path, caption)
message_id = await self._send_file(chat_id, file_path, caption or "")
return SendResult(success=True, message_id=message_id)
except Exception as exc:
logger.error("[%s] send_document failed to=%s: %s", self.name, _safe_id(chat_id), exc)
@ -1573,7 +1726,7 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
if not self._session or not self._token:
if not self._send_session or not self._token:
return SendResult(success=False, error="Not connected")
try:
message_id = await self._send_file(chat_id, video_path, caption or "")
@ -1590,7 +1743,24 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
if not self._send_session or not self._token:
return SendResult(success=False, error="Not connected")
# Native outbound Weixin voice bubbles are not proven-working in the
# upstream reference implementation. Prefer a reliable file attachment
# fallback so users at least receive playable audio, even for .silk.
fallback_caption = caption or "[voice message as attachment]"
try:
message_id = await self._send_file(
chat_id,
audio_path,
fallback_caption,
force_file_attachment=True,
)
return SendResult(success=True, message_id=message_id)
except Exception as exc:
logger.error("[%s] send_voice failed to=%s: %s", self.name, _safe_id(chat_id), exc)
return SendResult(success=False, error=str(exc))
async def _download_remote_media(self, url: str) -> str:
from tools.url_safety import is_safe_url
@ -1598,8 +1768,8 @@ class WeixinAdapter(BasePlatformAdapter):
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL (SSRF protection): {url}")
assert self._session is not None
async with self._session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
assert self._send_session is not None
async with self._send_session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
response.raise_for_status()
data = await response.read()
suffix = Path(url.split("?", 1)[0]).suffix or ".bin"
@ -1607,16 +1777,22 @@ class WeixinAdapter(BasePlatformAdapter):
handle.write(data)
return handle.name
async def _send_file(self, chat_id: str, path: str, caption: str) -> str:
assert self._session is not None and self._token is not None
async def _send_file(
self,
chat_id: str,
path: str,
caption: str,
force_file_attachment: bool = False,
) -> str:
assert self._send_session is not None and self._token is not None
plaintext = Path(path).read_bytes()
media_type, item_builder = self._outbound_media_builder(path)
media_type, item_builder = self._outbound_media_builder(path, force_file_attachment=force_file_attachment)
filekey = secrets.token_hex(16)
aes_key = secrets.token_bytes(16)
rawsize = len(plaintext)
rawfilemd5 = hashlib.md5(plaintext).hexdigest()
upload_response = await _get_upload_url(
self._session,
self._send_session,
base_url=self._base_url,
token=self._token,
to_user_id=chat_id,
@ -1642,30 +1818,34 @@ class WeixinAdapter(BasePlatformAdapter):
raise RuntimeError(f"getUploadUrl returned neither upload_param nor upload_full_url: {upload_response}")
encrypted_query_param = await _upload_ciphertext(
self._session,
self._send_session,
ciphertext=ciphertext,
upload_url=upload_url,
)
context_token = self._token_store.get(self._account_id, chat_id)
# The iLink API expects aes_key as base64(hex_string), not base64(raw_bytes).
# Sending base64(raw_bytes) causes images to show as grey boxes on the
# receiver side because the decryption key doesn't match.
aes_key_for_api = base64.b64encode(aes_key.hex().encode("ascii")).decode("ascii")
media_item = item_builder(
encrypt_query_param=encrypted_query_param,
aes_key_for_api=aes_key_for_api,
ciphertext_size=len(ciphertext),
plaintext_size=rawsize,
filename=Path(path).name,
rawfilemd5=rawfilemd5,
)
item_kwargs = {
"encrypt_query_param": encrypted_query_param,
"aes_key_for_api": aes_key_for_api,
"ciphertext_size": len(ciphertext),
"plaintext_size": rawsize,
"filename": Path(path).name,
"rawfilemd5": rawfilemd5,
}
if media_type == MEDIA_VOICE and path.endswith(".silk"):
item_kwargs["encode_type"] = 6
item_kwargs["sample_rate"] = 24000
item_kwargs["bits_per_sample"] = 16
media_item = item_builder(**item_kwargs)
last_message_id = None
if caption:
last_message_id = f"hermes-weixin-{uuid.uuid4().hex}"
await _send_message(
self._session,
self._send_session,
base_url=self._base_url,
token=self._token,
to=chat_id,
@ -1676,7 +1856,7 @@ class WeixinAdapter(BasePlatformAdapter):
last_message_id = f"hermes-weixin-{uuid.uuid4().hex}"
await _api_post(
self._session,
self._send_session,
base_url=self._base_url,
endpoint=EP_SEND_MESSAGE,
payload={
@ -1695,7 +1875,7 @@ class WeixinAdapter(BasePlatformAdapter):
)
return last_message_id
def _outbound_media_builder(self, path: str):
def _outbound_media_builder(self, path: str, force_file_attachment: bool = False):
mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
if mime.startswith("image/"):
return MEDIA_IMAGE, lambda **kw: {
@ -1723,7 +1903,7 @@ class WeixinAdapter(BasePlatformAdapter):
"video_md5": kw.get("rawfilemd5", ""),
},
}
if mime.startswith("audio/") or path.endswith(".silk"):
if path.endswith(".silk") and not force_file_attachment:
return MEDIA_VOICE, lambda **kw: {
"type": ITEM_VOICE,
"voice_item": {
@ -1732,9 +1912,25 @@ class WeixinAdapter(BasePlatformAdapter):
"aes_key": kw["aes_key_for_api"],
"encrypt_type": 1,
},
"encode_type": kw.get("encode_type"),
"bits_per_sample": kw.get("bits_per_sample"),
"sample_rate": kw.get("sample_rate"),
"playtime": kw.get("playtime", 0),
},
}
if mime.startswith("audio/"):
return MEDIA_FILE, lambda **kw: {
"type": ITEM_FILE,
"file_item": {
"media": {
"encrypt_query_param": kw["encrypt_query_param"],
"aes_key": kw["aes_key_for_api"],
"encrypt_type": 1,
},
"file_name": kw["filename"],
"len": str(kw["plaintext_size"]),
},
}
return MEDIA_FILE, lambda **kw: {
"type": ITEM_FILE,
"file_item": {
@ -1784,7 +1980,34 @@ async def send_weixin_direct(
token_store.restore(account_id)
context_token = token_store.get(account_id, chat_id)
async with aiohttp.ClientSession(trust_env=True) as session:
live_adapter = _LIVE_ADAPTERS.get(resolved_token)
send_session = getattr(live_adapter, '_send_session', None)
if live_adapter is not None and send_session is not None and not send_session.closed:
last_result: Optional[SendResult] = None
cleaned = live_adapter.format_message(message)
if cleaned:
last_result = await live_adapter.send(chat_id, cleaned)
if not last_result.success:
return {"error": f"Weixin send failed: {last_result.error}"}
for media_path, _is_voice in media_files or []:
ext = Path(media_path).suffix.lower()
if ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}:
last_result = await live_adapter.send_image_file(chat_id, media_path)
else:
last_result = await live_adapter.send_document(chat_id, media_path)
if not last_result.success:
return {"error": f"Weixin media send failed: {last_result.error}"}
return {
"success": True,
"platform": "weixin",
"chat_id": chat_id,
"message_id": last_result.message_id if last_result else None,
"context_token_used": bool(context_token),
}
async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session:
adapter = WeixinAdapter(
PlatformConfig(
enabled=True,
@ -1797,6 +2020,7 @@ async def send_weixin_direct(
},
)
)
adapter._send_session = session
adapter._session = session
adapter._token = resolved_token
adapter._account_id = account_id

File diff suppressed because it is too large Load diff

View file

@ -82,6 +82,7 @@ class SessionSource:
chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack)
user_id_alt: Optional[str] = None # Signal UUID (alternative to phone number)
chat_id_alt: Optional[str] = None # Signal group internal ID
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
@property
def description(self) -> str:
@ -301,6 +302,8 @@ def build_session_context_prompt(
lines.append("")
lines.append("**Delivery options for scheduled tasks:**")
from hermes_constants import display_hermes_home
# Origin delivery
if context.source.platform == Platform.LOCAL:
lines.append("- `\"origin\"` → Local output (saved to files)")
@ -309,9 +312,11 @@ def build_session_context_prompt(
_hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
)
lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")
# Local always available
lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)")
lines.append(
f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
)
# Platform home channels
for platform, home in context.home_channels.items():
@ -797,6 +802,57 @@ class SessionStore:
return True
return False
def prune_old_entries(self, max_age_days: int) -> int:
"""Drop SessionEntry records older than max_age_days.
Pruning is based on ``updated_at`` (last activity), not ``created_at``.
A session that's been active within the window is kept regardless of
how old it is. Entries marked ``suspended`` are kept: the user
explicitly paused them for later resume. Entries held by an active
process (via has_active_processes_fn) are also kept so long-running
background work isn't orphaned.
Pruning is functionally identical to a natural reset-policy expiry:
the transcript in SQLite stays, but the session_key session_id
mapping is dropped and the user starts a fresh session on return.
``max_age_days <= 0`` disables pruning; returns 0 immediately.
Returns the number of entries removed.
"""
if max_age_days is None or max_age_days <= 0:
return 0
from datetime import timedelta
cutoff = _now() - timedelta(days=max_age_days)
removed_keys: list[str] = []
with self._lock:
self._ensure_loaded_locked()
for key, entry in list(self._entries.items()):
if entry.suspended:
continue
# Never prune sessions with an active background process
# attached — the user may still be waiting on output.
if self._has_active_processes_fn is not None:
try:
if self._has_active_processes_fn(entry.session_id):
continue
except Exception:
pass
if entry.updated_at < cutoff:
removed_keys.append(key)
for key in removed_keys:
self._entries.pop(key, None)
if removed_keys:
self._save()
if removed_keys:
logger.info(
"SessionStore pruned %d entries older than %d days",
len(removed_keys), max_age_days,
)
return len(removed_keys)
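# Editorial sketch (not part of the diff): how a periodic maintenance
# pass might call this. `store` is a SessionStore instance; the 30-day
# retention value is an assumed policy, not a shipped default.
def _demo_session_maintenance(store, max_age_days: int = 30) -> None:
    removed = store.prune_old_entries(max_age_days)
    if removed:
        print(f"pruned {removed} idle session mapping(s)")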
def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
"""Mark recently-active sessions as suspended.

View file

@ -37,18 +37,24 @@ needs to replace the import + call site:
"""
from contextvars import ContextVar
from typing import Any
# Sentinel to distinguish "never set in this context" from "explicitly set to empty".
# When a contextvar holds _UNSET, we fall back to os.environ (CLI/cron compat).
# When it holds "" (after clear_session_vars resets it), we return "" — no fallback.
_UNSET: Any = object()
# ---------------------------------------------------------------------------
# Per-task session variables
# ---------------------------------------------------------------------------
_SESSION_PLATFORM: ContextVar[str] = ContextVar("HERMES_SESSION_PLATFORM", default="")
_SESSION_CHAT_ID: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_ID", default="")
_SESSION_CHAT_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_NAME", default="")
_SESSION_THREAD_ID: ContextVar[str] = ContextVar("HERMES_SESSION_THREAD_ID", default="")
_SESSION_USER_ID: ContextVar[str] = ContextVar("HERMES_SESSION_USER_ID", default="")
_SESSION_USER_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_USER_NAME", default="")
_SESSION_KEY: ContextVar[str] = ContextVar("HERMES_SESSION_KEY", default="")
_SESSION_PLATFORM: ContextVar = ContextVar("HERMES_SESSION_PLATFORM", default=_UNSET)
_SESSION_CHAT_ID: ContextVar = ContextVar("HERMES_SESSION_CHAT_ID", default=_UNSET)
_SESSION_CHAT_NAME: ContextVar = ContextVar("HERMES_SESSION_CHAT_NAME", default=_UNSET)
_SESSION_THREAD_ID: ContextVar = ContextVar("HERMES_SESSION_THREAD_ID", default=_UNSET)
_SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNSET)
_SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
_SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)
_VAR_MAP = {
"HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
@ -91,10 +97,17 @@ def set_session_vars(
def clear_session_vars(tokens: list) -> None:
"""Restore session context variables to their pre-handler values."""
if not tokens:
return
vars_in_order = [
"""Mark session context variables as explicitly cleared.
Sets all variables to ``""`` so that ``get_session_env`` returns an empty
string instead of falling back to (potentially stale) ``os.environ``
values. The *tokens* argument is accepted for API compatibility with
callers that saved the return value of ``set_session_vars``, but the
actual clearing uses ``var.set("")`` rather than ``var.reset(token)``
to ensure the "explicitly cleared" state is distinguishable from
"never set" (which holds the ``_UNSET`` sentinel).
"""
for var in (
_SESSION_PLATFORM,
_SESSION_CHAT_ID,
_SESSION_CHAT_NAME,
@ -102,9 +115,8 @@ def clear_session_vars(tokens: list) -> None:
_SESSION_USER_ID,
_SESSION_USER_NAME,
_SESSION_KEY,
]
for var, token in zip(vars_in_order, tokens):
var.reset(token)
):
var.set("")
def get_session_env(name: str, default: str = "") -> str:
@ -113,8 +125,13 @@ def get_session_env(name: str, default: str = "") -> str:
Drop-in replacement for ``os.getenv("HERMES_SESSION_*", default)``.
Resolution order:
1. Context variable (set by the gateway for concurrency-safe access)
2. ``os.environ`` (used by CLI, cron scheduler, and tests)
1. Context variable (set by the gateway for concurrency-safe access).
If the variable was explicitly set (even to ``""``) via
``set_session_vars`` or ``clear_session_vars``, that value is
returned; there is **no fallback to os.environ**.
2. ``os.environ`` (only when the context variable was never set in
this context, i.e. CLI, cron scheduler, and test processes that
don't use ``set_session_vars`` at all).
3. *default*
"""
import os
@ -122,7 +139,7 @@ def get_session_env(name: str, default: str = "") -> str:
var = _VAR_MAP.get(name)
if var is not None:
value = var.get()
if value:
if value is not _UNSET:
return value
# Fall back to os.environ for CLI, cron, and test compatibility
return os.getenv(name, default)
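# Editorial sketch (not part of the diff): the sentinel pattern above in
# isolation, showing all three resolution outcomes. Names are illustrative.
import os
from contextvars import ContextVar, copy_context
from typing import Any

_DEMO_UNSET: Any = object()
_DEMO_VAR: ContextVar = ContextVar("DEMO_SESSION_VAR", default=_DEMO_UNSET)


def _demo_get_env(name: str, default: str = "") -> str:
    value = _DEMO_VAR.get()
    if value is not _DEMO_UNSET:
        return value  # explicit value wins, even when it is ""
    return os.getenv(name, default)


os.environ["DEMO_SESSION_VAR"] = "stale-from-environ"
print(_demo_get_env("DEMO_SESSION_VAR"))  # never set -> environ fallback


def _demo_handler() -> None:
    _DEMO_VAR.set("live-value")
    print(_demo_get_env("DEMO_SESSION_VAR"))  # explicitly set -> "live-value"
    _DEMO_VAR.set("")  # explicit clear, as clear_session_vars does
    print(repr(_demo_get_env("DEMO_SESSION_VAR")))  # '' with no fallback


copy_context().run(_demo_handler)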

View file

@ -188,8 +188,8 @@ def _write_json_file(path: Path, payload: dict[str, Any]) -> None:
path.write_text(json.dumps(payload))
def _read_pid_record() -> Optional[dict]:
pid_path = _get_pid_path()
def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
pid_path = pid_path or _get_pid_path()
if not pid_path.exists():
return None
@ -212,6 +212,18 @@ def _read_pid_record() -> Optional[dict]:
return None
def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
if not cleanup_stale:
return
try:
if pid_path == _get_pid_path():
remove_pid_file()
else:
pid_path.unlink(missing_ok=True)
except Exception:
pass
def write_pid_file() -> None:
"""Write the current process PID and metadata to the gateway PID file."""
_write_json_file(_get_pid_path(), _build_pid_record())
@ -413,43 +425,179 @@ def release_all_scoped_locks() -> int:
return removed
def get_running_pid() -> Optional[int]:
# ── --replace takeover marker ─────────────────────────────────────────
#
# When a new gateway starts with ``--replace``, it SIGTERMs the existing
# gateway so it can take over the bot token. PR #5646 made SIGTERM exit
# the gateway with code 1 so ``Restart=on-failure`` can revive it after
# unexpected kills — but that also means a --replace takeover target
# exits 1, which tricks systemd into reviving it 30 seconds later,
# starting a flap loop against the replacer when both services are
# enabled in the user's systemd (e.g. ``hermes.service`` + ``hermes-
# gateway.service``).
#
# The takeover marker breaks the loop: the replacer writes a short-lived
# file naming the target PID + start_time BEFORE sending SIGTERM.
# The target's shutdown handler reads the marker and, if it names
# this process, treats the SIGTERM as a planned takeover and exits 0.
# The marker is unlinked after the target has consumed it, so a stale
# marker left by a crashed replacer can grief at most one future
# shutdown on the same PID — and only within _TAKEOVER_MARKER_TTL_S.
_TAKEOVER_MARKER_FILENAME = ".gateway-takeover.json"
_TAKEOVER_MARKER_TTL_S = 60 # Marker older than this is treated as stale
def _get_takeover_marker_path() -> Path:
"""Return the path to the --replace takeover marker file."""
home = get_hermes_home()
return home / _TAKEOVER_MARKER_FILENAME
def write_takeover_marker(target_pid: int) -> bool:
"""Record that ``target_pid`` is being replaced by the current process.
Captures the target's ``start_time`` so that PID reuse after the
target exits cannot later match the marker. Also records the
replacer's PID and a UTC timestamp for TTL-based staleness checks.
Returns True on successful write, False on any failure. The caller
should proceed with the SIGTERM even if the write fails (the marker
is a best-effort signal, not a correctness requirement).
"""
try:
target_start_time = _get_process_start_time(target_pid)
record = {
"target_pid": target_pid,
"target_start_time": target_start_time,
"replacer_pid": os.getpid(),
"written_at": _utc_now_iso(),
}
_write_json_file(_get_takeover_marker_path(), record)
return True
except (OSError, PermissionError):
return False
def consume_takeover_marker_for_self() -> bool:
"""Check & unlink the takeover marker if it names the current process.
Returns True only when a valid (non-stale) marker names this PID +
start_time. A returning True indicates the current SIGTERM is a
planned --replace takeover; the caller should exit 0 instead of
signalling ``_signal_initiated_shutdown``.
Always unlinks the marker on match (and on detected staleness) so
subsequent unrelated signals don't re-trigger.
"""
path = _get_takeover_marker_path()
record = _read_json_file(path)
if not record:
return False
# Any malformed or stale marker → drop it and return False
try:
target_pid = int(record["target_pid"])
target_start_time = record.get("target_start_time")
written_at = record.get("written_at") or ""
except (KeyError, TypeError, ValueError):
try:
path.unlink(missing_ok=True)
except OSError:
pass
return False
# TTL guard: a stale marker older than _TAKEOVER_MARKER_TTL_S is ignored.
stale = False
try:
written_dt = datetime.fromisoformat(written_at)
age = (datetime.now(timezone.utc) - written_dt).total_seconds()
if age > _TAKEOVER_MARKER_TTL_S:
stale = True
except (TypeError, ValueError):
stale = True # Unparseable timestamp — treat as stale
if stale:
try:
path.unlink(missing_ok=True)
except OSError:
pass
return False
# Does the marker name THIS process?
our_pid = os.getpid()
our_start_time = _get_process_start_time(our_pid)
matches = (
target_pid == our_pid
and target_start_time is not None
and our_start_time is not None
and target_start_time == our_start_time
)
# Consume the marker whether it matched or not — a marker that doesn't
# match our identity is stale-for-us anyway.
try:
path.unlink(missing_ok=True)
except OSError:
pass
return matches
def clear_takeover_marker() -> None:
"""Remove the takeover marker unconditionally. Safe to call repeatedly."""
try:
_get_takeover_marker_path().unlink(missing_ok=True)
except OSError:
pass
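# Editorial sketch (not part of the diff): the takeover handshake from
# both sides. The SIGTERM plumbing and handler wiring are illustrative;
# write_takeover_marker / consume_takeover_marker_for_self are the
# helpers defined above.
import os
import signal
import sys


def _demo_replace_running_gateway(target_pid: int) -> None:
    # Best-effort marker: proceed with SIGTERM even if the write fails.
    write_takeover_marker(target_pid)
    os.kill(target_pid, signal.SIGTERM)


def _demo_on_sigterm(signum, frame) -> None:
    if consume_takeover_marker_for_self():
        sys.exit(0)  # planned takeover: exit clean so systemd stays quiet
    sys.exit(1)  # unexpected kill: exit 1 so Restart=on-failure revives us


signal.signal(signal.SIGTERM, _demo_on_sigterm)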
def get_running_pid(
pid_path: Optional[Path] = None,
*,
cleanup_stale: bool = True,
) -> Optional[int]:
"""Return the PID of a running gateway instance, or ``None``.
Checks the PID file and verifies the process is actually alive.
Cleans up stale PID files automatically.
"""
record = _read_pid_record()
resolved_pid_path = pid_path or _get_pid_path()
record = _read_pid_record(resolved_pid_path)
if not record:
remove_pid_file()
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
try:
pid = int(record["pid"])
except (KeyError, TypeError, ValueError):
remove_pid_file()
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
try:
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
except (ProcessLookupError, PermissionError):
remove_pid_file()
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
recorded_start = record.get("start_time")
current_start = _get_process_start_time(pid)
if recorded_start is not None and current_start is not None and current_start != recorded_start:
remove_pid_file()
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
if not _looks_like_gateway_process(pid):
if not _record_looks_like_gateway(record):
remove_pid_file()
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
return pid
def is_gateway_running() -> bool:
def is_gateway_running(
pid_path: Optional[Path] = None,
*,
cleanup_stale: bool = True,
) -> bool:
"""Check if the gateway daemon is currently running."""
return get_running_pid() is not None
return get_running_pid(pid_path, cleanup_stale=cleanup_stale) is not None
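# Editorial sketch (not part of the diff): a read-only probe of an
# alternate PID file using the new keyword, so the probe never removes
# someone else's stale file. The path is illustrative.
from pathlib import Path

_probe = Path.home() / ".hermes" / "gateway-staging.pid"
_pid = get_running_pid(_probe, cleanup_stale=False)
if _pid is None:
    print("no staging gateway running (stale file, if any, left in place)")
else:
    print(f"staging gateway alive as pid {_pid}")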

View file

@ -43,6 +43,7 @@ class StreamConsumerConfig:
edit_interval: float = 1.0
buffer_threshold: int = 40
cursor: str = ""
buffer_only: bool = False
class GatewayStreamConsumer:
@ -99,6 +100,14 @@ class GatewayStreamConsumer:
self._flood_strikes = 0 # Consecutive flood-control edit failures
self._current_edit_interval = self.cfg.edit_interval # Adaptive backoff
self._final_response_sent = False
# Cache adapter lifecycle capability: only platforms that need an
# explicit finalize call (e.g. DingTalk AI Cards) force us to make
# a redundant final edit. Everyone else keeps the fast path.
# Use ``is True`` (not ``bool(...)``) so MagicMock attribute access
# in tests doesn't incorrectly enable this path.
self._adapter_requires_finalize: bool = (
getattr(adapter, "REQUIRES_EDIT_FINALIZE", False) is True
)
# Think-block filter state (mirrors CLI's _stream_delta tag suppression)
self._in_think_block = False
@ -295,10 +304,13 @@ class GatewayStreamConsumer:
got_done
or got_segment_break
or commentary_text is not None
or (elapsed >= self._current_edit_interval
and self._accumulated)
or len(self._accumulated) >= self.cfg.buffer_threshold
)
if not self.cfg.buffer_only:
should_edit = should_edit or (
(elapsed >= self._current_edit_interval
and self._accumulated)
or len(self._accumulated) >= self.cfg.buffer_threshold
)
current_update_visible = False
if should_edit and self._accumulated:
@ -357,7 +369,16 @@ class GatewayStreamConsumer:
if not got_done and not got_segment_break and commentary_text is None:
display_text += self.cfg.cursor
current_update_visible = await self._send_or_edit(display_text)
# Segment break: finalize the current message so platforms
# that need explicit closure (e.g. DingTalk AI Cards) don't
# leave the previous segment stuck in a loading state when
# the next segment (tool progress, next chunk) creates a
# new message below it. got_done has its own finalize
# path below so we don't finalize here for it.
current_update_visible = await self._send_or_edit(
display_text,
finalize=got_segment_break,
)
self._last_edit_time = time.monotonic()
if got_done:
@ -368,10 +389,22 @@ class GatewayStreamConsumer:
if self._accumulated:
if self._fallback_final_send:
await self._send_fallback_final(self._accumulated)
elif current_update_visible:
elif (
current_update_visible
and not self._adapter_requires_finalize
):
# Mid-stream edit above already delivered the
# final accumulated content. Skip the redundant
# final edit — but only for adapters that don't
# need an explicit finalize signal.
self._final_response_sent = True
elif self._message_id:
self._final_response_sent = await self._send_or_edit(self._accumulated)
# Either the mid-stream edit didn't run (no
# visible update this tick) OR the adapter needs
# explicit finalize=True to close the stream.
self._final_response_sent = await self._send_or_edit(
self._accumulated, finalize=True,
)
elif not self._already_sent:
self._final_response_sent = await self._send_or_edit(self._accumulated)
return
@ -403,18 +436,20 @@ class GatewayStreamConsumer:
except asyncio.CancelledError:
# Best-effort final edit on cancellation
_best_effort_ok = False
if self._accumulated and self._message_id:
try:
await self._send_or_edit(self._accumulated)
_best_effort_ok = bool(await self._send_or_edit(self._accumulated))
except Exception:
pass
# If we delivered any content before being cancelled, mark the
# final response as sent so the gateway's already_sent check
# doesn't trigger a duplicate message. The 5-second
# stream_task timeout (gateway/run.py) can cancel us while
# waiting on a slow Telegram API call — without this flag the
# gateway falls through to the normal send path.
if self._already_sent:
# Only confirm final delivery if the best-effort send above
# actually succeeded OR if the final response was already
# confirmed before we were cancelled. Previously this
# promoted any partial send (already_sent=True) to
# final_response_sent — which suppressed the gateway's
# fallback send even when only intermediate text (e.g.
# "Let me search…") had been delivered, not the real answer.
if _best_effort_ok and not self._final_response_sent:
self._final_response_sent = True
except Exception as e:
logger.error("Stream consumer error: %s", e)
@ -513,9 +548,17 @@ class GatewayStreamConsumer:
self._fallback_final_send = False
if not continuation.strip():
# Nothing new to send — the visible partial already matches final text.
self._already_sent = True
self._final_response_sent = True
return
# BUT: if final_text itself has meaningful content (e.g. a timeout
# message after a long tool call), the prefix-based continuation
# calculation may wrongly conclude "already shown" because the
# streamed prefix was from a *previous* segment (before the tool
# boundary). In that case, send the full final_text as-is (#10807).
if final_text.strip() and final_text != self._visible_prefix():
continuation = final_text
else:
self._already_sent = True
self._final_response_sent = True
return
raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096)
safe_limit = max(500, raw_limit - 100)
@ -609,19 +652,25 @@ class GatewayStreamConsumer:
content=text,
metadata=self.metadata,
)
if result.success:
self._already_sent = True
return True
# Note: do NOT set _already_sent = True here.
# Commentary messages are interim status updates (e.g. "Using browser
# tool..."), not the final response. Setting already_sent would cause
# the final response to be incorrectly suppressed when there are
# multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
return result.success
except Exception as e:
logger.error("Commentary send error: %s", e)
return False
return False
async def _send_or_edit(self, text: str) -> bool:
async def _send_or_edit(self, text: str, *, finalize: bool = False) -> bool:
"""Send or edit the streaming message.
Returns True if the text was successfully delivered (sent or edited),
False otherwise. Callers like the overflow split loop use this to
decide whether to advance past the delivered chunk.
``finalize`` is True when this is the last edit in a streaming
sequence.
"""
# Strip MEDIA: directives so they don't appear as visible text.
# Media files are delivered as native attachments after the stream
@ -655,14 +704,22 @@ class GatewayStreamConsumer:
try:
if self._message_id is not None:
if self._edit_supported:
# Skip if text is identical to what we last sent
if text == self._last_sent_text:
# Skip if text is identical to what we last sent.
# Exception: adapters that require an explicit finalize
# call (REQUIRES_EDIT_FINALIZE) must still receive the
# finalize=True edit even when content is unchanged, so
# their streaming UI can transition out of the in-
# progress state. Everyone else short-circuits.
if text == self._last_sent_text and not (
finalize and self._adapter_requires_finalize
):
return True
# Edit existing message
result = await self.adapter.edit_message(
chat_id=self.chat_id,
message_id=self._message_id,
content=text,
finalize=finalize,
)
if result.success:
self._already_sent = True
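# Editorial sketch (not part of the diff): an adapter opting into the
# explicit-closure path described above. The class and helper names are
# hypothetical; REQUIRES_EDIT_FINALIZE must be a literal True because
# the consumer checks it with `is True`.
class CardStyleAdapter(BasePlatformAdapter):
    REQUIRES_EDIT_FINALIZE = True

    async def edit_message(self, chat_id, message_id, content, finalize=False):
        if finalize:
            # Close the card so the UI leaves its loading state even when
            # the final text equals the last streamed edit.
            return await self._finish_card(chat_id, message_id, content)  # hypothetical helper
        return await self._update_card(chat_id, message_id, content)  # hypothetical helper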

View file

@ -11,5 +11,5 @@ Provides subcommands for:
- hermes cron - Manage cron jobs
"""
__version__ = "0.9.0"
__release_date__ = "2026.4.13"
__version__ = "0.10.0"
__release_date__ = "2026.4.16"

View file

@ -70,6 +70,7 @@ DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@ -77,6 +78,10 @@ QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56"
QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token"
QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
# Google Gemini OAuth (google-gemini-cli provider, Cloud Code Assist backend)
DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60 # refresh 60s before expiry
# =============================================================================
# Provider Registry
@ -121,6 +126,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
auth_type="oauth_external",
inference_base_url=DEFAULT_QWEN_BASE_URL,
),
"google-gemini-cli": ProviderConfig(
id="google-gemini-cli",
name="Google Gemini (OAuth)",
auth_type="oauth_external",
inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
),
"copilot": ProviderConfig(
id="copilot",
name="GitHub Copilot",
@ -222,6 +233,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XAI_API_KEY",),
base_url_env_var="XAI_BASE_URL",
),
"nvidia": ProviderConfig(
id="nvidia",
name="NVIDIA NIM",
auth_type="api_key",
inference_base_url="https://integrate.api.nvidia.com/v1",
api_key_env_vars=("NVIDIA_API_KEY",),
base_url_env_var="NVIDIA_BASE_URL",
),
"ai-gateway": ProviderConfig(
id="ai-gateway",
name="Vercel AI Gateway",
@ -274,6 +293,22 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XIAOMI_API_KEY",),
base_url_env_var="XIAOMI_BASE_URL",
),
"ollama-cloud": ProviderConfig(
id="ollama-cloud",
name="Ollama Cloud",
auth_type="api_key",
inference_base_url=DEFAULT_OLLAMA_CLOUD_BASE_URL,
api_key_env_vars=("OLLAMA_API_KEY",),
base_url_env_var="OLLAMA_BASE_URL",
),
"bedrock": ProviderConfig(
id="bedrock",
name="AWS Bedrock",
auth_type="aws_sdk",
inference_base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
api_key_env_vars=(),
base_url_env_var="BEDROCK_BASE_URL",
),
}
@ -746,6 +781,28 @@ def is_source_suppressed(provider_id: str, source: str) -> bool:
return False
def unsuppress_credential_source(provider_id: str, source: str) -> bool:
"""Clear a suppression marker so the source will be re-seeded on the next load.
Returns True if a marker was cleared, False if no marker existed.
"""
with _auth_store_lock():
auth_store = _load_auth_store()
suppressed = auth_store.get("suppressed_sources")
if not isinstance(suppressed, dict):
return False
provider_list = suppressed.get(provider_id)
if not isinstance(provider_list, list) or source not in provider_list:
return False
provider_list.remove(source)
if not provider_list:
suppressed.pop(provider_id, None)
if not suppressed:
auth_store.pop("suppressed_sources", None)
_save_auth_store(auth_store)
return True
def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
"""Return persisted auth state for a provider, or None."""
auth_store = _load_auth_store()
@ -911,6 +968,7 @@ def resolve_provider(
_PROVIDER_ALIASES = {
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"arcee-ai": "arcee", "arceeai": "arcee",
@ -921,14 +979,16 @@ def resolve_provider(
"github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp",
"aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway",
"opencode": "opencode-zen", "zen": "opencode-zen",
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth",
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
"aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
"go": "opencode-go", "opencode-go-sub": "opencode-go",
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
# Local server aliases — route through the generic custom provider
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
"ollama": "custom", "ollama_cloud": "ollama-cloud",
"vllm": "custom", "llamacpp": "custom",
"llama.cpp": "custom", "llama-cpp": "custom",
}
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
@ -980,6 +1040,15 @@ def resolve_provider(
if has_usable_secret(os.getenv(env_var, "")):
return pid
# AWS Bedrock — detect via boto3 credential chain (IAM roles, SSO, env vars).
# This runs after API-key providers so explicit keys always win.
try:
from agent.bedrock_adapter import has_aws_credentials
if has_aws_credentials():
return "bedrock"
except ImportError:
pass # boto3 not installed — skip Bedrock auto-detection
raise AuthError(
"No inference provider configured. Run 'hermes model' to choose a "
"provider and model, or set an API key (OPENROUTER_API_KEY, "
@ -1222,6 +1291,83 @@ def get_qwen_auth_status() -> Dict[str, Any]:
}
# =============================================================================
# Google Gemini OAuth (google-gemini-cli) — PKCE flow + Cloud Code Assist.
#
# Tokens live in ~/.hermes/auth/google_oauth.json (managed by agent.google_oauth).
# The `base_url` here is the marker "cloudcode-pa://google" that run_agent.py
# uses to construct a GeminiCloudCodeClient instead of the default OpenAI SDK.
# Actual HTTP traffic goes to https://cloudcode-pa.googleapis.com/v1internal:*.
# =============================================================================
def resolve_gemini_oauth_runtime_credentials(
*,
force_refresh: bool = False,
) -> Dict[str, Any]:
"""Resolve runtime OAuth creds for google-gemini-cli."""
try:
from agent.google_oauth import (
GoogleOAuthError,
_credentials_path,
get_valid_access_token,
load_credentials,
)
except ImportError as exc:
raise AuthError(
f"agent.google_oauth is not importable: {exc}",
provider="google-gemini-cli",
code="google_oauth_module_missing",
) from exc
try:
access_token = get_valid_access_token(force_refresh=force_refresh)
except GoogleOAuthError as exc:
raise AuthError(
str(exc),
provider="google-gemini-cli",
code=exc.code,
) from exc
creds = load_credentials()
base_url = DEFAULT_GEMINI_CLOUDCODE_BASE_URL
return {
"provider": "google-gemini-cli",
"base_url": base_url,
"api_key": access_token,
"source": "google-oauth",
"expires_at_ms": (creds.expires_ms if creds else None),
"auth_file": str(_credentials_path()),
"email": (creds.email if creds else "") or "",
"project_id": (creds.project_id if creds else "") or "",
}
def get_gemini_oauth_auth_status() -> Dict[str, Any]:
"""Return a status dict for `hermes auth list` / `hermes status`."""
try:
from agent.google_oauth import _credentials_path, load_credentials
except ImportError:
return {"logged_in": False, "error": "agent.google_oauth unavailable"}
auth_path = _credentials_path()
creds = load_credentials()
if creds is None or not creds.access_token:
return {
"logged_in": False,
"auth_file": str(auth_path),
"error": "not logged in",
}
return {
"logged_in": True,
"auth_file": str(auth_path),
"source": "google-oauth",
"api_key": creds.access_token,
"expires_at_ms": creds.expires_ms,
"email": creds.email,
"project_id": creds.project_id,
}
# =============================================================================
# SSH / remote session detection
# =============================================================================
@ -2013,6 +2159,62 @@ def refresh_nous_oauth_from_state(
)
NOUS_DEVICE_CODE_SOURCE = "device_code"
def persist_nous_credentials(
creds: Dict[str, Any],
*,
label: Optional[str] = None,
):
"""Persist minted Nous OAuth credentials as the singleton provider state
and ensure the credential pool is in sync.
Nous credentials are read at runtime from two independent locations:
- ``providers.nous``: singleton state read by
``resolve_nous_runtime_credentials()`` during 401 recovery and by
``_seed_from_singletons()`` during pool load.
- ``credential_pool.nous``: used by the runtime ``pool.select()`` path.
Historically ``hermes auth add nous`` wrote a ``manual:device_code`` pool
entry only, skipping ``providers.nous``. When the 24h agent_key TTL
expired, the recovery path read the empty singleton state and raised
``AuthError`` silently (``logger.debug`` at INFO level).
This helper writes ``providers.nous`` then calls ``load_pool("nous")`` so
``_seed_from_singletons`` materialises the canonical ``device_code`` pool
entry from the singleton. Re-running login upserts the same entry in
place; the pool never accumulates duplicate device_code rows.
``label`` is an optional user-chosen display name (from
``hermes auth add nous --label <name>``). It gets embedded in the
singleton state so that ``_seed_from_singletons`` uses it as the pool
entry's label on every subsequent ``load_pool("nous")`` instead of the
auto-derived token fingerprint. When ``None``, the auto-derived label
via ``label_from_token`` is used (unchanged default behaviour).
Returns the upserted :class:`PooledCredential` entry (or ``None`` if
seeding somehow produced no match, which shouldn't happen).
"""
from agent.credential_pool import load_pool
state = dict(creds)
if label and str(label).strip():
state["label"] = str(label).strip()
with _auth_store_lock():
auth_store = _load_auth_store()
_save_provider_state(auth_store, "nous", state)
_save_auth_store(auth_store)
pool = load_pool("nous")
return next(
(e for e in pool.entries() if e.source == NOUS_DEVICE_CODE_SOURCE),
None,
)
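# Editorial sketch (not part of the diff): the login-path call site this
# helper targets. The creds dict shape and URL are illustrative only.
creds = {
    "access_token": "…",
    "refresh_token": "…",
    "inference_base_url": "https://inference.example/v1",
}
entry = persist_nous_credentials(creds, label="work-laptop")
if entry is not None:
    print(f"pool entry ready: {entry.provider}/{entry.id} ({entry.label})")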
def resolve_nous_runtime_credentials(
*,
min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
@ -2384,7 +2586,7 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
if provider_id == "kimi-coding":
if provider_id in ("kimi-coding", "kimi-coding-cn"):
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
elif env_url:
base_url = env_url
@ -2440,12 +2642,21 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
return get_codex_auth_status()
if target == "qwen-oauth":
return get_qwen_auth_status()
if target == "google-gemini-cli":
return get_gemini_oauth_auth_status()
if target == "copilot-acp":
return get_external_process_provider_status(target)
# API-key providers
pconfig = PROVIDER_REGISTRY.get(target)
if pconfig and pconfig.auth_type == "api_key":
return get_api_key_provider_status(target)
# AWS SDK providers (Bedrock) — check via boto3 credential chain
if pconfig and pconfig.auth_type == "aws_sdk":
try:
from agent.bedrock_adapter import has_aws_credentials
return {"logged_in": has_aws_credentials(), "provider": target}
except ImportError:
return {"logged_in": False, "provider": target, "error": "boto3 not installed"}
return {"logged_in": False}
@ -2470,7 +2681,7 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
if provider_id == "kimi-coding":
if provider_id in ("kimi-coding", "kimi-coding-cn"):
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
elif provider_id == "zai":
base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
@ -3172,6 +3383,14 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
inference_base_url = auth_state["inference_base_url"]
# Snapshot the prior active_provider BEFORE _save_provider_state
# overwrites it to "nous". If the user picks "Skip (keep current)"
# during model selection below, we restore this so the user's previous
# provider (e.g. openrouter) is preserved.
with _auth_store_lock():
_prior_store = _load_auth_store()
prior_active_provider = _prior_store.get("active_provider")
with _auth_store_lock():
auth_store = _load_auth_store()
_save_provider_state(auth_store, "nous", auth_state)
@ -3231,6 +3450,27 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
print(f"Login succeeded, but could not fetch available models. Reason: {message}")
# Write provider + model atomically so config is never mismatched.
# If no model was selected (user picked "Skip (keep current)",
# model list fetch failed, or no curated models were available),
# preserve the user's previous provider — don't silently switch
# them to Nous with a mismatched model. The Nous OAuth tokens
# stay saved for future use.
if not selected_model:
# Restore the prior active_provider that _save_provider_state
# overwrote to "nous". config.yaml model.provider is left
# untouched, so the user's previous provider is fully preserved.
with _auth_store_lock():
auth_store = _load_auth_store()
if prior_active_provider:
auth_store["active_provider"] = prior_active_provider
else:
auth_store.pop("active_provider", None)
_save_auth_store(auth_store)
print()
print("No provider change. Nous credentials saved for future use.")
print(" Run `hermes model` again to switch to Nous Portal.")
return
config_path = _update_config_for_provider(
"nous", inference_base_url, default_model=selected_model,
)

View file

@ -4,6 +4,7 @@ from __future__ import annotations
from getpass import getpass
import math
import sys
import time
from types import SimpleNamespace
import uuid
@ -32,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL
# Providers that support OAuth login in addition to API keys.
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"}
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"}
def _get_custom_provider_names() -> list:
@ -147,7 +148,7 @@ def auth_add_command(args) -> None:
if provider.startswith(CUSTOM_POOL_PREFIX):
requested_type = AUTH_TYPE_API_KEY
else:
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth"} else AUTH_TYPE_API_KEY
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY
pool = load_pool(provider)
@ -160,7 +161,10 @@ def auth_add_command(args) -> None:
default_label = _api_key_default_label(len(pool.entries()) + 1)
label = (getattr(args, "label", None) or "").strip()
if not label:
label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
if sys.stdin.isatty():
label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
else:
label = default_label
entry = PooledCredential(
provider=provider,
id=uuid.uuid4().hex[:6],
@ -213,22 +217,21 @@ def auth_add_command(args) -> None:
ca_bundle=getattr(args, "ca_bundle", None),
min_key_ttl_seconds=max(60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))),
)
label = (getattr(args, "label", None) or "").strip() or label_from_token(
creds.get("access_token", ""),
_oauth_default_label(provider, len(pool.entries()) + 1),
# Honor `--label <name>` so nous matches other providers' UX. The
# helper embeds this into providers.nous so that label_from_token
# doesn't overwrite it on every subsequent load_pool("nous").
custom_label = (getattr(args, "label", None) or "").strip() or None
entry = auth_mod.persist_nous_credentials(creds, label=custom_label)
shown_label = entry.label if entry is not None else label_from_token(
creds.get("access_token", ""), _oauth_default_label(provider, 1),
)
entry = PooledCredential.from_dict(provider, {
**creds,
"label": label,
"auth_type": AUTH_TYPE_OAUTH,
"source": f"{SOURCE_MANUAL}:device_code",
"base_url": creds.get("inference_base_url"),
})
pool.add_entry(entry)
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
print(f'Saved {provider} OAuth device-code credentials: "{shown_label}"')
return
if provider == "openai-codex":
# Clear any existing suppression marker so a re-link after `hermes auth
# remove openai-codex` works without the new tokens being skipped.
auth_mod.unsuppress_credential_source(provider, "device_code")
creds = auth_mod._codex_device_code_login()
label = (getattr(args, "label", None) or "").strip() or label_from_token(
creds["tokens"]["access_token"],
@ -250,6 +253,27 @@ def auth_add_command(args) -> None:
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
return
if provider == "google-gemini-cli":
from agent.google_oauth import run_gemini_oauth_login_pure
creds = run_gemini_oauth_login_pure()
label = (getattr(args, "label", None) or "").strip() or (
creds.get("email") or _oauth_default_label(provider, len(pool.entries()) + 1)
)
entry = PooledCredential(
provider=provider,
id=uuid.uuid4().hex[:6],
label=label,
auth_type=AUTH_TYPE_OAUTH,
priority=0,
source=f"{SOURCE_MANUAL}:google_pkce",
access_token=creds["access_token"],
refresh_token=creds.get("refresh_token"),
)
pool.add_entry(entry)
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
return
if provider == "qwen-oauth":
creds = auth_mod.resolve_qwen_runtime_credentials(refresh_if_expiring=False)
label = (getattr(args, "label", None) or "").strip() or label_from_token(
@ -327,7 +351,34 @@ def auth_remove_command(args) -> None:
# If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
# clear the underlying auth store / credential file so it doesn't get
# re-seeded on the next load_pool() call.
elif removed.source == "device_code" and provider in ("openai-codex", "nous"):
elif provider == "openai-codex" and (
removed.source == "device_code" or removed.source.endswith(":device_code")
):
# Codex tokens live in TWO places: the Hermes auth store and
# ~/.codex/auth.json (the Codex CLI shared file). On every refresh,
# refresh_codex_oauth_pure() writes to both. So clearing only the
# Hermes auth store is not enough — _seed_from_singletons() will
# auto-import from ~/.codex/auth.json on the next load_pool() and
# the removal is instantly undone. Mark the source as suppressed
# so auto-import is skipped; leave ~/.codex/auth.json untouched so
# the Codex CLI itself keeps working.
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
suppress_credential_source,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
suppress_credential_source(provider, "device_code")
print("Suppressed openai-codex device_code source — it will not be re-seeded.")
print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
print("Run `hermes auth add openai-codex` to re-enable if needed.")
elif removed.source == "device_code" and provider == "nous":
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
)
@ -368,6 +419,27 @@ def _interactive_auth() -> None:
print("=" * 50)
auth_list_command(SimpleNamespace(provider=None))
# Show AWS Bedrock credential status (not in the pool — uses boto3 chain)
try:
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
if has_aws_credentials():
auth_source = resolve_aws_auth_env_var() or "unknown"
region = resolve_bedrock_region()
print(f"bedrock (AWS SDK credential chain):")
print(f" Auth: {auth_source}")
print(f" Region: {region}")
try:
import boto3
sts = boto3.client("sts", region_name=region)
identity = sts.get_caller_identity()
arn = identity.get("Arn", "unknown")
print(f" Identity: {arn}")
except Exception:
print(f" Identity: (could not resolve — boto3 STS call failed)")
print()
except ImportError:
pass # boto3 or bedrock_adapter not available
print()
# Main menu

View file

@ -7,8 +7,8 @@ CLI tools that ship with the platform (or are commonly installed).
Platform support:
macOS osascript (always available), pngpaste (if installed)
Windows PowerShell via .NET System.Windows.Forms.Clipboard
WSL2 powershell.exe via .NET System.Windows.Forms.Clipboard
Windows PowerShell via WinForms, Get-Clipboard, file-drop fallback
WSL2 powershell.exe via WinForms, Get-Clipboard, file-drop fallback
Linux wl-paste (Wayland), xclip (X11)
"""
@ -46,10 +46,11 @@ def has_clipboard_image() -> bool:
return _macos_has_image()
if sys.platform == "win32":
return _windows_has_image()
if _is_wsl():
return _wsl_has_image()
if os.environ.get("WAYLAND_DISPLAY"):
return _wayland_has_image()
# Match _linux_save fallthrough order: WSL → Wayland → X11
if _is_wsl() and _wsl_has_image():
return True
if os.environ.get("WAYLAND_DISPLAY") and _wayland_has_image():
return True
return _xclip_has_image()
@ -135,6 +136,114 @@ _PS_EXTRACT_IMAGE = (
"[System.Convert]::ToBase64String($ms.ToArray())"
)
_PS_CHECK_IMAGE_GET_CLIPBOARD = (
"try { "
"$img = Get-Clipboard -Format Image -ErrorAction Stop;"
"if ($null -ne $img) { 'True' } else { 'False' }"
"} catch { 'False' }"
)
_PS_EXTRACT_IMAGE_GET_CLIPBOARD = (
"try { "
"Add-Type -AssemblyName System.Drawing;"
"Add-Type -AssemblyName PresentationCore;"
"Add-Type -AssemblyName WindowsBase;"
"$img = Get-Clipboard -Format Image -ErrorAction Stop;"
"if ($null -eq $img) { exit 1 }"
"$ms = New-Object System.IO.MemoryStream;"
"if ($img -is [System.Drawing.Image]) {"
"$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)"
"} elseif ($img -is [System.Windows.Media.Imaging.BitmapSource]) {"
"$enc = New-Object System.Windows.Media.Imaging.PngBitmapEncoder;"
"$enc.Frames.Add([System.Windows.Media.Imaging.BitmapFrame]::Create($img));"
"$enc.Save($ms)"
"} else { exit 2 }"
"[System.Convert]::ToBase64String($ms.ToArray())"
"} catch { exit 1 }"
)
_FILEDROP_IMAGE_EXTS = "'.png','.jpg','.jpeg','.gif','.webp','.bmp','.tiff','.tif'"
_PS_CHECK_FILEDROP_IMAGE = (
"try { "
"$files = Get-Clipboard -Format FileDropList -ErrorAction Stop;"
f"$exts = @({_FILEDROP_IMAGE_EXTS});"
"$hit = $files | Where-Object { $exts -contains ([System.IO.Path]::GetExtension($_).ToLowerInvariant()) } | Select-Object -First 1;"
"if ($null -ne $hit) { 'True' } else { 'False' }"
"} catch { 'False' }"
)
_PS_EXTRACT_FILEDROP_IMAGE = (
"try { "
"$files = Get-Clipboard -Format FileDropList -ErrorAction Stop;"
f"$exts = @({_FILEDROP_IMAGE_EXTS});"
"$hit = $files | Where-Object { $exts -contains ([System.IO.Path]::GetExtension($_).ToLowerInvariant()) } | Select-Object -First 1;"
"if ($null -eq $hit) { exit 1 }"
"[System.Convert]::ToBase64String([System.IO.File]::ReadAllBytes($hit))"
"} catch { exit 1 }"
)
_POWERSHELL_HAS_IMAGE_SCRIPTS = (
_PS_CHECK_IMAGE,
_PS_CHECK_IMAGE_GET_CLIPBOARD,
_PS_CHECK_FILEDROP_IMAGE,
)
_POWERSHELL_EXTRACT_IMAGE_SCRIPTS = (
_PS_EXTRACT_IMAGE,
_PS_EXTRACT_IMAGE_GET_CLIPBOARD,
_PS_EXTRACT_FILEDROP_IMAGE,
)
def _run_powershell(exe: str, script: str, timeout: int) -> subprocess.CompletedProcess:
return subprocess.run(
[exe, "-NoProfile", "-NonInteractive", "-Command", script],
capture_output=True, text=True, timeout=timeout,
)
def _write_base64_image(dest: Path, b64_data: str) -> bool:
image_bytes = base64.b64decode(b64_data, validate=True)
dest.write_bytes(image_bytes)
return dest.exists() and dest.stat().st_size > 0
def _powershell_has_image(exe: str, *, timeout: int, label: str) -> bool:
for script in _POWERSHELL_HAS_IMAGE_SCRIPTS:
try:
r = _run_powershell(exe, script, timeout=timeout)
if r.returncode == 0 and "True" in r.stdout:
return True
except FileNotFoundError:
logger.debug("%s not found — clipboard unavailable", exe)
return False
except Exception as e:
logger.debug("%s clipboard image check failed: %s", label, e)
return False
def _powershell_save_image(exe: str, dest: Path, *, timeout: int, label: str) -> bool:
for script in _POWERSHELL_EXTRACT_IMAGE_SCRIPTS:
try:
r = _run_powershell(exe, script, timeout=timeout)
if r.returncode != 0:
continue
b64_data = r.stdout.strip()
if not b64_data:
continue
if _write_base64_image(dest, b64_data):
return True
except FileNotFoundError:
logger.debug("%s not found — clipboard unavailable", exe)
return False
except Exception as e:
logger.debug("%s clipboard image extraction failed: %s", label, e)
dest.unlink(missing_ok=True)
return False
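# Hedged usage sketch: both helpers walk the script tuples above in order
# (WinForms, then Get-Clipboard, then FileDropList), stopping at the first
# success. The destination path is illustrative.
#
#   from pathlib import Path
#   if _powershell_has_image("powershell.exe", timeout=8, label="WSL"):
#       _powershell_save_image("powershell.exe", Path("/tmp/clip.png"),
#                              timeout=15, label="WSL")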
# ── Native Windows ────────────────────────────────────────────────────────
@ -175,15 +284,7 @@ def _windows_has_image() -> bool:
ps = _get_ps_exe()
if ps is None:
return False
try:
r = subprocess.run(
[ps, "-NoProfile", "-NonInteractive", "-Command", _PS_CHECK_IMAGE],
capture_output=True, text=True, timeout=5,
)
return r.returncode == 0 and "True" in r.stdout
except Exception as e:
logger.debug("Windows clipboard image check failed: %s", e)
return False
return _powershell_has_image(ps, timeout=5, label="Windows")
def _windows_save(dest: Path) -> bool:
@ -192,26 +293,7 @@ def _windows_save(dest: Path) -> bool:
if ps is None:
logger.debug("No PowerShell found — Windows clipboard image paste unavailable")
return False
try:
r = subprocess.run(
[ps, "-NoProfile", "-NonInteractive", "-Command", _PS_EXTRACT_IMAGE],
capture_output=True, text=True, timeout=15,
)
if r.returncode != 0:
return False
b64_data = r.stdout.strip()
if not b64_data:
return False
png_bytes = base64.b64decode(b64_data)
dest.write_bytes(png_bytes)
return dest.exists() and dest.stat().st_size > 0
except Exception as e:
logger.debug("Windows clipboard image extraction failed: %s", e)
dest.unlink(missing_ok=True)
return False
return _powershell_save_image(ps, dest, timeout=15, label="Windows")
# ── Linux ────────────────────────────────────────────────────────────────
@ -235,45 +317,12 @@ def _linux_save(dest: Path) -> bool:
def _wsl_has_image() -> bool:
"""Check if Windows clipboard has an image (via powershell.exe)."""
try:
r = subprocess.run(
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command",
_PS_CHECK_IMAGE],
capture_output=True, text=True, timeout=8,
)
return r.returncode == 0 and "True" in r.stdout
except FileNotFoundError:
logger.debug("powershell.exe not found — WSL clipboard unavailable")
except Exception as e:
logger.debug("WSL clipboard check failed: %s", e)
return False
return _powershell_has_image("powershell.exe", timeout=8, label="WSL")
def _wsl_save(dest: Path) -> bool:
"""Extract clipboard image via powershell.exe → base64 → decode to PNG."""
try:
r = subprocess.run(
["powershell.exe", "-NoProfile", "-NonInteractive", "-Command",
_PS_EXTRACT_IMAGE],
capture_output=True, text=True, timeout=15,
)
if r.returncode != 0:
return False
b64_data = r.stdout.strip()
if not b64_data:
return False
png_bytes = base64.b64decode(b64_data)
dest.write_bytes(png_bytes)
return dest.exists() and dest.stat().st_size > 0
except FileNotFoundError:
logger.debug("powershell.exe not found — WSL clipboard unavailable")
except Exception as e:
logger.debug("WSL clipboard extraction failed: %s", e)
dest.unlink(missing_ok=True)
return False
return _powershell_save_image("powershell.exe", dest, timeout=15, label="WSL")
# ── Wayland (wl-paste) ──────────────────────────────────────────────────

View file

@ -87,8 +87,12 @@ COMMAND_REGISTRY: list[CommandDef] = [
aliases=("bg",), args_hint="<prompt>"),
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
args_hint="<question>"),
CommandDef("agents", "Show active agents and running tasks", "Session",
aliases=("tasks",)),
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
aliases=("q",), args_hint="<prompt>"),
CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
args_hint="<prompt>"),
CommandDef("status", "Show session info", "Session"),
CommandDef("profile", "Show active profile name and home directory", "Info"),
CommandDef("sethome", "Set this chat as the home channel", "Session",
@ -99,9 +103,10 @@ COMMAND_REGISTRY: list[CommandDef] = [
# Configuration
CommandDef("config", "Show current configuration", "Configuration",
cli_only=True),
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--global]"),
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
CommandDef("provider", "Show available providers and current provider",
"Configuration"),
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"),
CommandDef("personality", "Set a predefined personality", "Configuration",
args_hint="[name]"),
@ -119,7 +124,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
args_hint="[normal|fast|status]",
subcommands=("normal", "fast", "status", "on", "off")),
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
cli_only=True, args_hint="[name]"),
args_hint="[name]"),
CommandDef("voice", "Toggle voice mode", "Configuration",
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
@ -154,7 +159,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
args_hint="[days]"),
CommandDef("platforms", "Show gateway/messaging platform status", "Info",
cli_only=True, aliases=("gateway",)),
CommandDef("paste", "Check clipboard for an image and attach it", "Info",
CommandDef("copy", "Copy the last assistant response to clipboard", "Info",
cli_only=True, args_hint="[number]"),
CommandDef("paste", "Attach clipboard image from your clipboard", "Info",
cli_only=True),
CommandDef("image", "Attach a local image file for your next prompt", "Info",
cli_only=True, args_hint="<path>"),
@ -164,7 +171,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
# Exit
CommandDef("quit", "Exit the CLI", "Exit",
cli_only=True, aliases=("exit", "q")),
cli_only=True, aliases=("exit",)),
]
@ -253,6 +260,36 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
)
# Commands that must never be queued behind an active gateway session.
# These are explicit control/info commands handled by the gateway itself;
# if they get queued as pending text, the safety net in gateway.run will
# discard them before they ever reach the user.
ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
{
"agents",
"approve",
"background",
"commands",
"deny",
"help",
"new",
"profile",
"queue",
"restart",
"status",
"steer",
"stop",
"update",
}
)
def should_bypass_active_session(command_name: str | None) -> bool:
"""Return True when a slash command must bypass active-session queuing."""
cmd = resolve_command(command_name) if command_name else None
return bool(cmd and cmd.name in ACTIVE_SESSION_BYPASS_COMMANDS)
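# Hedged examples, assuming resolve_command() accepts bare command names and
# resolves aliases (e.g. "tasks" → "agents") like the rest of this registry:
#
#   should_bypass_active_session("status")  # True: in the bypass set
#   should_bypass_active_session("tasks")   # True: alias of "agents"
#   should_bypass_active_session("model")   # False: queued normally
#   should_bypass_active_session(None)      # False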
def _resolve_config_gates() -> set[str]:
"""Return canonical names of commands whose ``gateway_config_gate`` is truthy.
@ -450,7 +487,7 @@ def _collect_gateway_skill_entries(
name = sanitize_name(cmd_name) if sanitize_name else cmd_name
if not name:
continue
desc = "Plugin command"
desc = plugin_cmds[cmd_name].get("description", "Plugin command")
if len(desc) > desc_limit:
desc = desc[:desc_limit - 3] + "..."
plugin_pairs.append((name, desc))
@ -844,8 +881,7 @@ class SlashCommandCompleter(Completer):
return None
return word
@staticmethod
def _context_completions(word: str, limit: int = 30):
def _context_completions(self, word: str, limit: int = 30):
"""Yield Claude Code-style @ context completions.
Bare ``@`` or ``@partial`` shows static references and matching
@ -1044,6 +1080,51 @@ class SlashCommandCompleter(Completer):
display_meta=f"{fp} {meta}" if meta else fp,
)
@staticmethod
def _skin_completions(sub_text: str, sub_lower: str):
"""Yield completions for /skin from available skins."""
try:
from hermes_cli.skin_engine import list_skins
for s in list_skins():
name = s["name"]
if name.startswith(sub_lower) and name != sub_lower:
yield Completion(
name,
start_position=-len(sub_text),
display=name,
display_meta=s.get("description", "") or s.get("source", ""),
)
except Exception:
pass
@staticmethod
def _personality_completions(sub_text: str, sub_lower: str):
"""Yield completions for /personality from configured personalities."""
try:
from hermes_cli.config import load_config
personalities = load_config().get("agent", {}).get("personalities", {})
if "none".startswith(sub_lower) and "none" != sub_lower:
yield Completion(
"none",
start_position=-len(sub_text),
display="none",
display_meta="clear personality overlay",
)
for name, prompt in personalities.items():
if name.startswith(sub_lower) and name != sub_lower:
if isinstance(prompt, dict):
meta = prompt.get("description") or prompt.get("system_prompt", "")[:50]
else:
meta = str(prompt)[:50]
yield Completion(
name,
start_position=-len(sub_text),
display=name,
display_meta=meta,
)
except Exception:
pass
def _model_completions(self, sub_text: str, sub_lower: str):
"""Yield completions for /model from config aliases + built-in aliases."""
seen = set()
@ -1098,10 +1179,17 @@ class SlashCommandCompleter(Completer):
sub_text = parts[1] if len(parts) > 1 else ""
sub_lower = sub_text.lower()
# Dynamic model alias completions for /model
if " " not in sub_text and base_cmd == "/model":
yield from self._model_completions(sub_text, sub_lower)
return
# Dynamic completions for commands with runtime lists
if " " not in sub_text:
if base_cmd == "/model":
yield from self._model_completions(sub_text, sub_lower)
return
if base_cmd == "/skin":
yield from self._skin_completions(sub_text, sub_lower)
return
if base_cmd == "/personality":
yield from self._personality_completions(sub_text, sub_lower)
return
# Static subcommand completions
if " " not in sub_text and base_cmd in SUBCOMMANDS and self._command_allowed(base_cmd):
@ -1140,6 +1228,22 @@ class SlashCommandCompleter(Completer):
display_meta=f"{short_desc}",
)
# Plugin-registered slash commands
try:
from hermes_cli.plugins import get_plugin_commands
for cmd_name, cmd_info in get_plugin_commands().items():
if cmd_name.startswith(word):
desc = str(cmd_info.get("description", "Plugin command"))
short_desc = desc[:50] + ("..." if len(desc) > 50 else "")
yield Completion(
self._completion_text(cmd_name, word),
start_position=-len(word),
display=f"/{cmd_name}",
display_meta=f"🔌 {short_desc}",
)
except Exception:
pass
# ---------------------------------------------------------------------------
# Inline auto-suggest (ghost text) for slash commands

View file

@ -12,6 +12,7 @@ This module provides:
- hermes config wizard - Re-run setup wizard
"""
import copy
import os
import platform
import re
@ -23,10 +24,10 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
from tools.tool_backend_helpers import managed_nous_tools_enabled as _managed_nous_tools_enabled
_IS_WINDOWS = platform.system() == "Windows"
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
_LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
# Env var names written to .env that aren't in OPTIONAL_ENV_VARS
# (managed by setup/provider flows directly).
_EXTRA_ENV_KEYS = frozenset({
@ -45,7 +46,8 @@ _EXTRA_ENV_KEYS = frozenset({
"WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
"WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
"BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
"QQ_APP_ID", "QQ_CLIENT_SECRET", "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",
"QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
"QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME", # legacy aliases (pre-rename, still read for back-compat)
"QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
"QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
"TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
@ -241,13 +243,41 @@ def _secure_dir(path):
pass
def _is_container() -> bool:
"""Detect if we're running inside a Docker/Podman/LXC container.
When Hermes runs in a container with volume-mounted config files, forcing
0o600 permissions breaks multi-process setups where the gateway and
dashboard run as different UIDs or the volume mount requires broader
permissions.
"""
# Explicit opt-out
if os.environ.get("HERMES_CONTAINER") or os.environ.get("HERMES_SKIP_CHMOD"):
return True
# Docker / Podman marker file
if os.path.exists("/.dockerenv"):
return True
# LXC / cgroup-based detection
try:
with open("/proc/1/cgroup", "r") as f:
cgroup_content = f.read()
if "docker" in cgroup_content or "lxc" in cgroup_content or "kubepods" in cgroup_content:
return True
except (OSError, IOError):
pass
return False
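# Hedged check: per the explicit opt-out branch above, either env var forces
# container semantics without touching /proc or /.dockerenv:
#
#   os.environ["HERMES_SKIP_CHMOD"] = "1"
#   assert _is_container() is True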
def _secure_file(path):
"""Set file to owner-only read/write (0600). No-op on Windows.
Skipped in managed mode — the NixOS activation script sets
group-readable permissions (0640) on config files.
Skipped in containers — Docker/Podman volume mounts often need broader
permissions. Set HERMES_SKIP_CHMOD=1 to force-skip on other systems.
"""
if is_managed():
if is_managed() or _is_container():
return
try:
if os.path.exists(str(path)):
@ -390,10 +420,10 @@ DEFAULT_CONFIG = {
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
"record_sessions": False, # Auto-record browser sessions as WebM videos
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
"camofox": {
# When true, Hermes sends a stable profile-scoped userId to Camofox
# so the server can map it to a persistent browser profile directory.
# Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR.
# so the server maps it to a persistent Firefox profile automatically.
# When false (default), each session gets a random userId (ephemeral).
"managed_persistence": False,
},
@ -419,6 +449,27 @@ DEFAULT_CONFIG = {
"protect_last_n": 20, # minimum recent messages to keep uncompressed
},
# AWS Bedrock provider configuration.
# Only used when model.provider is "bedrock".
"bedrock": {
"region": "", # AWS region for Bedrock API calls (empty = AWS_REGION env var → us-east-1)
"discovery": {
"enabled": True, # Auto-discover models via ListFoundationModels
"provider_filter": [], # Only show models from these providers (e.g. ["anthropic", "amazon"])
"refresh_interval": 3600, # Cache discovery results for this many seconds
},
"guardrail": {
# Amazon Bedrock Guardrails — content filtering and safety policies.
# Create a guardrail in the Bedrock console, then set the ID and version here.
# See: https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html
"guardrail_identifier": "", # e.g. "abc123def456"
"guardrail_version": "", # e.g. "1" or "DRAFT"
"stream_processing_mode": "async", # "sync" or "async"
"trace": "disabled", # "enabled", "disabled", or "enabled_full"
},
},
"smart_model_routing": {
"enabled": False,
"max_simple_chars": 160,
@ -490,6 +541,13 @@ DEFAULT_CONFIG = {
"api_key": "",
"timeout": 30,
},
"title_generation": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
"timeout": 30,
},
},
"display": {
@ -510,6 +568,11 @@ DEFAULT_CONFIG = {
"platforms": {}, # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
},
# Web dashboard settings
"dashboard": {
"theme": "default", # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose"
},
# Privacy settings
"privacy": {
"redact_pii": False, # When True, hash user IDs and strip phone numbers from LLM context
@ -517,7 +580,7 @@ DEFAULT_CONFIG = {
# Text-to-speech configuration
"tts": {
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local)
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
"edge": {
"voice": "en-US-AriaNeural",
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@ -531,6 +594,12 @@ DEFAULT_CONFIG = {
"voice": "alloy",
# Voices: alloy, echo, fable, onyx, nova, shimmer
},
"xai": {
"voice_id": "eve",
"language": "en",
"sample_rate": 24000,
"bit_rate": 128000,
},
"mistral": {
"model": "voxtral-mini-tts-2603",
"voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral
@ -638,6 +707,7 @@ DEFAULT_CONFIG = {
"allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist)
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
},
# WhatsApp platform settings (gateway mode)
@ -648,6 +718,21 @@ DEFAULT_CONFIG = {
# Supports \n for newlines, e.g. "🤖 *My Bot*\n──────\n"
},
# Telegram platform settings (gateway mode)
"telegram": {
"channel_prompts": {}, # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
},
# Slack platform settings (gateway mode)
"slack": {
"channel_prompts": {}, # Per-channel ephemeral system prompts
},
# Mattermost platform settings (gateway mode)
"mattermost": {
"channel_prompts": {}, # Per-channel ephemeral system prompts
},
# Approval mode for dangerous commands:
# manual — always prompt the user (default)
# smart — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
@ -686,6 +771,20 @@ DEFAULT_CONFIG = {
"wrap_response": True,
},
# execute_code settings — controls the tool used for programmatic tool calls.
"code_execution": {
# Execution mode:
# project (default) — scripts run in the session's working directory
# with the active virtualenv/conda env's python, so project deps
# (pandas, torch, project packages) and relative paths resolve.
# strict — scripts run in an isolated temp directory with
# hermes-agent's own python (sys.executable). Maximum isolation
# and reproducibility; project deps and relative paths won't work.
# Env scrubbing (strips *_API_KEY, *_TOKEN, *_SECRET, ...) and the
# tool whitelist apply identically in both modes.
"mode": "project",
},
# Logging — controls file logging to ~/.hermes/logs/.
# agent.log captures INFO+ (all agent activity); errors.log captures WARNING+.
"logging": {
@ -703,7 +802,7 @@ DEFAULT_CONFIG = {
},
# Config schema version - bump this when adding new required fields
"_config_version": 17,
"_config_version": 19,
}
# =============================================================================
@ -771,6 +870,38 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"XAI_API_KEY": {
"description": "xAI API key",
"prompt": "xAI API key",
"url": "https://console.x.ai/",
"password": True,
"category": "provider",
"advanced": True,
},
"XAI_BASE_URL": {
"description": "xAI base URL override",
"prompt": "xAI base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"NVIDIA_API_KEY": {
"description": "NVIDIA NIM API key (build.nvidia.com or local NIM endpoint)",
"prompt": "NVIDIA NIM API key",
"url": "https://build.nvidia.com/",
"password": True,
"category": "provider",
"advanced": True,
},
"NVIDIA_BASE_URL": {
"description": "NVIDIA NIM base URL override (e.g. http://localhost:8000/v1 for local NIM)",
"prompt": "NVIDIA NIM base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"GLM_API_KEY": {
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
"prompt": "Z.AI / GLM API key",
@ -912,6 +1043,30 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"HERMES_GEMINI_CLIENT_ID": {
"description": "Google OAuth client ID for google-gemini-cli (optional; defaults to Google's public gemini-cli client)",
"prompt": "Google OAuth client ID (optional — leave empty to use the public default)",
"url": "https://console.cloud.google.com/apis/credentials",
"password": False,
"category": "provider",
"advanced": True,
},
"HERMES_GEMINI_CLIENT_SECRET": {
"description": "Google OAuth client secret for google-gemini-cli (optional)",
"prompt": "Google OAuth client secret (optional)",
"url": "https://console.cloud.google.com/apis/credentials",
"password": True,
"category": "provider",
"advanced": True,
},
"HERMES_GEMINI_PROJECT_ID": {
"description": "GCP project ID for paid Gemini tiers (free tier auto-provisions)",
"prompt": "GCP project ID for Gemini OAuth (leave empty for free tier)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"OPENCODE_ZEN_API_KEY": {
"description": "OpenCode Zen API key (pay-as-you-go access to curated models)",
"prompt": "OpenCode Zen API key",
@ -959,6 +1114,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"OLLAMA_API_KEY": {
"description": "Ollama Cloud API key (ollama.com — cloud-hosted open models)",
"prompt": "Ollama Cloud API key",
"url": "https://ollama.com/settings",
"password": True,
"category": "provider",
"advanced": True,
},
"OLLAMA_BASE_URL": {
"description": "Ollama Cloud base URL override (default: https://ollama.com/v1)",
"prompt": "Ollama base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"XIAOMI_API_KEY": {
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
"prompt": "Xiaomi MiMo API Key",
@ -974,6 +1145,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"AWS_REGION": {
"description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
"prompt": "AWS Region",
"url": "https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-regions.html",
"password": False,
"category": "provider",
"advanced": True,
},
"AWS_PROFILE": {
"description": "AWS named profile for Bedrock authentication (from ~/.aws/credentials)",
"prompt": "AWS Profile",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
# ── Tool API keys ──
"EXA_API_KEY": {
@ -1171,6 +1358,12 @@ OPTIONAL_ENV_VARS = {
"password": False,
"category": "messaging",
},
"TELEGRAM_PROXY": {
"description": "Proxy URL for Telegram connections (overrides HTTPS_PROXY). Supports http://, https://, socks5://",
"prompt": "Telegram proxy URL (optional)",
"password": False,
"category": "messaging",
},
"DISCORD_BOT_TOKEN": {
"description": "Discord bot token from Developer Portal",
"prompt": "Discord bot token",
@ -1366,12 +1559,12 @@ OPTIONAL_ENV_VARS = {
"prompt": "Allow All QQ Users",
"category": "messaging",
},
"QQ_HOME_CHANNEL": {
"QQBOT_HOME_CHANNEL": {
"description": "Default QQ channel/group for cron delivery and notifications",
"prompt": "QQ Home Channel",
"category": "messaging",
},
"QQ_HOME_CHANNEL_NAME": {
"QQBOT_HOME_CHANNEL_NAME": {
"description": "Display name for the QQ home channel",
"prompt": "QQ Home Channel Name",
"category": "messaging",
@ -1468,13 +1661,8 @@ OPTIONAL_ENV_VARS = {
},
# ── Agent settings ──
"MESSAGING_CWD": {
"description": "Working directory for terminal commands via messaging",
"prompt": "Messaging working directory (default: home)",
"url": None,
"password": False,
"category": "setting",
},
# NOTE: MESSAGING_CWD was removed here — use terminal.cwd in config.yaml
# instead. The gateway reads TERMINAL_CWD (bridged from terminal.cwd).
"SUDO_PASSWORD": {
"description": "Sudo password for terminal commands requiring root access; set to an explicit empty string to try empty without prompting",
"prompt": "Sudo password",
@ -1522,14 +1710,8 @@ OPTIONAL_ENV_VARS = {
},
}
if not _managed_nous_tools_enabled():
for _hidden_var in (
"FIRECRAWL_GATEWAY_URL",
"TOOL_GATEWAY_DOMAIN",
"TOOL_GATEWAY_SCHEME",
"TOOL_GATEWAY_USER_TOKEN",
):
OPTIONAL_ENV_VARS.pop(_hidden_var, None)
# Tool Gateway env vars are always visible — they're useful for
# self-hosted / custom gateway setups regardless of subscription state.
def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
@ -1953,6 +2135,52 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
sys.stderr.write("\n".join(lines) + "\n\n")
def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> None:
"""Warn if MESSAGING_CWD or TERMINAL_CWD is set in .env instead of config.yaml.
These env vars are deprecated — the canonical setting is terminal.cwd
in config.yaml. Prints a migration hint to stderr.
"""
import os, sys
messaging_cwd = os.environ.get("MESSAGING_CWD")
terminal_cwd_env = os.environ.get("TERMINAL_CWD")
if config is None:
try:
config = load_config()
except Exception:
return
terminal_cfg = config.get("terminal", {})
config_cwd = terminal_cfg.get("cwd", ".") if isinstance(terminal_cfg, dict) else "."
# Only warn if config.yaml doesn't have an explicit path
config_has_explicit_cwd = config_cwd not in (".", "auto", "cwd", "")
lines: list[str] = []
if messaging_cwd:
lines.append(
f" \033[33m⚠\033[0m MESSAGING_CWD={messaging_cwd} found in .env — "
f"this is deprecated."
)
if terminal_cwd_env and not config_has_explicit_cwd:
# TERMINAL_CWD in env but not from config bridge — likely from .env
lines.append(
f" \033[33m⚠\033[0m TERMINAL_CWD={terminal_cwd_env} found in .env — "
f"this is deprecated."
)
if lines:
hint_path = os.environ.get("HERMES_HOME", "~/.hermes")
lines.insert(0, "\033[33m⚠ Deprecated .env settings detected:\033[0m")
lines.append(
f" \033[2mMove to config.yaml instead: "
f"terminal:\\n cwd: /your/project/path\033[0m"
)
lines.append(
f" \033[2mThen remove the old entries from {hint_path}/.env\033[0m"
)
sys.stderr.write("\n".join(lines) + "\n\n")
def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, Any]:
"""
Migrate config to latest version, prompting for new required fields.
@ -2423,6 +2651,85 @@ def _expand_env_vars(obj):
return obj
def _items_by_unique_name(items):
"""Return a name-indexed dict only when all items have unique string names."""
if not isinstance(items, list):
return None
indexed = {}
for item in items:
if not isinstance(item, dict) or not isinstance(item.get("name"), str):
return None
name = item["name"]
if name in indexed:
return None
indexed[name] = item
return indexed
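# Behavior sketch, following the code above:
#
#   _items_by_unique_name([{"name": "a"}, {"name": "b"}])
#   # → {"a": {"name": "a"}, "b": {"name": "b"}}
#   _items_by_unique_name([{"name": "a"}, {"name": "a"}])
#   # → None (duplicate names, so callers fall back to positional matching)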
def _preserve_env_ref_templates(current, raw, loaded_expanded=None):
"""Restore raw ``${VAR}`` templates when a value is otherwise unchanged.
``load_config()`` expands env refs for runtime use. When a caller later
persists that config after modifying some unrelated setting, keep the
original on-disk template instead of writing the expanded plaintext
secret back to ``config.yaml``.
Prefer preserving the raw template when ``current`` still matches either
the value previously returned by ``load_config()`` for this config path or
the current environment expansion of ``raw``. This handles env-var
rotation between load and save while still treating mixed literal/template
string edits as caller-owned once their rendered value diverges.
"""
if isinstance(current, str) and isinstance(raw, str) and re.search(r"\${[^}]+}", raw):
if current == raw:
return raw
if isinstance(loaded_expanded, str) and current == loaded_expanded:
return raw
if _expand_env_vars(raw) == current:
return raw
return current
if isinstance(current, dict) and isinstance(raw, dict):
return {
key: _preserve_env_ref_templates(
value,
raw.get(key),
loaded_expanded.get(key) if isinstance(loaded_expanded, dict) else None,
)
for key, value in current.items()
}
if isinstance(current, list) and isinstance(raw, list):
# Prefer matching named config objects (e.g. custom_providers) by name
# so harmless reordering doesn't drop the original template. If names
# are duplicated, fall back to positional matching instead of silently
# shadowing one entry.
current_by_name = _items_by_unique_name(current)
raw_by_name = _items_by_unique_name(raw)
loaded_by_name = _items_by_unique_name(loaded_expanded)
if current_by_name is not None and raw_by_name is not None:
return [
_preserve_env_ref_templates(
item,
raw_by_name.get(item.get("name")),
loaded_by_name.get(item.get("name")) if loaded_by_name is not None else None,
)
for item in current
]
return [
_preserve_env_ref_templates(
item,
raw[index] if index < len(raw) else None,
loaded_expanded[index]
if isinstance(loaded_expanded, list) and index < len(loaded_expanded)
else None,
)
for index, item in enumerate(current)
]
return current
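# Hedged round-trip sketch (env var name is illustrative; assumes
# _expand_env_vars() resolves ${VAR} from os.environ):
#
#   os.environ["MY_PROVIDER_KEY"] = "sk-secret"
#   current = {"api_key": "sk-secret"}           # what load_config() returned
#   raw = {"api_key": "${MY_PROVIDER_KEY}"}      # what's on disk
#   _preserve_env_ref_templates(current, raw)
#   # → {"api_key": "${MY_PROVIDER_KEY}"}: the template survives the save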
def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
"""Move stale root-level provider/base_url into model section.
@ -2490,7 +2797,6 @@ def read_raw_config() -> Dict[str, Any]:
def load_config() -> Dict[str, Any]:
"""Load configuration from ~/.hermes/config.yaml."""
import copy
ensure_hermes_home()
config_path = get_config_path()
@ -2511,8 +2817,11 @@ def load_config() -> Dict[str, Any]:
config = _deep_merge(config, user_config)
except Exception as e:
print(f"Warning: Failed to load config: {e}")
return _expand_env_vars(_normalize_root_model_keys(_normalize_max_turns_config(config)))
normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
expanded = _expand_env_vars(normalized)
_LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(expanded)
return expanded
_SECURITY_COMMENT = """
@ -2621,7 +2930,15 @@ def save_config(config: Dict[str, Any]):
ensure_hermes_home()
config_path = get_config_path()
normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
current_normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
normalized = current_normalized
raw_existing = _normalize_root_model_keys(_normalize_max_turns_config(read_raw_config()))
if raw_existing:
normalized = _preserve_env_ref_templates(
normalized,
raw_existing,
_LAST_EXPANDED_CONFIG_BY_PATH.get(str(config_path)),
)
# Build optional commented-out sections for features that are off by
# default or only relevant when explicitly configured.
@ -2639,6 +2956,7 @@ def save_config(config: Dict[str, Any]):
extra_content="".join(parts) if parts else None,
)
_secure_file(config_path)
_LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(current_normalized)
def load_env() -> Dict[str, str]:
@ -2766,6 +3084,47 @@ def sanitize_env_file() -> int:
return fixes
def _check_non_ascii_credential(key: str, value: str) -> str:
"""Warn and strip non-ASCII characters from credential values.
API keys and tokens must be pure ASCII — they are sent as HTTP header
values which httpx/httpcore encode as ASCII. Non-ASCII characters
(commonly introduced by copy-pasting from rich-text editors or PDFs
that substitute lookalike Unicode glyphs for ASCII letters) cause
``UnicodeEncodeError: 'ascii' codec can't encode character`` at
request time.
Returns the sanitized (ASCII-only) value. Prints a warning if any
non-ASCII characters were found and removed.
"""
try:
value.encode("ascii")
return value # all ASCII — nothing to do
except UnicodeEncodeError:
pass
# Build a readable list of the offending characters
bad_chars: list[str] = []
for i, ch in enumerate(value):
if ord(ch) > 127:
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
import sys
print(
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
f" or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n"
f"\n"
+ "\n".join(f" {line}" for line in bad_chars[:5])
+ ("\n ... and more" if len(bad_chars) > 5 else "")
+ f"\n\n The non-ASCII characters have been stripped automatically.\n"
f" If authentication fails, re-copy the key from the provider's dashboard.\n",
file=sys.stderr,
)
return sanitized
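# Hedged example: an en-dash (U+2013) pasted from a PDF is reported to stderr
# and stripped, leaving the ASCII remainder:
#
#   _check_non_ascii_credential("MY_API_KEY", "sk\u2013abc123")
#   # → "skabc123"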
def save_env_value(key: str, value: str):
"""Save or update a value in ~/.hermes/.env."""
if is_managed():
@ -2774,6 +3133,8 @@ def save_env_value(key: str, value: str):
if not _ENV_VAR_NAME_RE.match(key):
raise ValueError(f"Invalid environment variable name: {key!r}")
value = value.replace("\n", "").replace("\r", "")
# API keys / tokens must be ASCII — strip non-ASCII with a warning.
value = _check_non_ascii_credential(key, value)
ensure_hermes_home()
env_path = get_env_path()
@ -2804,12 +3165,25 @@ def save_env_value(key: str, value: str):
lines.append(f"{key}={value}\n")
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
if env_path.exists():
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
# Restore original permissions before _secure_file may tighten them.
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)
@ -2820,13 +3194,6 @@ def save_env_value(key: str, value: str):
os.environ[key] = value
# Restrict .env permissions to owner-only (contains API keys)
if not _IS_WINDOWS:
try:
os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR)
except OSError:
pass
def remove_env_value(key: str) -> bool:
"""Remove a key from ~/.hermes/.env and os.environ.
@ -2855,12 +3222,23 @@ def remove_env_value(key: str) -> bool:
if found:
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(new_lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)

View file

@ -166,6 +166,7 @@ def curses_radiolist(
selected: int = 0,
*,
cancel_returns: int | None = None,
description: str | None = None,
) -> int:
"""Curses single-select radio list. Returns the selected index.
@ -174,6 +175,9 @@ def curses_radiolist(
items: Display labels for each row.
selected: Index that starts selected (pre-selected).
cancel_returns: Returned on ESC/q. Defaults to the original *selected*.
description: Optional multi-line text shown between the title and
the item list. Useful for context that should survive the
curses screen clear.
"""
if cancel_returns is None:
cancel_returns = selected
@ -181,6 +185,10 @@ def curses_radiolist(
if not sys.stdin.isatty():
return cancel_returns
desc_lines: list[str] = []
if description:
desc_lines = description.splitlines()
try:
import curses
result_holder: list = [None]
@ -199,22 +207,35 @@ def curses_radiolist(
stdscr.clear()
max_y, max_x = stdscr.getmaxyx()
row = 0
# Header
try:
hattr = curses.A_BOLD
if curses.has_colors():
hattr |= curses.color_pair(2)
stdscr.addnstr(0, 0, title, max_x - 1, hattr)
stdscr.addnstr(row, 0, title, max_x - 1, hattr)
row += 1
# Description lines
for dline in desc_lines:
if row >= max_y - 1:
break
stdscr.addnstr(row, 0, dline, max_x - 1, curses.A_NORMAL)
row += 1
stdscr.addnstr(
1, 0,
row, 0,
" \u2191\u2193 navigate ENTER/SPACE select ESC cancel",
max_x - 1, curses.A_DIM,
)
row += 1
except curses.error:
pass
# Scrollable item list
visible_rows = max_y - 4
items_start = row + 1
visible_rows = max_y - items_start - 1
if cursor < scroll_offset:
scroll_offset = cursor
elif cursor >= scroll_offset + visible_rows:
@ -223,7 +244,7 @@ def curses_radiolist(
for draw_i, i in enumerate(
range(scroll_offset, min(len(items), scroll_offset + visible_rows))
):
y = draw_i + 3
y = draw_i + items_start
if y >= max_y - 1:
break
radio = "\u25cf" if i == selected else "\u25cb"

View file

@ -6,7 +6,10 @@ Currently supports:
"""
import io
import json
import os
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
@ -27,6 +30,205 @@ _DPASTE_COM_URL = "https://dpaste.com/api/"
# paste.rs caps at ~1 MB; we stay under that with headroom.
_MAX_LOG_BYTES = 512_000
# Auto-delete pastes after this many seconds (6 hours).
_AUTO_DELETE_SECONDS = 21600
# ---------------------------------------------------------------------------
# Pending-deletion tracking (replaces the old fork-and-sleep subprocess).
# ---------------------------------------------------------------------------
def _pending_file() -> Path:
"""Path to ``~/.hermes/pastes/pending.json``.
Each entry: ``{"url": "...", "expire_at": <unix_ts>}``. Scheduled
DELETEs used to be handled by spawning a detached Python process per
paste that slept for 6 hours; those accumulated forever if the user
ran ``hermes debug share`` repeatedly. We now persist the schedule
to disk and sweep expired entries on the next debug invocation.
"""
return get_hermes_home() / "pastes" / "pending.json"
def _load_pending() -> list[dict]:
path = _pending_file()
if not path.exists():
return []
try:
data = json.loads(path.read_text(encoding="utf-8"))
if isinstance(data, list):
# Filter to well-formed entries only
return [
e for e in data
if isinstance(e, dict) and "url" in e and "expire_at" in e
]
except (OSError, ValueError, json.JSONDecodeError):
pass
return []
def _save_pending(entries: list[dict]) -> None:
path = _pending_file()
try:
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(".json.tmp")
tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8")
os.replace(tmp, path)
except OSError:
# Non-fatal — worst case the user has to run ``hermes debug delete``
# manually.
pass
def _record_pending(urls: list[str], delay_seconds: int = _AUTO_DELETE_SECONDS) -> None:
"""Record *urls* for deletion at ``now + delay_seconds``.
Only paste.rs URLs are recorded (dpaste.com auto-expires). Entries
are merged into any existing pending.json.
"""
paste_rs_urls = [u for u in urls if _extract_paste_id(u)]
if not paste_rs_urls:
return
entries = _load_pending()
# Dedupe by URL: keep the later expire_at if same URL appears twice
by_url: dict[str, float] = {e["url"]: float(e["expire_at"]) for e in entries}
expire_at = time.time() + delay_seconds
for u in paste_rs_urls:
by_url[u] = max(expire_at, by_url.get(u, 0.0))
merged = [{"url": u, "expire_at": ts} for u, ts in by_url.items()]
_save_pending(merged)
def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
"""Synchronously DELETE any pending pastes whose ``expire_at`` has passed.
Returns ``(deleted, remaining)``. Best-effort: failed deletes stay in
the pending file and will be retried on the next sweep. Silent
intended to be called from every ``hermes debug`` invocation with
minimal noise.
"""
entries = _load_pending()
if not entries:
return (0, 0)
current = time.time() if now is None else now
deleted = 0
remaining: list[dict] = []
for entry in entries:
try:
expire_at = float(entry.get("expire_at", 0))
except (TypeError, ValueError):
continue # drop malformed entries
if expire_at > current:
remaining.append(entry)
continue
url = entry.get("url", "")
try:
if delete_paste(url):
deleted += 1
continue
except Exception:
# Network hiccup, 404 (already gone), etc. — drop the entry
# after a grace period; don't retry forever.
pass
# Retain failed deletes for up to 24h past expiration, then give up.
if expire_at + 86400 > current:
remaining.append(entry)
else:
deleted += 1 # count as reaped (paste.rs will GC eventually)
if deleted:
_save_pending(remaining)
return (deleted, len(remaining))
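# Hedged lifecycle sketch (URL is hypothetical; the sweep issues a real HTTP
# DELETE, so this is illustrative rather than a unit test):
#
#   _record_pending(["https://paste.rs/abc12"], delay_seconds=0)
#   deleted, remaining = _sweep_expired_pastes(now=time.time() + 1)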
# ---------------------------------------------------------------------------
# Privacy / delete helpers
# ---------------------------------------------------------------------------
_PRIVACY_NOTICE = """\
This will upload the following to a public paste service:
System info (OS, Python version, Hermes version, provider, which API keys
are configured — NOT the actual keys)
Recent log lines (agent.log, errors.log, gateway.log — may contain
conversation fragments and file paths)
Full agent.log and gateway.log (up to 512 KB each — likely contains
conversation content, tool outputs, and file paths)
Pastes auto-delete after 6 hours.
"""
_GATEWAY_PRIVACY_NOTICE = (
"⚠️ **Privacy notice:** This uploads system info + recent log tails "
"(may contain conversation fragments) to a public paste service. "
"Full logs are NOT included from the gateway — use `hermes debug share` "
"from the CLI for full log uploads.\n"
"Pastes auto-delete after 6 hours."
)
def _extract_paste_id(url: str) -> Optional[str]:
"""Extract the paste ID from a paste.rs or dpaste.com URL.
Returns the ID string, or None if the URL doesn't match a known service.
"""
url = url.strip().rstrip("/")
for prefix in ("https://paste.rs/", "http://paste.rs/"):
if url.startswith(prefix):
return url[len(prefix):]
return None
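# Behavior sketch, per the prefix check above:
#
#   _extract_paste_id("https://paste.rs/abc12/")  # → "abc12"
#   _extract_paste_id("https://dpaste.com/XYZ")   # → None (no API delete)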
def delete_paste(url: str) -> bool:
"""Delete a paste from paste.rs. Returns True on success.
Only paste.rs supports unauthenticated DELETE. dpaste.com pastes
expire automatically but cannot be deleted via API.
"""
paste_id = _extract_paste_id(url)
if not paste_id:
raise ValueError(
f"Cannot delete: only paste.rs URLs are supported. Got: {url}"
)
target = f"{_PASTE_RS_URL}{paste_id}"
req = urllib.request.Request(
target, method="DELETE",
headers={"User-Agent": "hermes-agent/debug-share"},
)
with urllib.request.urlopen(req, timeout=30) as resp:
return 200 <= resp.status < 300
def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SECONDS):
"""Record *urls* for deletion ``delay_seconds`` from now.
Previously this spawned a detached Python subprocess per call that slept
for 6 hours and then issued DELETE requests. Those subprocesses leaked
every ``hermes debug share`` invocation added ~20 MB of resident Python
interpreters that never exited until the sleep completed.
The replacement is stateless: we append to ``~/.hermes/pastes/pending.json``
and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from
every ``hermes debug`` invocation. If the user never runs ``hermes debug``
again, paste.rs's own retention policy handles cleanup.
"""
_record_pending(urls, delay_seconds=delay_seconds)
def _delete_hint(url: str) -> str:
"""Return a one-liner delete command for the given paste URL."""
paste_id = _extract_paste_id(url)
if paste_id:
return f"hermes debug delete {url}"
# dpaste.com — no API delete, expires on its own.
return "(auto-expires per dpaste.com policy)"
def _upload_paste_rs(content: str) -> str:
"""Upload to paste.rs. Returns the paste URL.
@ -250,6 +452,9 @@ def run_debug_share(args):
expiry = getattr(args, "expire", 7)
local_only = getattr(args, "local", False)
if not local_only:
print(_PRIVACY_NOTICE)
print("Collecting debug report...")
# Capture dump once — prepended to every paste for context.
@ -315,22 +520,66 @@ def run_debug_share(args):
if failures:
print(f"\n (failed to upload: {', '.join(failures)})")
# Schedule auto-deletion after 6 hours
_schedule_auto_delete(list(urls.values()))
print(f"\n⏱ Pastes will auto-delete in 6 hours.")
# Manual delete fallback
print(f"To delete now: hermes debug delete <url>")
print(f"\nShare these links with the Hermes team for support.")
def run_debug_delete(args):
"""Delete one or more paste URLs uploaded by /debug."""
urls = getattr(args, "urls", [])
if not urls:
print("Usage: hermes debug delete <url> [<url> ...]")
print(" Deletes paste.rs pastes uploaded by 'hermes debug share'.")
return
for url in urls:
try:
ok = delete_paste(url)
if ok:
print(f" ✓ Deleted: {url}")
else:
print(f" ✗ Failed to delete: {url} (unexpected response)")
except ValueError as exc:
print(f"{exc}")
except Exception as exc:
print(f" ✗ Could not delete {url}: {exc}")
def run_debug(args):
"""Route debug subcommands."""
# Opportunistic sweep of expired pastes on every ``hermes debug`` call.
# Replaces the old per-paste sleeping subprocess that used to leak as
# one orphaned Python interpreter per scheduled deletion. Silent and
# best-effort — any failure is swallowed so ``hermes debug`` stays
# reliable even when offline.
try:
_sweep_expired_pastes()
except Exception:
pass
subcmd = getattr(args, "debug_command", None)
if subcmd == "share":
run_debug_share(args)
elif subcmd == "delete":
run_debug_delete(args)
else:
# Default: show help
print("Usage: hermes debug share [--lines N] [--expire N] [--local]")
print("Usage: hermes debug <command>")
print()
print("Commands:")
print(" share Upload debug report to a paste service and print URL")
print(" delete Delete a previously uploaded paste")
print()
print("Options:")
print("Options (share):")
print(" --lines N Number of log lines to include (default: 200)")
print(" --expire N Paste expiry in days (default: 7)")
print(" --local Print report locally instead of uploading")
print()
print("Options (delete):")
print(" <url> ... One or more paste URLs to delete")

hermes_cli/dingtalk_auth.py (new file, 294 lines)
@ -0,0 +1,294 @@
"""
DingTalk Device Flow authorization.
Implements the same 3-step registration flow as dingtalk-openclaw-connector:
1. POST /app/registration/init  → get nonce
2. POST /app/registration/begin → get device_code + verification_uri_complete
3. POST /app/registration/poll  → poll until SUCCESS → get client_id + client_secret
The verification_uri_complete is rendered as a QR code in the terminal so the
user can scan it with DingTalk to authorize, yielding AppKey + AppSecret
automatically.
"""
from __future__ import annotations
import io
import os
import sys
import time
import logging
from typing import Callable, Optional, Tuple
import requests
logger = logging.getLogger(__name__)
# ── Configuration ──────────────────────────────────────────────────────────
REGISTRATION_BASE_URL = os.environ.get(
"DINGTALK_REGISTRATION_BASE_URL", "https://oapi.dingtalk.com"
).rstrip("/")
REGISTRATION_SOURCE = os.environ.get("DINGTALK_REGISTRATION_SOURCE", "openClaw")
# ── API helpers ────────────────────────────────────────────────────────────
class RegistrationError(Exception):
"""Raised when a DingTalk registration API call fails."""
def _api_post(path: str, payload: dict) -> dict:
"""POST to the registration API and return the parsed JSON body."""
url = f"{REGISTRATION_BASE_URL}{path}"
try:
resp = requests.post(url, json=payload, timeout=15)
resp.raise_for_status()
data = resp.json()
except requests.RequestException as exc:
raise RegistrationError(f"Network error calling {url}: {exc}") from exc
errcode = data.get("errcode", -1)
if errcode != 0:
errmsg = data.get("errmsg", "unknown error")
raise RegistrationError(f"API error [{path}]: {errmsg} (errcode={errcode})")
return data
# ── Core flow ──────────────────────────────────────────────────────────────
def begin_registration() -> dict:
"""Start a device-flow registration.
Returns a dict with keys:
device_code, verification_uri_complete, expires_in, interval
"""
# Step 1: init → nonce
init_data = _api_post("/app/registration/init", {"source": REGISTRATION_SOURCE})
nonce = str(init_data.get("nonce", "")).strip()
if not nonce:
raise RegistrationError("init response missing nonce")
# Step 2: begin → device_code, verification_uri_complete
begin_data = _api_post("/app/registration/begin", {"nonce": nonce})
device_code = str(begin_data.get("device_code", "")).strip()
verification_uri_complete = str(begin_data.get("verification_uri_complete", "")).strip()
if not device_code:
raise RegistrationError("begin response missing device_code")
if not verification_uri_complete:
raise RegistrationError("begin response missing verification_uri_complete")
return {
"device_code": device_code,
"verification_uri_complete": verification_uri_complete,
"expires_in": int(begin_data.get("expires_in", 7200)),
"interval": max(int(begin_data.get("interval", 3)), 2),
}
def poll_registration(device_code: str) -> dict:
"""Poll the registration status once.
Returns a dict with keys: status, client_id?, client_secret?, fail_reason?
"""
data = _api_post("/app/registration/poll", {"device_code": device_code})
status_raw = str(data.get("status", "")).strip().upper()
if status_raw not in ("WAITING", "SUCCESS", "FAIL", "EXPIRED"):
status_raw = "UNKNOWN"
return {
"status": status_raw,
"client_id": str(data.get("client_id", "")).strip() or None,
"client_secret": str(data.get("client_secret", "")).strip() or None,
"fail_reason": str(data.get("fail_reason", "")).strip() or None,
}
def wait_for_registration_success(
device_code: str,
interval: int = 3,
expires_in: int = 7200,
    on_waiting: Optional[Callable[[], None]] = None,
) -> Tuple[str, str]:
"""Block until the registration succeeds or times out.
Returns (client_id, client_secret).
"""
deadline = time.monotonic() + expires_in
retry_window = 120 # 2 minutes for transient errors
retry_start = 0.0
while time.monotonic() < deadline:
time.sleep(interval)
try:
result = poll_registration(device_code)
except RegistrationError:
if retry_start == 0:
retry_start = time.monotonic()
if time.monotonic() - retry_start < retry_window:
continue
raise
status = result["status"]
if status == "WAITING":
retry_start = 0
if on_waiting:
on_waiting()
continue
if status == "SUCCESS":
cid = result["client_id"]
csecret = result["client_secret"]
if not cid or not csecret:
raise RegistrationError("authorization succeeded but credentials are missing")
return cid, csecret
# FAIL / EXPIRED / UNKNOWN
if retry_start == 0:
retry_start = time.monotonic()
if time.monotonic() - retry_start < retry_window:
continue
reason = result.get("fail_reason") or status
raise RegistrationError(f"authorization failed: {reason}")
raise RegistrationError("authorization timed out, please retry")
# ── QR code rendering ─────────────────────────────────────────────────────
def _ensure_qrcode_installed() -> bool:
"""Try to import qrcode; if missing, auto-install it via pip/uv."""
try:
import qrcode # noqa: F401
return True
except ImportError:
pass
import subprocess
# Try uv first (Hermes convention), then pip
for cmd in (
[sys.executable, "-m", "uv", "pip", "install", "qrcode"],
[sys.executable, "-m", "pip", "install", "-q", "qrcode"],
):
try:
subprocess.check_call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
import qrcode # noqa: F401,F811
return True
except (subprocess.CalledProcessError, ImportError, FileNotFoundError):
continue
return False
def render_qr_to_terminal(url: str) -> bool:
"""Render *url* as a compact QR code in the terminal.
Returns True if the QR code was printed, False if the library is missing.
"""
try:
import qrcode
except ImportError:
return False
qr = qrcode.QRCode(
version=1,
error_correction=qrcode.constants.ERROR_CORRECT_L,
box_size=1,
border=1,
)
qr.add_data(url)
qr.make(fit=True)
# Use half-block characters for compact rendering (2 rows per character)
matrix = qr.get_matrix()
rows = len(matrix)
lines: list[str] = []
TOP_HALF = "\u2580" # ▀
BOTTOM_HALF = "\u2584" # ▄
FULL_BLOCK = "\u2588" # █
EMPTY = " "
for r in range(0, rows, 2):
line_chars: list[str] = []
for c in range(len(matrix[r])):
top = matrix[r][c]
bottom = matrix[r + 1][c] if r + 1 < rows else False
if top and bottom:
line_chars.append(FULL_BLOCK)
elif top:
line_chars.append(TOP_HALF)
elif bottom:
line_chars.append(BOTTOM_HALF)
else:
line_chars.append(EMPTY)
lines.append(" " + "".join(line_chars))
print("\n".join(lines))
return True
# ── High-level entry point for the setup wizard ───────────────────────────
def dingtalk_qr_auth() -> Optional[Tuple[str, str]]:
"""Run the interactive QR-code device-flow authorization.
Returns (client_id, client_secret) on success, or None if the user
cancelled or the flow failed.
"""
from hermes_cli.setup import print_info, print_success, print_warning, print_error
print()
print_info(" Initializing DingTalk device authorization...")
print_info(" Note: the scan page is branded 'OpenClaw' — DingTalk's")
print_info(" ecosystem onboarding bridge. Safe to use.")
try:
reg = begin_registration()
except RegistrationError as exc:
print_error(f" Authorization init failed: {exc}")
return None
url = reg["verification_uri_complete"]
# Ensure qrcode library is available (auto-install if missing)
if not _ensure_qrcode_installed():
print_warning(" qrcode library install failed, will show link only.")
print()
print_info(" Please scan the QR code below with DingTalk to authorize:")
print()
if not render_qr_to_terminal(url):
print_warning(f" QR code render failed, please open the link below to authorize:")
print()
print_info(f" Or open this link manually: {url}")
print()
print_info(" Waiting for QR scan authorization... (timeout: 2 hours)")
dot_count = 0
def _on_waiting():
nonlocal dot_count
dot_count += 1
if dot_count % 10 == 0:
sys.stdout.write(".")
sys.stdout.flush()
try:
client_id, client_secret = wait_for_registration_success(
device_code=reg["device_code"],
interval=reg["interval"],
expires_in=reg["expires_in"],
on_waiting=_on_waiting,
)
except RegistrationError as exc:
print()
print_error(f" Authorization failed: {exc}")
return None
print()
print_success(" QR scan authorization successful!")
print_success(f" Client ID: {client_id}")
print_success(f" Client Secret: {client_secret[:8]}{'*' * (len(client_secret) - 8)}")
return client_id, client_secret


@ -8,6 +8,7 @@ import os
import sys
import subprocess
import shutil
from pathlib import Path
from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
from hermes_constants import display_hermes_home
@ -372,7 +373,11 @@ def run_doctor(args):
print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))
try:
        from hermes_cli.auth import (
            get_nous_auth_status,
            get_codex_auth_status,
            get_gemini_oauth_auth_status,
        )
nous_status = get_nous_auth_status()
if nous_status.get("logged_in"):
@ -387,6 +392,20 @@ def run_doctor(args):
check_warn("OpenAI Codex auth", "(not logged in)")
if codex_status.get("error"):
check_info(codex_status["error"])
gemini_status = get_gemini_oauth_auth_status()
if gemini_status.get("logged_in"):
email = gemini_status.get("email") or ""
project = gemini_status.get("project_id") or ""
pieces = []
if email:
pieces.append(email)
if project:
pieces.append(f"project={project}")
suffix = f" ({', '.join(pieces)})" if pieces else ""
check_ok("Google Gemini OAuth", f"(logged in{suffix})")
else:
check_warn("Google Gemini OAuth", "(not logged in)")
except Exception as e:
check_warn("Auth provider status", f"(could not check: {e})")
@ -513,7 +532,87 @@ def run_doctor(args):
pass
_check_gateway_service_linger(issues)
# =========================================================================
# Check: Command installation (hermes bin symlink)
# =========================================================================
if sys.platform != "win32":
print()
print(color("◆ Command Installation", Colors.CYAN, Colors.BOLD))
# Determine the venv entry point location
_venv_bin = None
for _venv_name in ("venv", ".venv"):
_candidate = PROJECT_ROOT / _venv_name / "bin" / "hermes"
if _candidate.exists():
_venv_bin = _candidate
break
# Determine the expected command link directory (mirrors install.sh logic)
_prefix = os.environ.get("PREFIX", "")
_is_termux_env = bool(os.environ.get("TERMUX_VERSION")) or "com.termux/files/usr" in _prefix
if _is_termux_env and _prefix:
_cmd_link_dir = Path(_prefix) / "bin"
_cmd_link_display = "$PREFIX/bin"
else:
_cmd_link_dir = Path.home() / ".local" / "bin"
_cmd_link_display = "~/.local/bin"
_cmd_link = _cmd_link_dir / "hermes"
if _venv_bin is None:
check_warn(
"Venv entry point not found",
"(hermes not in venv/bin/ or .venv/bin/ — reinstall with pip install -e '.[all]')"
)
manual_issues.append(
f"Reinstall entry point: cd {PROJECT_ROOT} && source venv/bin/activate && pip install -e '.[all]'"
)
else:
check_ok(f"Venv entry point exists ({_venv_bin.relative_to(PROJECT_ROOT)})")
# Check the symlink at the command link location
if _cmd_link.is_symlink():
_target = _cmd_link.resolve()
_expected = _venv_bin.resolve()
if _target == _expected:
check_ok(f"{_cmd_link_display}/hermes → correct target")
else:
check_warn(
f"{_cmd_link_display}/hermes points to wrong target",
f"(→ {_target}, expected → {_expected})"
)
if should_fix:
_cmd_link.unlink()
_cmd_link.symlink_to(_venv_bin)
check_ok(f"Fixed symlink: {_cmd_link_display}/hermes → {_venv_bin}")
fixed_count += 1
else:
issues.append(f"Broken symlink at {_cmd_link_display}/hermes — run 'hermes doctor --fix'")
elif _cmd_link.exists():
# It's a regular file, not a symlink — possibly a wrapper script
check_ok(f"{_cmd_link_display}/hermes exists (non-symlink)")
else:
check_fail(
f"{_cmd_link_display}/hermes not found",
"(hermes command may not work outside the venv)"
)
if should_fix:
_cmd_link_dir.mkdir(parents=True, exist_ok=True)
_cmd_link.symlink_to(_venv_bin)
check_ok(f"Created symlink: {_cmd_link_display}/hermes → {_venv_bin}")
fixed_count += 1
# Check if the link dir is on PATH
_path_dirs = os.environ.get("PATH", "").split(os.pathsep)
if str(_cmd_link_dir) not in _path_dirs:
check_warn(
f"{_cmd_link_display} is not on your PATH",
"(add it to your shell config: export PATH=\"$HOME/.local/bin:$PATH\")"
)
manual_issues.append(f"Add {_cmd_link_display} to your PATH")
else:
issues.append(f"Missing {_cmd_link_display}/hermes symlink — run 'hermes doctor --fix'")
# =========================================================================
# Check: External tools
# =========================================================================
@ -726,6 +825,7 @@ def run_doctor(args):
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
# MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
@ -733,7 +833,8 @@ def run_doctor(args):
("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
("OpenCode Go", ("OPENCODE_GO_API_KEY",), "https://opencode.ai/zen/go/v1/models", "OPENCODE_GO_BASE_URL", True),
# OpenCode Go has no shared /models endpoint; skip the health check.
("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
]
for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
_key = ""
@ -778,6 +879,31 @@ def run_doctor(args):
except Exception as _e:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_e})', Colors.DIM)} ")
# -- AWS Bedrock --
# Bedrock uses the AWS SDK credential chain, not API keys.
try:
from agent.bedrock_adapter import has_aws_credentials, resolve_aws_auth_env_var, resolve_bedrock_region
if has_aws_credentials():
_auth_var = resolve_aws_auth_env_var()
_region = resolve_bedrock_region()
_label = "AWS Bedrock".ljust(20)
print(f" Checking AWS Bedrock...", end="", flush=True)
try:
import boto3
_br_client = boto3.client("bedrock", region_name=_region)
_br_resp = _br_client.list_foundation_models()
_model_count = len(_br_resp.get("modelSummaries", []))
print(f"\r {color('', Colors.GREEN)} {_label} {color(f'({_auth_var}, {_region}, {_model_count} models)', Colors.DIM)} ")
except ImportError:
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'(boto3 not installed — {sys.executable} -m pip install boto3)', Colors.DIM)} ")
issues.append(f"Install boto3 for Bedrock: {sys.executable} -m pip install boto3")
except Exception as _e:
_err_name = type(_e).__name__
print(f"\r {color('', Colors.YELLOW)} {_label} {color(f'({_err_name}: {_e})', Colors.DIM)} ")
issues.append(f"AWS Bedrock: {_err_name} — check IAM permissions for bedrock:ListFoundationModels")
except ImportError:
pass # bedrock_adapter not available — skip silently
# =========================================================================
# Check: Submodules
# =========================================================================


@ -43,41 +43,20 @@ def _redact(value: str) -> str:
def _gateway_status() -> str:
"""Return a short gateway status string."""
if sys.platform.startswith("linux"):
from hermes_constants import is_container
if is_container():
try:
from hermes_cli.gateway import find_gateway_pids
pids = find_gateway_pids()
if pids:
return f"running (docker, pid {pids[0]})"
return "stopped (docker)"
except Exception:
return "stopped (docker)"
try:
from hermes_cli.gateway import get_service_name
svc = get_service_name()
except Exception:
svc = "hermes-gateway"
try:
r = subprocess.run(
["systemctl", "--user", "is-active", svc],
capture_output=True, text=True, timeout=5,
)
return "running (systemd)" if r.stdout.strip() == "active" else "stopped"
except Exception:
return "unknown"
elif sys.platform == "darwin":
try:
from hermes_cli.gateway import get_launchd_label
r = subprocess.run(
["launchctl", "list", get_launchd_label()],
capture_output=True, text=True, timeout=5,
)
return "loaded (launchd)" if r.returncode == 0 else "not loaded"
except Exception:
return "unknown"
return "N/A"
try:
from hermes_cli.gateway import get_gateway_runtime_snapshot
snapshot = get_gateway_runtime_snapshot()
if snapshot.running:
mode = snapshot.manager
if snapshot.has_process_service_mismatch:
mode = "manual"
return f"running ({mode}, pid {snapshot.gateway_pids[0]})"
        return f"stopped ({snapshot.manager})"
except Exception:
return "unknown" if sys.platform.startswith(("linux", "darwin")) else "N/A"
def _count_skills(hermes_home: Path) -> int:
@ -296,6 +275,7 @@ def run_dump(args):
("DEEPSEEK_API_KEY", "deepseek"),
("DASHSCOPE_API_KEY", "dashscope"),
("HF_TOKEN", "huggingface"),
("NVIDIA_API_KEY", "nvidia"),
("AI_GATEWAY_API_KEY", "ai_gateway"),
("OPENCODE_ZEN_API_KEY", "opencode_zen"),
("OPENCODE_GO_API_KEY", "opencode_go"),


@ -8,11 +8,40 @@ from pathlib import Path
from dotenv import load_dotenv
# Env var name suffixes that indicate credential values. These are the
# only env vars whose values we sanitize on load — we must not silently
# alter arbitrary user env vars, but credentials are known to require
# pure ASCII (they become HTTP header values).
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
def _sanitize_loaded_credentials() -> None:
"""Strip non-ASCII characters from credential env vars in os.environ.
Called after dotenv loads so the rest of the codebase never sees
non-ASCII API keys. Only touches env vars whose names end with
known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
"""
for key, value in list(os.environ.items()):
if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
continue
try:
value.encode("ascii")
except UnicodeEncodeError:
os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
try:
load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
except UnicodeDecodeError:
load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
# Strip non-ASCII characters from credential env vars that were just
# loaded. API keys must be pure ASCII since they're sent as HTTP
# header values (httpx encodes headers as ASCII). Non-ASCII chars
# typically come from copy-pasting keys from PDFs or rich-text editors
# that substitute Unicode lookalike glyphs (e.g. ʋ U+028B for v).
_sanitize_loaded_credentials()
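# Illustrative only: what the sanitizer does to a key pasted with a Unicode
# lookalike glyph (the env var name here is a made-up example):
#
#     os.environ["DEMO_API_KEY"] = "sk-\u028balue"   # "sk-ʋalue"
#     _sanitize_loaded_credentials()
#     os.environ["DEMO_API_KEY"]                     # -> "sk-alue" (U+028B dropped)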
def _sanitize_env_file_if_needed(path: Path) -> None:


@ -10,6 +10,7 @@ import shutil
import signal
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
@ -41,6 +42,23 @@ from hermes_cli.colors import Colors, color
# Process Management (for manual gateway runs)
# =============================================================================
@dataclass(frozen=True)
class GatewayRuntimeSnapshot:
manager: str
service_installed: bool = False
service_running: bool = False
gateway_pids: tuple[int, ...] = ()
service_scope: str | None = None
@property
def running(self) -> bool:
return self.service_running or bool(self.gateway_pids)
@property
def has_process_service_mismatch(self) -> bool:
return self.service_installed and self.running and not self.service_running
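# Quick illustration of the derived properties (values made up): a manual
# foreground/tmux run while a unit file is installed but inactive reads as a
# process/service mismatch, which status output can then flag.
#
#     snap = GatewayRuntimeSnapshot(
#         manager="systemd (user)",
#         service_installed=True,
#         service_running=False,
#         gateway_pids=(4242,),
#     )
#     snap.running                       # True: a PID exists
#     snap.has_process_service_mismatch  # True: alive, but not via the service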
def _get_service_pids() -> set:
"""Return PIDs currently managed by systemd or launchd gateway services.
@ -157,20 +175,22 @@ def _request_gateway_self_restart(pid: int) -> bool:
return True
def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
    if pid is None or pid <= 0:
        return
    if pid == os.getpid() or pid in exclude_pids or pid in pids:
        return
    pids.append(pid)
def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> list[int]:
    """Best-effort process-table scan for gateway PIDs.
    This supplements the profile-scoped PID file so status views can still spot
    a live gateway when the PID file is stale/missing, and ``--all`` sweeps can
    discover gateways outside the current profile.
    """
    pids: list[int] = []
patterns = [
"hermes_cli.main gateway",
"hermes_cli.main --profile",
@ -203,33 +223,39 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals
if is_windows():
result = subprocess.run(
["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"],
                capture_output=True,
                text=True,
                timeout=10,
)
if result.returncode != 0:
return []
current_cmd = ""
            for line in result.stdout.split("\n"):
line = line.strip()
if line.startswith("CommandLine="):
current_cmd = line[len("CommandLine="):]
elif line.startswith("ProcessId="):
pid_str = line[len("ProcessId="):]
                    if any(p in current_cmd for p in patterns) and (
                        all_profiles or _matches_current_profile(current_cmd)
                    ):
                        try:
                            _append_unique_pid(pids, int(pid_str), exclude_pids)
except ValueError:
pass
current_cmd = ""
else:
result = subprocess.run(
["ps", "eww", "-ax", "-o", "pid=,command="],
["ps", "-A", "eww", "-o", "pid=,command="],
capture_output=True,
text=True,
timeout=10,
)
if result.returncode != 0:
return []
for line in result.stdout.split("\n"):
stripped = line.strip()
                if not stripped or "grep" in stripped:
continue
pid = None
@ -251,16 +277,137 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals
if pid is None:
continue
                if any(pattern in command for pattern in patterns) and (
                    all_profiles or _matches_current_profile(command)
                ):
                    _append_unique_pid(pids, pid, exclude_pids)
except (OSError, subprocess.TimeoutExpired):
pass
    return pids
def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = False) -> list:
"""Find PIDs of running gateway processes.
Args:
exclude_pids: PIDs to exclude from the result (e.g. service-managed
PIDs that should not be killed during a stale-process sweep).
all_profiles: When ``True``, return gateway PIDs across **all**
profiles (the pre-7923 global behaviour). ``hermes update``
needs this because a code update affects every profile.
When ``False`` (default), only PIDs belonging to the current
Hermes profile are returned.
"""
_exclude = set(exclude_pids or set())
pids: list[int] = []
if not all_profiles:
try:
from gateway.status import get_running_pid
_append_unique_pid(pids, get_running_pid(), _exclude)
except Exception:
pass
for pid in _get_service_pids():
_append_unique_pid(pids, pid, _exclude)
for pid in _scan_gateway_pids(_exclude, all_profiles=all_profiles):
_append_unique_pid(pids, pid, _exclude)
return pids
def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
selected_system = _select_systemd_scope(system)
unit_exists = get_systemd_unit_path(system=selected_system).exists()
if not unit_exists:
return selected_system, False
try:
result = _run_systemctl(
["is-active", get_service_name()],
system=selected_system,
capture_output=True,
text=True,
timeout=10,
)
except (RuntimeError, subprocess.TimeoutExpired):
return selected_system, False
return selected_system, result.stdout.strip() == "active"
def _probe_launchd_service_running() -> bool:
if not get_launchd_plist_path().exists():
return False
try:
result = subprocess.run(
["launchctl", "list", get_launchd_label()],
capture_output=True,
text=True,
timeout=10,
)
except subprocess.TimeoutExpired:
return False
return result.returncode == 0
def get_gateway_runtime_snapshot(system: bool = False) -> GatewayRuntimeSnapshot:
"""Return a unified view of gateway liveness for the current profile."""
gateway_pids = tuple(find_gateway_pids())
if is_termux():
return GatewayRuntimeSnapshot(
manager="Termux / manual process",
gateway_pids=gateway_pids,
)
from hermes_constants import is_container
if is_linux() and is_container():
return GatewayRuntimeSnapshot(
manager="docker (foreground)",
gateway_pids=gateway_pids,
)
if supports_systemd_services():
selected_system, service_running = _probe_systemd_service_running(system=system)
scope_label = _service_scope_label(selected_system)
return GatewayRuntimeSnapshot(
manager=f"systemd ({scope_label})",
service_installed=get_systemd_unit_path(system=selected_system).exists(),
service_running=service_running,
gateway_pids=gateway_pids,
service_scope=scope_label,
)
if is_macos():
return GatewayRuntimeSnapshot(
manager="launchd",
service_installed=get_launchd_plist_path().exists(),
service_running=_probe_launchd_service_running(),
gateway_pids=gateway_pids,
service_scope="launchd",
)
return GatewayRuntimeSnapshot(
manager="manual process",
gateway_pids=gateway_pids,
)
def _format_gateway_pids(pids: tuple[int, ...] | list[int], *, limit: int | None = 3) -> str:
    shown = list(pids) if limit is None else list(pids[:limit])
    rendered = [str(pid) for pid in shown if pid > 0]
if limit is not None and len(pids) > limit:
rendered.append("...")
return ", ".join(rendered)
def _print_gateway_process_mismatch(snapshot: GatewayRuntimeSnapshot) -> None:
if not snapshot.has_process_service_mismatch:
return
print()
print("⚠ Gateway process is running for this profile, but the service is not active")
print(f" PID(s): {_format_gateway_pids(snapshot.gateway_pids, limit=None)}")
print(" This is usually a manual foreground/tmux/nohup run, so `hermes gateway`")
print(" can refuse to start another copy until this process stops.")
def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None,
all_profiles: bool = False) -> int:
"""Kill any running gateway processes. Returns count killed.
@ -340,25 +487,44 @@ def _wsl_systemd_operational() -> bool:
WSL2 with ``systemd=true`` in wsl.conf has working systemd.
    WSL2 without it (or WSL1) does not; systemctl commands fail.
"""
return _systemd_operational(system=True)
def _systemd_operational(system: bool = False) -> bool:
"""Return True when the requested systemd scope is usable."""
try:
result = _run_systemctl(
["is-system-running"],
system=system,
capture_output=True,
text=True,
timeout=5,
)
# "running", "degraded", "starting" all mean systemd is PID 1
status = result.stdout.strip().lower()
return status in ("running", "degraded", "starting", "initializing")
    except (RuntimeError, subprocess.TimeoutExpired, OSError):
return False
def _container_systemd_operational() -> bool:
"""Return True when a container exposes working user or system systemd."""
if _systemd_operational(system=False):
return True
if _systemd_operational(system=True):
return True
return False
def supports_systemd_services() -> bool:
    if not is_linux() or is_termux():
return False
if shutil.which("systemctl") is None:
return False
if is_wsl():
return _wsl_systemd_operational()
if is_container():
return _container_systemd_operational()
return True
@ -521,6 +687,195 @@ def has_conflicting_systemd_units() -> bool:
return len(get_installed_systemd_scopes()) > 1
# Legacy service names from older Hermes installs that predate the
# hermes-gateway rename. Kept as an explicit allowlist (NOT a glob) so
# profile units (hermes-gateway-*.service) and unrelated third-party
# "hermes" units are never matched.
_LEGACY_SERVICE_NAMES: tuple[str, ...] = ("hermes.service",)
# ExecStart content markers that identify a unit as running our gateway.
# A legacy unit is only flagged when its file contains one of these.
_LEGACY_UNIT_EXECSTART_MARKERS: tuple[str, ...] = (
"hermes_cli.main gateway",
"hermes_cli/main.py gateway",
"gateway/run.py",
" hermes gateway ",
"/hermes gateway ",
)
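# For example (hypothetical unit contents), a pre-rename unit invoking our
# entrypoint is flagged, while an unrelated binary that happens to be named
# hermes is not:
#
#     flagged = "[Service]\nExecStart=/opt/hermes/venv/bin/python -m hermes_cli.main gateway\n"
#     any(m in flagged for m in _LEGACY_UNIT_EXECSTART_MARKERS)                        # True
#     any(m in "ExecStart=/usr/bin/hermesd" for m in _LEGACY_UNIT_EXECSTART_MARKERS)   # False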
def _legacy_unit_search_paths() -> list[tuple[bool, Path]]:
"""Return ``[(is_system, base_dir), ...]`` — directories to scan for legacy units.
Factored out so tests can monkeypatch the search roots without touching
real filesystem paths.
"""
return [
(False, Path.home() / ".config" / "systemd" / "user"),
(True, Path("/etc/systemd/system")),
]
def _find_legacy_hermes_units() -> list[tuple[str, Path, bool]]:
"""Return ``[(unit_name, unit_path, is_system)]`` for legacy Hermes gateway units.
Detects unit files installed by older Hermes versions that used a
different service name (e.g. ``hermes.service`` before the rename to
``hermes-gateway.service``). When both a legacy unit and the current
``hermes-gateway.service`` are active, they fight over the same bot
token the PR #5646 signal-recovery change turns this into a 30-second
SIGTERM flap loop.
Safety guards:
* Explicit allowlist of legacy names (no globbing). Profile units such
as ``hermes-gateway-coder.service`` and unrelated third-party
``hermes-*`` services are never matched.
    * ExecStart content check: only flag units that invoke our gateway
entrypoint. A user-created ``hermes.service`` running an unrelated
binary is left untouched.
* Results are returned purely for caller inspection; this function
never mutates or removes anything.
"""
results: list[tuple[str, Path, bool]] = []
for is_system, base in _legacy_unit_search_paths():
for name in _LEGACY_SERVICE_NAMES:
unit_path = base / name
try:
if not unit_path.exists():
continue
text = unit_path.read_text(encoding="utf-8", errors="ignore")
except (OSError, PermissionError):
continue
if not any(marker in text for marker in _LEGACY_UNIT_EXECSTART_MARKERS):
# Not our gateway — leave alone
continue
results.append((name, unit_path, is_system))
return results
def has_legacy_hermes_units() -> bool:
"""Return True when any legacy Hermes gateway unit files exist."""
return bool(_find_legacy_hermes_units())
def print_legacy_unit_warning() -> None:
"""Warn about legacy Hermes gateway unit files if any are installed.
Idempotent: prints nothing when no legacy units are detected. Safe to
call from any status/install/setup path.
"""
legacy = _find_legacy_hermes_units()
if not legacy:
return
print_warning("Legacy Hermes gateway unit(s) detected from an older install:")
for name, path, is_system in legacy:
scope = "system" if is_system else "user"
print_info(f" {path} ({scope} scope)")
print_info(" These run alongside the current hermes-gateway service and")
print_info(" cause SIGTERM flap loops — both try to use the same bot token.")
print_info(" Remove them with:")
print_info(" hermes gateway migrate-legacy")
def remove_legacy_hermes_units(
interactive: bool = True,
dry_run: bool = False,
) -> tuple[int, list[Path]]:
"""Stop, disable, and remove legacy Hermes gateway unit files.
    Iterates over whatever ``_find_legacy_hermes_units()`` returns, which is
an explicit allowlist of legacy names (not a glob). Profile units and
unrelated third-party services are never touched.
Args:
interactive: When True, prompt before removing. When False, remove
without asking (used when another prompt has already confirmed,
e.g. from the install flow).
dry_run: When True, list what would be removed and return.
Returns:
        ``(removed_count, remaining_paths)``; ``remaining`` includes units we
couldn't remove (typically system-scope when not running as root).
"""
legacy = _find_legacy_hermes_units()
if not legacy:
print("No legacy Hermes gateway units found.")
return 0, []
user_units = [(n, p) for n, p, is_sys in legacy if not is_sys]
system_units = [(n, p) for n, p, is_sys in legacy if is_sys]
print()
print("Legacy Hermes gateway unit(s) found:")
for name, path, is_system in legacy:
scope = "system" if is_system else "user"
print(f" {path} ({scope} scope)")
print()
if dry_run:
print("(dry-run — nothing removed)")
return 0, [p for _, p, _ in legacy]
if interactive and not prompt_yes_no("Remove these legacy units?", True):
print("Skipped. Run again with: hermes gateway migrate-legacy")
return 0, [p for _, p, _ in legacy]
removed = 0
remaining: list[Path] = []
# User-scope removal
for name, path in user_units:
try:
_run_systemctl(["stop", name], system=False, check=False, timeout=90)
_run_systemctl(["disable", name], system=False, check=False, timeout=30)
path.unlink(missing_ok=True)
print(f" ✓ Removed {path}")
removed += 1
except (OSError, RuntimeError) as e:
print(f" ⚠ Could not remove {path}: {e}")
remaining.append(path)
if user_units:
try:
_run_systemctl(["daemon-reload"], system=False, check=False, timeout=30)
except RuntimeError:
pass
# System-scope removal (needs root)
if system_units:
if os.geteuid() != 0:
print()
print_warning("System-scope legacy units require root to remove.")
print_info(" Re-run with: sudo hermes gateway migrate-legacy")
for _, path in system_units:
remaining.append(path)
else:
for name, path in system_units:
try:
_run_systemctl(["stop", name], system=True, check=False, timeout=90)
_run_systemctl(["disable", name], system=True, check=False, timeout=30)
path.unlink(missing_ok=True)
print(f" ✓ Removed {path}")
removed += 1
except (OSError, RuntimeError) as e:
print(f" ⚠ Could not remove {path}: {e}")
remaining.append(path)
try:
_run_systemctl(["daemon-reload"], system=True, check=False, timeout=30)
except RuntimeError:
pass
print()
if remaining:
print_warning(f"{len(remaining)} legacy unit(s) still present — see messages above.")
else:
print_success(f"Removed {removed} legacy unit(s).")
return removed, remaining
def print_systemd_scope_conflict_warning() -> None:
scopes = get_installed_systemd_scopes()
if len(scopes) < 2:
@ -715,7 +1070,9 @@ def _detect_venv_dir() -> Path | None:
"""Detect the active virtualenv directory.
Checks ``sys.prefix`` first (works regardless of the directory name),
then ``VIRTUAL_ENV`` env var (covers uv-managed environments where
sys.prefix == sys.base_prefix), then falls back to probing common
directory names under PROJECT_ROOT.
Returns ``None`` when no virtualenv can be found.
"""
# If we're running inside a virtualenv, sys.prefix points to it.
@ -724,6 +1081,15 @@ def _detect_venv_dir() -> Path | None:
if venv.is_dir():
return venv
# uv and some other tools set VIRTUAL_ENV without changing sys.prefix.
# This catches `uv run` where sys.prefix == sys.base_prefix but the
# environment IS a venv. (#8620)
_virtual_env = os.environ.get("VIRTUAL_ENV")
if _virtual_env:
venv = Path(_virtual_env)
if venv.is_dir():
return venv
# Fallback: check common virtualenv directory names under the project root.
for candidate in (".venv", "venv"):
venv = PROJECT_ROOT / candidate
@ -1043,6 +1409,19 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
if system:
_require_root_for_system_service("install")
# Offer to remove legacy units (hermes.service from pre-rename installs)
# before installing the new hermes-gateway.service. If both remain, they
# flap-fight for the Telegram bot token on every gateway startup.
# Only removes units matching _LEGACY_SERVICE_NAMES + our ExecStart
# signature — profile units are never touched.
if has_legacy_hermes_units():
print()
print_legacy_unit_warning()
print()
if prompt_yes_no("Remove the legacy unit(s) before installing?", True):
remove_legacy_hermes_units(interactive=False)
print()
unit_path = get_systemd_unit_path(system=system)
scope_flag = " --system" if system else ""
@ -1081,6 +1460,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
_ensure_linger_enabled()
print_systemd_scope_conflict_warning()
print_legacy_unit_warning()
def systemd_uninstall(system: bool = False):
@ -1204,6 +1584,10 @@ def systemd_status(deep: bool = False, system: bool = False):
print_systemd_scope_conflict_warning()
print()
if has_legacy_hermes_units():
print_legacy_unit_warning()
print()
if not systemd_unit_is_current(system=system):
print("⚠ Installed gateway service definition is outdated")
print(f" Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag} # auto-refreshes the unit")
@ -1987,7 +2371,7 @@ _PLATFORMS = [
{"name": "QQ_ALLOWED_USERS", "prompt": "Allowed user OpenIDs (comma-separated, leave empty for open access)", "password": False,
"is_allowlist": True,
"help": "Optional — restrict DM access to specific user OpenIDs."},
{"name": "QQ_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False,
{"name": "QQBOT_HOME_CHANNEL", "prompt": "Home channel (user/group OpenID for cron delivery, or empty)", "password": False,
"help": "OpenID to deliver cron results and notifications to."},
],
},
@ -2200,9 +2584,62 @@ def _setup_sms():
def _setup_dingtalk():
"""Configure DingTalk via the standard platform setup."""
"""Configure DingTalk — QR scan (recommended) or manual credential entry."""
from hermes_cli.setup import (
prompt_choice, prompt_yes_no, print_info, print_success, print_warning,
)
dingtalk_platform = next(p for p in _PLATFORMS if p["key"] == "dingtalk")
_setup_standard_platform(dingtalk_platform)
emoji = dingtalk_platform["emoji"]
label = dingtalk_platform["label"]
print()
print(color(f" ─── {emoji} {label} Setup ───", Colors.CYAN))
existing = get_env_value("DINGTALK_CLIENT_ID")
if existing:
print()
print_success(f"{label} is already configured (Client ID: {existing}).")
if not prompt_yes_no(f" Reconfigure {label}?", False):
return
print()
method = prompt_choice(
" Choose setup method",
[
"QR Code Scan (Recommended, auto-obtain Client ID and Client Secret)",
"Manual Input (Client ID and Client Secret)",
],
default=0,
)
if method == 0:
# ── QR-code device-flow authorization ──
try:
from hermes_cli.dingtalk_auth import dingtalk_qr_auth
except ImportError as exc:
print_warning(f" QR auth module failed to load ({exc}), falling back to manual input.")
_setup_standard_platform(dingtalk_platform)
return
result = dingtalk_qr_auth()
if result is None:
print_warning(" QR auth incomplete, falling back to manual input.")
_setup_standard_platform(dingtalk_platform)
return
client_id, client_secret = result
save_env_value("DINGTALK_CLIENT_ID", client_id)
save_env_value("DINGTALK_CLIENT_SECRET", client_secret)
save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")
print()
print_success(f"{emoji} {label} configured via QR scan!")
else:
# ── Manual entry ──
_setup_standard_platform(dingtalk_platform)
# Also enable allow-all by default for convenience
if get_env_value("DINGTALK_CLIENT_ID"):
save_env_value("DINGTALK_ALLOW_ALL_USERS", "true")
def _setup_wecom():
@ -2561,6 +2998,215 @@ def _setup_feishu():
print_info(f" Bot: {bot_name}")
def _setup_qqbot():
"""Interactive setup for QQ Bot — scan-to-configure or manual credentials."""
print()
print(color(" ─── 🐧 QQ Bot Setup ───", Colors.CYAN))
existing_app_id = get_env_value("QQ_APP_ID")
existing_secret = get_env_value("QQ_CLIENT_SECRET")
if existing_app_id and existing_secret:
print()
print_success("QQ Bot is already configured.")
if not prompt_yes_no(" Reconfigure QQ Bot?", False):
return
# ── Choose setup method ──
print()
method_choices = [
"Scan QR code to add bot automatically (recommended)",
"Enter existing App ID and App Secret manually",
]
method_idx = prompt_choice(" How would you like to set up QQ Bot?", method_choices, 0)
credentials = None
used_qr = False
if method_idx == 0:
# ── QR scan-to-configure ──
try:
credentials = _qqbot_qr_flow()
except KeyboardInterrupt:
print()
print_warning(" QQ Bot setup cancelled.")
return
if credentials:
used_qr = True
if not credentials:
print_info(" QR setup did not complete. Continuing with manual input.")
# ── Manual credential input ──
if not credentials:
print()
print_info(" Go to https://q.qq.com to register a QQ Bot application.")
print_info(" Note your App ID and App Secret from the application page.")
print()
app_id = prompt(" App ID", password=False)
if not app_id:
print_warning(" Skipped — QQ Bot won't work without an App ID.")
return
app_secret = prompt(" App Secret", password=True)
if not app_secret:
print_warning(" Skipped — QQ Bot won't work without an App Secret.")
return
credentials = {"app_id": app_id.strip(), "client_secret": app_secret.strip(), "user_openid": ""}
# ── Save core credentials ──
save_env_value("QQ_APP_ID", credentials["app_id"])
save_env_value("QQ_CLIENT_SECRET", credentials["client_secret"])
user_openid = credentials.get("user_openid", "")
# ── DM security policy ──
print()
access_choices = [
"Use DM pairing approval (recommended)",
"Allow all direct messages",
"Only allow listed user OpenIDs",
]
access_idx = prompt_choice(" How should direct messages be authorized?", access_choices, 0)
if access_idx == 0:
save_env_value("QQ_ALLOW_ALL_USERS", "false")
if user_openid:
print()
if prompt_yes_no(f" Add yourself ({user_openid}) to the allow list?", True):
save_env_value("QQ_ALLOWED_USERS", user_openid)
print_success(f" Allow list set to {user_openid}")
else:
save_env_value("QQ_ALLOWED_USERS", "")
else:
save_env_value("QQ_ALLOWED_USERS", "")
print_success(" DM pairing enabled.")
print_info(" Unknown users can request access; approve with `hermes pairing approve`.")
elif access_idx == 1:
save_env_value("QQ_ALLOW_ALL_USERS", "true")
save_env_value("QQ_ALLOWED_USERS", "")
print_warning(" Open DM access enabled for QQ Bot.")
else:
default_allow = user_openid or ""
allowlist = prompt(" Allowed user OpenIDs (comma-separated)", default_allow, password=False).replace(" ", "")
save_env_value("QQ_ALLOW_ALL_USERS", "false")
save_env_value("QQ_ALLOWED_USERS", allowlist)
print_success(" Allowlist saved.")
# ── Home channel ──
if user_openid:
print()
if prompt_yes_no(f" Use your QQ user ID ({user_openid}) as the home channel?", True):
save_env_value("QQBOT_HOME_CHANNEL", user_openid)
print_success(f" Home channel set to {user_openid}")
else:
print()
home_channel = prompt(" Home channel OpenID (for cron/notifications, or empty)", password=False)
if home_channel:
save_env_value("QQBOT_HOME_CHANNEL", home_channel.strip())
print_success(f" Home channel set to {home_channel.strip()}")
print()
print_success("🐧 QQ Bot configured!")
print_info(f" App ID: {credentials['app_id']}")
def _qqbot_render_qr(url: str) -> bool:
"""Try to render a QR code in the terminal. Returns True if successful."""
try:
import qrcode as _qr
        qr = _qr.QRCode(border=1, error_correction=_qr.constants.ERROR_CORRECT_L)
qr.add_data(url)
qr.make(fit=True)
qr.print_ascii(invert=True)
return True
except Exception:
return False
def _qqbot_qr_flow():
"""Run the QR-code scan-to-configure flow.
Returns a dict with app_id, client_secret, user_openid on success,
or None on failure/cancel.
"""
try:
from gateway.platforms.qqbot import (
create_bind_task, poll_bind_result, build_connect_url,
decrypt_secret, BindStatus,
)
from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
except Exception as exc:
print_error(f" QQBot onboard import failed: {exc}")
return None
import asyncio
import time
MAX_REFRESHES = 3
refresh_count = 0
while refresh_count <= MAX_REFRESHES:
loop = asyncio.new_event_loop()
# ── Create bind task ──
try:
task_id, aes_key = loop.run_until_complete(create_bind_task())
except Exception as e:
print_warning(f" Failed to create bind task: {e}")
loop.close()
return None
url = build_connect_url(task_id)
# ── Display QR code + URL ──
print()
if _qqbot_render_qr(url):
print(f" Scan the QR code above, or open this URL directly:\n {url}")
else:
print(f" Open this URL in QQ on your phone:\n {url}")
print_info(" Tip: pip install qrcode to show a scannable QR code here")
# ── Poll loop (silent — keep QR visible at bottom) ──
try:
while True:
try:
status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
poll_bind_result(task_id)
)
except Exception:
time.sleep(ONBOARD_POLL_INTERVAL)
continue
if status == BindStatus.COMPLETED:
client_secret = decrypt_secret(encrypted_secret, aes_key)
print()
print_success(f" QR scan complete! (App ID: {app_id})")
if user_openid:
print_info(f" Scanner's OpenID: {user_openid}")
return {
"app_id": app_id,
"client_secret": client_secret,
"user_openid": user_openid,
}
if status == BindStatus.EXPIRED:
refresh_count += 1
if refresh_count > MAX_REFRESHES:
print()
print_warning(f" QR code expired {MAX_REFRESHES} times — giving up.")
return None
print()
print_warning(f" QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})")
loop.close()
break # outer while creates a new task
time.sleep(ONBOARD_POLL_INTERVAL)
except KeyboardInterrupt:
loop.close()
raise
finally:
loop.close()
return None
def _setup_signal():
"""Interactive setup for Signal messenger."""
import shutil
@ -2698,6 +3344,10 @@ def gateway_setup():
print_systemd_scope_conflict_warning()
print()
if supports_systemd_services() and has_legacy_hermes_units():
print_legacy_unit_warning()
print()
if service_installed and service_running:
print_success("Gateway service is installed and running.")
elif service_installed:
@ -2738,8 +3388,12 @@ def gateway_setup():
_setup_signal()
elif platform["key"] == "weixin":
_setup_weixin()
elif platform["key"] == "dingtalk":
_setup_dingtalk()
elif platform["key"] == "feishu":
_setup_feishu()
elif platform["key"] == "qqbot":
_setup_qqbot()
else:
_setup_standard_platform(platform)
@ -2919,6 +3573,15 @@ def gateway_command(args):
elif subcmd == "start":
system = getattr(args, 'system', False)
start_all = getattr(args, 'all', False)
if start_all:
# Kill all stale gateway processes across all profiles before starting
killed = kill_gateway_processes(all_profiles=True)
if killed:
print(f"✓ Killed {killed} stale gateway process(es) across all profiles")
_wait_for_gateway_exit(timeout=10.0, force_after=5.0)
if is_termux():
print("Gateway service start is not supported on Termux because there is no system service manager.")
print("Run manually: hermes gateway")
@ -3004,7 +3667,39 @@ def gateway_command(args):
# Try service first, fall back to killing and restarting
service_available = False
system = getattr(args, 'system', False)
restart_all = getattr(args, 'all', False)
service_configured = False
if restart_all:
# --all: stop every gateway process across all profiles, then start fresh
service_stopped = False
if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
try:
systemd_stop(system=system)
service_stopped = True
except subprocess.CalledProcessError:
pass
elif is_macos() and get_launchd_plist_path().exists():
try:
launchd_stop()
service_stopped = True
except subprocess.CalledProcessError:
pass
killed = kill_gateway_processes(all_profiles=True)
total = killed + (1 if service_stopped else 0)
if total:
print(f"✓ Stopped {total} gateway process(es) across all profiles")
_wait_for_gateway_exit(timeout=10.0, force_after=5.0)
# Start the current profile's service fresh
print("Starting gateway...")
if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
systemd_start(system=system)
elif is_macos() and get_launchd_plist_path().exists():
launchd_start()
else:
run_gateway(verbose=0)
return
if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
service_configured = True
@ -3058,15 +3753,18 @@ def gateway_command(args):
elif subcmd == "status":
deep = getattr(args, 'deep', False)
system = getattr(args, 'system', False)
snapshot = get_gateway_runtime_snapshot(system=system)
# Check for service first
if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
systemd_status(deep, system=system)
_print_gateway_process_mismatch(snapshot)
elif is_macos() and get_launchd_plist_path().exists():
launchd_status(deep)
_print_gateway_process_mismatch(snapshot)
else:
# Check for manually running processes
            pids = list(snapshot.gateway_pids)
if pids:
print(f"✓ Gateway is running (PID: {', '.join(map(str, pids))})")
print(" (Running manually, not as a system service)")
@ -3107,3 +3805,14 @@ def gateway_command(args):
else:
print(" hermes gateway install # Install as user service")
print(" sudo hermes gateway install --system # Install as boot-time system service")
elif subcmd == "migrate-legacy":
# Stop, disable, and remove legacy Hermes gateway unit files from
# pre-rename installs (e.g. hermes.service). Profile units and
# unrelated third-party services are never touched.
dry_run = getattr(args, 'dry_run', False)
yes = getattr(args, 'yes', False)
if not supports_systemd_services() and not is_macos():
print("Legacy unit migration only applies to systemd-based Linux hosts.")
return
remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run)

File diff suppressed because it is too large.


@ -279,8 +279,8 @@ def cmd_mcp_add(args):
_info(f"Starting OAuth flow for '{name}'...")
oauth_ok = False
try:
            from tools.mcp_oauth_manager import get_manager
            oauth_auth = get_manager().get_or_build_provider(name, url, None)
if oauth_auth:
server_config["auth"] = "oauth"
_success("OAuth configured (tokens will be acquired on first connection)")
@ -428,10 +428,12 @@ def cmd_mcp_remove(args):
_remove_mcp_server(name)
_success(f"Removed '{name}' from config")
# Clean up OAuth tokens if they exist — route through MCPOAuthManager so
# any provider instance cached in the current process (e.g. from an
# earlier `hermes mcp test` in the same session) is evicted too.
try:
        from tools.mcp_oauth_manager import get_manager
        get_manager().remove(name)
_success("Cleaned up OAuth tokens")
except Exception:
pass
@ -577,6 +579,63 @@ def _interpolate_value(value: str) -> str:
return re.sub(r"\$\{(\w+)\}", _replace, value)
# ─── hermes mcp login ────────────────────────────────────────────────────────
def cmd_mcp_login(args):
"""Force re-authentication for an OAuth-based MCP server.
Deletes cached tokens (both on disk and in the running process's
MCPOAuthManager cache) and triggers a fresh OAuth flow via the
existing probe path.
Use this when:
- Tokens are stuck in a bad state (server revoked, refresh token
consumed by an external process, etc.)
- You want to re-authenticate to change scopes or account
- A tool call returned ``needs_reauth: true``
"""
name = args.name
servers = _get_mcp_servers()
if name not in servers:
_error(f"Server '{name}' not found in config.")
if servers:
_info(f"Available servers: {', '.join(servers)}")
return
server_config = servers[name]
url = server_config.get("url")
if not url:
_error(f"Server '{name}' has no URL — not an OAuth-capable server")
return
if server_config.get("auth") != "oauth":
_error(f"Server '{name}' is not configured for OAuth (auth={server_config.get('auth')})")
_info("Use `hermes mcp remove` + `hermes mcp add` to reconfigure auth.")
return
# Wipe both disk and in-memory cache so the next probe forces a fresh
# OAuth flow.
try:
from tools.mcp_oauth_manager import get_manager
mgr = get_manager()
mgr.remove(name)
except Exception as exc:
_warning(f"Could not clear existing OAuth state: {exc}")
print()
_info(f"Starting OAuth flow for '{name}'...")
# Probe triggers the OAuth flow (browser redirect + callback capture).
try:
tools = _probe_single_server(name, server_config)
if tools:
_success(f"Authenticated — {len(tools)} tool(s) available")
else:
_success("Authenticated (server reported no tools)")
except Exception as exc:
_error(f"Authentication failed: {exc}")
# ─── hermes mcp configure ────────────────────────────────────────────────────
def cmd_mcp_configure(args):
@ -696,6 +755,7 @@ def mcp_command(args):
"test": cmd_mcp_test,
"configure": cmd_mcp_configure,
"config": cmd_mcp_configure,
"login": cmd_mcp_login,
}
handler = handlers.get(action)
@ -713,4 +773,5 @@ def mcp_command(args):
_info("hermes mcp list List servers")
_info("hermes mcp test <name> Test connection")
_info("hermes mcp configure <name> Toggle tools")
_info("hermes mcp login <name> Re-authenticate OAuth")
print()


@ -58,9 +58,11 @@ def _prompt(label: str, default: str | None = None, secret: bool = False) -> str
def _install_dependencies(provider_name: str) -> None:
"""Install pip dependencies declared in plugin.yaml."""
import subprocess
from pathlib import Path as _Path
from plugins.memory import find_provider_dir
    plugin_dir = find_provider_dir(provider_name)
if not plugin_dir:
return
yaml_path = plugin_dir / "plugin.yaml"
if not yaml_path.exists():
return


@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
"qwen-oauth",
"xiaomi",
"arcee",
"ollama-cloud",
"custom",
})
@ -373,7 +374,26 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
return bare
return _dots_to_hyphens(bare)
# --- Copilot: strip matching provider prefix, keep dots ---
# --- Copilot / Copilot ACP: delegate to the Copilot-specific
# normalizer. It knows about the alias table (vendor-prefix
# stripping for Anthropic/OpenAI, dash-to-dot repair for Claude)
# and live-catalog lookups. Without this, vendor-prefixed or
# dash-notation Claude IDs survive to the Copilot API and hit
# HTTP 400 "model_not_supported". See issue #6879.
if provider in {"copilot", "copilot-acp"}:
try:
from hermes_cli.models import normalize_copilot_model_id
normalized = normalize_copilot_model_id(name)
if normalized:
return normalized
except Exception:
# Fall through to the generic strip-vendor behaviour below
# if the Copilot-specific path is unavailable for any reason.
pass
# --- Copilot / Copilot ACP / openai-codex fallback:
# strip matching provider prefix, keep dots ---
if provider in _STRIP_VENDOR_ONLY_PROVIDERS:
stripped = _strip_matching_provider_prefix(name, provider)
if stripped == name and name.startswith("openai/"):


@ -274,6 +274,11 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
is_global = False
explicit_provider = ""
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
# A single Unicode dash before a flag keyword becomes "--"
import re as _re
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args)
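    # e.g. "gpt-5 —provider openrouter" (iOS auto-converted dash) becomes
    # "gpt-5 --provider openrouter"; a Unicode dash not followed by a flag
    # keyword is left untouched.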
# Extract --global
if "--global" in raw_args:
is_global = True
@ -452,6 +457,7 @@ def switch_model(
ModelSwitchResult with all information the caller needs.
"""
from hermes_cli.models import (
copilot_model_api_mode,
detect_provider_for_model,
validate_requested_model,
opencode_model_api_mode,
@ -686,12 +692,12 @@ def switch_model(
api_key=api_key,
base_url=base_url,
)
        except Exception as e:
            validation = {
                "accepted": False,
                "persist": False,
                "recognized": False,
                "message": f"Could not validate `{new_model}`: {e}",
}
if not validation.get("accepted"):
@ -709,14 +715,34 @@ def switch_model(
if validation.get("corrected_model"):
new_model = validation["corrected_model"]
# --- Copilot api_mode override ---
if target_provider in {"copilot", "github-copilot"}:
api_mode = copilot_model_api_mode(new_model, api_key=api_key)
# --- OpenCode api_mode override ---
if target_provider in {"opencode-zen", "opencode-go", "opencode", "opencode-go"}:
if target_provider in {"opencode-zen", "opencode-go", "opencode"}:
api_mode = opencode_model_api_mode(target_provider, new_model)
# --- Determine api_mode if not already set ---
if not api_mode:
api_mode = determine_api_mode(target_provider, base_url)
# OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
# Anthropic SDK prepends its own /v1/messages to the base_url. Strip the
# trailing /v1 so the SDK constructs the correct path (e.g.
# https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages).
# Mirrors the same logic in hermes_cli.runtime_provider.resolve_runtime_provider;
# without it, /model switches into an anthropic_messages-routed OpenCode
# model (e.g. `/model minimax-m2.7` on opencode-go, `/model claude-sonnet-4-6`
# on opencode-zen) hit a double /v1 and returned OpenCode's website 404 page.
if (
api_mode == "anthropic_messages"
and target_provider in {"opencode-zen", "opencode-go"}
and isinstance(base_url, str)
and base_url
):
base_url = re.sub(r"/v1/?$", "", base_url)
# --- Get capabilities (legacy) ---
capabilities = get_model_capabilities(target_provider, new_model)
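For instance, the /v1 strip above behaves like this sketch (base URL taken from the comment):

import re
base_url = "https://opencode.ai/zen/go/v1"
base_url = re.sub(r"/v1/?$", "", base_url)
# base_url == "https://opencode.ai/zen/go"; the Anthropic SDK then appends
# /v1/messages, yielding https://opencode.ai/zen/go/v1/messages as intended.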
@ -786,7 +812,8 @@ def list_authenticated_providers(
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
results: List[dict] = []
seen_slugs: set = set()
seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545)
seen_mdev_ids: set = set() # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)
data = fetch_models_dev()
@ -796,9 +823,18 @@ def list_authenticated_providers(
# "nous" shares OpenRouter's curated list if not separately defined
if "nous" not in curated:
curated["nous"] = curated["openrouter"]
# Ollama Cloud uses dynamic discovery (no static curated list)
if "ollama-cloud" not in curated:
from hermes_cli.models import fetch_ollama_cloud_models
curated["ollama-cloud"] = fetch_ollama_cloud_models()
# --- 1. Check Hermes-mapped providers ---
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
# Skip aliases that map to the same models.dev provider (e.g.
# kimi-coding and kimi-coding-cn both → kimi-for-coding).
# The first one with valid credentials wins (#10526).
if mdev_id in seen_mdev_ids:
continue
pdata = data.get(mdev_id)
if not isinstance(pdata, dict):
continue
@ -837,7 +873,8 @@ def list_authenticated_providers(
"total_models": total,
"source": "built-in",
})
seen_slugs.add(slug)
seen_slugs.add(slug.lower())
seen_mdev_ids.add(mdev_id)
# --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
from hermes_cli.providers import HERMES_OVERLAYS
@ -849,12 +886,12 @@ def list_authenticated_providers(
_mdev_to_hermes = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
for pid, overlay in HERMES_OVERLAYS.items():
if pid in seen_slugs:
if pid.lower() in seen_slugs:
continue
# Resolve Hermes slug — e.g. "github-copilot" → "copilot"
hermes_slug = _mdev_to_hermes.get(pid, pid)
if hermes_slug in seen_slugs:
if hermes_slug.lower() in seen_slugs:
continue
# Check if credentials exist
@ -935,8 +972,8 @@ def list_authenticated_providers(
"total_models": total,
"source": "hermes",
})
seen_slugs.add(pid)
seen_slugs.add(hermes_slug)
seen_slugs.add(pid.lower())
seen_slugs.add(hermes_slug.lower())
# --- 2b. Cross-check canonical provider list ---
# Catches providers that are in CANONICAL_PROVIDERS but weren't found
@ -948,7 +985,7 @@ def list_authenticated_providers(
_canon_provs = []
for _cp in _canon_provs:
if _cp.slug in seen_slugs:
if _cp.slug.lower() in seen_slugs:
continue
# Check credentials via PROVIDER_REGISTRY (auth.py)
@ -995,7 +1032,7 @@ def list_authenticated_providers(
"total_models": _cp_total,
"source": "canonical",
})
seen_slugs.add(_cp.slug)
seen_slugs.add(_cp.slug.lower())
# --- 3. User-defined endpoints from config ---
if user_providers and isinstance(user_providers, dict):
@ -1068,7 +1105,7 @@ def list_authenticated_providers(
groups[slug]["models"].append(default_model)
for slug, grp in groups.items():
if slug in seen_slugs:
if slug.lower() in seen_slugs:
continue
results.append({
"slug": slug,
@ -1080,11 +1117,9 @@ def list_authenticated_providers(
"source": "user-config",
"api_url": grp["api_url"],
})
seen_slugs.add(slug)
seen_slugs.add(slug.lower())
# Sort: current provider first, then by model count descending
results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
return results

View file

@ -11,7 +11,9 @@ import json
import os
import urllib.request
import urllib.error
import time
from difflib import get_close_matches
from pathlib import Path
from typing import Any, NamedTuple, Optional
COPILOT_BASE_URL = "https://api.githubcopilot.com"
@ -24,7 +26,9 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
# Fallback OpenRouter snapshot used when the live catalog is unavailable.
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
("anthropic/claude-opus-4.6", "recommended"),
("moonshotai/kimi-k2.5", "recommended"),
("anthropic/claude-opus-4.7", ""),
("anthropic/claude-opus-4.6", ""),
("anthropic/claude-sonnet-4.6", ""),
("qwen/qwen3.6-plus", ""),
("anthropic/claude-sonnet-4.5", ""),
@ -46,7 +50,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
("z-ai/glm-5.1", ""),
("z-ai/glm-5v-turbo", ""),
("z-ai/glm-5-turbo", ""),
("moonshotai/kimi-k2.5", ""),
("x-ai/grok-4.20", ""),
("nvidia/nemotron-3-super-120b-a12b", ""),
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
@ -72,7 +75,9 @@ def _codex_curated_models() -> list[str]:
_PROVIDER_MODELS: dict[str, list[str]] = {
"nous": [
"moonshotai/kimi-k2.5",
"xiaomi/mimo-v2-pro",
"anthropic/claude-opus-4.7",
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-4.5",
@ -92,7 +97,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"z-ai/glm-5.1",
"z-ai/glm-5v-turbo",
"z-ai/glm-5-turbo",
"moonshotai/kimi-k2.5",
"x-ai/grok-4.20-beta",
"nvidia/nemotron-3-super-120b-a12b",
"nvidia/nemotron-3-super-120b-a12b:free",
@ -131,7 +135,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"gemini-2.5-flash-lite",
# Gemma open models (also served via AI Studio)
"gemma-4-31b-it",
"gemma-4-26b-it",
],
"google-gemini-cli": [
"gemini-2.5-pro",
"gemini-2.5-flash",
"gemini-2.5-flash-lite",
],
"zai": [
"glm-5.1",
@ -143,21 +151,26 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"glm-4.5-flash",
],
"xai": [
"grok-4.20-0309-reasoning",
"grok-4.20-0309-non-reasoning",
"grok-4.20-multi-agent-0309",
"grok-4.20-reasoning",
"grok-4-1-fast-reasoning",
"grok-4-1-fast-non-reasoning",
"grok-4-fast-reasoning",
"grok-4-fast-non-reasoning",
"grok-4-0709",
"grok-code-fast-1",
"grok-3",
"grok-3-mini",
],
"nvidia": [
# NVIDIA flagship reasoning models
"nvidia/nemotron-3-super-120b-a12b",
"nvidia/nemotron-3-nano-30b-a3b",
"nvidia/llama-3.3-nemotron-super-49b-v1.5",
# Third-party agentic models hosted on build.nvidia.com
# (map to OpenRouter defaults — users get familiar picks on NIM)
"qwen/qwen3.5-397b-a17b",
"deepseek-ai/deepseek-v3.2",
"moonshotai/kimi-k2.5",
"minimaxai/minimax-m2.5",
"z-ai/glm5",
"openai/gpt-oss-120b",
],
"kimi-coding": [
"kimi-for-coding",
"kimi-k2.5",
"kimi-for-coding",
"kimi-k2-thinking",
"kimi-k2-thinking-turbo",
"kimi-k2-turbo-preview",
@ -188,6 +201,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"MiniMax-M2",
],
"anthropic": [
"claude-opus-4-7",
"claude-opus-4-6",
"claude-sonnet-4-6",
"claude-opus-4-5-20251101",
@ -211,6 +225,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"trinity-mini",
],
"opencode-zen": [
"kimi-k2.5",
"gpt-5.4-pro",
"gpt-5.4",
"gpt-5.3-codex",
@ -242,15 +257,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"glm-5",
"glm-4.7",
"glm-4.6",
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2",
"qwen3-coder",
"big-pickle",
],
"opencode-go": [
"glm-5",
"kimi-k2.5",
"glm-5.1",
"glm-5",
"mimo-v2-pro",
"mimo-v2-omni",
"minimax-m2.7",
@ -283,26 +298,42 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
# to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
# or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
"alibaba": [
"kimi-k2.5",
"qwen3.5-plus",
"qwen3-coder-plus",
"qwen3-coder-next",
# Third-party models available on coding-intl
"glm-5",
"glm-4.7",
"kimi-k2.5",
"MiniMax-M2.5",
],
# Curated HF model list — only agentic models that map to OpenRouter defaults.
"huggingface": [
"moonshotai/Kimi-K2.5",
"Qwen/Qwen3.5-397B-A17B",
"Qwen/Qwen3.5-35B-A3B",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2.5",
"MiniMaxAI/MiniMax-M2.5",
"zai-org/GLM-5",
"XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking",
],
# AWS Bedrock — static fallback list used when dynamic discovery is
# unavailable (no boto3, no credentials, or API error). The agent
# prefers live discovery via ListFoundationModels + ListInferenceProfiles.
# Use inference profile IDs (us.*) since most models require them.
"bedrock": [
"us.anthropic.claude-sonnet-4-6",
"us.anthropic.claude-opus-4-6-v1",
"us.anthropic.claude-haiku-4-5-20251001-v1:0",
"us.anthropic.claude-sonnet-4-5-20250929-v1:0",
"us.amazon.nova-pro-v1:0",
"us.amazon.nova-lite-v1:0",
"us.amazon.nova-micro-v1:0",
"deepseek.v3.2",
"us.meta.llama4-maverick-17b-instruct-v1:0",
"us.meta.llama4-scout-17b-instruct-v1:0",
],
}
# ---------------------------------------------------------------------------
@ -518,30 +549,35 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"),
ProviderEntry("gemini", "Google AI Studio", "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"),
ProviderEntry("google-gemini-cli", "Google Gemini (OAuth)", "Google Gemini via OAuth + Code Assist (free tier supported; no API key needed)"),
ProviderEntry("deepseek", "DeepSeek", "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
ProviderEntry("xai", "xAI", "xAI (Grok models — direct API)"),
ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"),
ProviderEntry("kimi-coding", "Kimi / Moonshot", "Kimi / Moonshot (Moonshot AI direct API)"),
ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
]
# Derived dicts — used throughout the codebase
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
_PROVIDER_ALIASES = {
"glm": "zai",
"z-ai": "zai",
@ -582,14 +618,26 @@ _PROVIDER_ALIASES = {
"qwen": "alibaba",
"alibaba-cloud": "alibaba",
"qwen-portal": "qwen-oauth",
"gemini-cli": "google-gemini-cli",
"gemini-oauth": "google-gemini-cli",
"hf": "huggingface",
"hugging-face": "huggingface",
"huggingface-hub": "huggingface",
"mimo": "xiaomi",
"xiaomi-mimo": "xiaomi",
"aws": "bedrock",
"aws-bedrock": "bedrock",
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
"grok": "xai",
"x-ai": "xai",
"x.ai": "xai",
"nim": "nvidia",
"nvidia-nim": "nvidia",
"build-nvidia": "nvidia",
"nemotron": "nvidia",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"ollama_cloud": "ollama-cloud",
}
@ -1026,7 +1074,7 @@ def detect_provider_for_model(
return (resolved_provider, default_models[0])
# Aggregators list other providers' models — never auto-switch TO them
_AGGREGATORS = {"nous", "openrouter"}
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
# If the model belongs to the current provider's catalog, don't suggest switching
current_models = _PROVIDER_MODELS.get(current_provider, [])
@ -1043,7 +1091,8 @@ def detect_provider_for_model(
break
if direct_match:
# Check if we have credentials for this provider
# Check if we have credentials for this provider — env vars,
# credential pool, or auth store entries.
has_creds = False
try:
from hermes_cli.auth import PROVIDER_REGISTRY
@ -1056,16 +1105,28 @@ def detect_provider_for_model(
break
except Exception:
pass
# Also check credential pool and auth store — covers OAuth,
# Claude Code tokens, and other non-env-var credentials (#10300).
if not has_creds:
try:
from agent.credential_pool import load_pool
pool = load_pool(direct_match)
if pool.has_credentials():
has_creds = True
except Exception:
pass
if not has_creds:
try:
from hermes_cli.auth import _load_auth_store
store = _load_auth_store()
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
has_creds = True
except Exception:
pass
if has_creds:
return (direct_match, name)
# No direct creds — try to find this model on OpenRouter instead
or_slug = _find_openrouter_slug(name)
if or_slug:
return ("openrouter", or_slug)
# Still return the direct provider — credential resolution will
# give a clear error rather than silently using the wrong provider
# Always return the direct provider match. If credentials are
# missing, the client init will give a clear error rather than
# silently routing through the wrong provider (#10300).
return (direct_match, name)
# --- Step 2: check OpenRouter catalog ---
@ -1255,6 +1316,10 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
live = _fetch_ai_gateway_models()
if live:
return live
if normalized == "ollama-cloud":
live = fetch_ollama_cloud_models(force_refresh=force_refresh)
if live:
return live
if normalized == "custom":
base_url = _get_custom_base_url()
if base_url:
@ -1441,6 +1506,19 @@ _COPILOT_MODEL_ALIASES = {
"anthropic/claude-sonnet-4.6": "claude-sonnet-4.6",
"anthropic/claude-sonnet-4.5": "claude-sonnet-4.5",
"anthropic/claude-haiku-4.5": "claude-haiku-4.5",
# Dash-notation fallbacks: Hermes' default Claude IDs elsewhere use
# hyphens (anthropic native format), but Copilot's API only accepts
# dot-notation. Accept both so users who configure copilot + a
# default hyphenated Claude model don't hit HTTP 400
# "model_not_supported". See issue #6879.
"claude-opus-4-6": "claude-opus-4.6",
"claude-sonnet-4-6": "claude-sonnet-4.6",
"claude-sonnet-4-5": "claude-sonnet-4.5",
"claude-haiku-4-5": "claude-haiku-4.5",
"anthropic/claude-opus-4-6": "claude-opus-4.6",
"anthropic/claude-sonnet-4-6": "claude-sonnet-4.6",
"anthropic/claude-sonnet-4-5": "claude-sonnet-4.5",
"anthropic/claude-haiku-4-5": "claude-haiku-4.5",
}
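A hedged illustration of the alias table (plain dict lookups; the full normalize_copilot_model_id path may also consult the live catalog):

_COPILOT_MODEL_ALIASES["claude-opus-4-6"]               # -> "claude-opus-4.6"
_COPILOT_MODEL_ALIASES["anthropic/claude-sonnet-4-5"]   # -> "claude-sonnet-4.5"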
@ -1539,6 +1617,11 @@ def copilot_model_api_mode(
primary signal. Falls back to the catalog's ``supported_endpoints``
only for models not covered by the pattern check.
"""
# Fetch the catalog once so normalize + endpoint check share it
# (avoids two redundant network calls for non-GPT-5 models).
if catalog is None and api_key:
catalog = fetch_github_model_catalog(api_key=api_key)
normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key)
if not normalized:
return "chat_completions"
@ -1548,9 +1631,6 @@ def copilot_model_api_mode(
return "codex_responses"
# Secondary: check catalog for non-GPT-5 models (Claude via /v1/messages, etc.)
if catalog is None and api_key:
catalog = fetch_github_model_catalog(api_key=api_key)
if catalog:
catalog_entry = next((item for item in catalog if item.get("id") == normalized), None)
if isinstance(catalog_entry, dict):
@ -1765,6 +1845,125 @@ def fetch_api_models(
return probe_api_models(api_key, base_url, timeout=timeout).get("models")
# ---------------------------------------------------------------------------
# Ollama Cloud — merged model discovery with disk cache
# ---------------------------------------------------------------------------
_OLLAMA_CLOUD_CACHE_TTL = 3600 # 1 hour
def _ollama_cloud_cache_path() -> Path:
"""Return the path for the Ollama Cloud model cache."""
from hermes_constants import get_hermes_home
return get_hermes_home() / "ollama_cloud_models_cache.json"
def _load_ollama_cloud_cache(*, ignore_ttl: bool = False) -> Optional[dict]:
"""Load cached Ollama Cloud models from disk.
Args:
ignore_ttl: If True, return data even if the TTL has expired (stale fallback).
"""
try:
cache_path = _ollama_cloud_cache_path()
if not cache_path.exists():
return None
with open(cache_path, encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
return None
models = data.get("models")
if not (isinstance(models, list) and models):
return None
if not ignore_ttl:
cached_at = data.get("cached_at", 0)
if (time.time() - cached_at) > _OLLAMA_CLOUD_CACHE_TTL:
return None # stale
return data
except Exception:
pass
return None
def _save_ollama_cloud_cache(models: list[str]) -> None:
"""Persist the merged Ollama Cloud model list to disk."""
try:
from utils import atomic_json_write
cache_path = _ollama_cloud_cache_path()
cache_path.parent.mkdir(parents=True, exist_ok=True)
atomic_json_write(cache_path, {"models": models, "cached_at": time.time()}, indent=None)
except Exception:
pass
def fetch_ollama_cloud_models(
api_key: Optional[str] = None,
base_url: Optional[str] = None,
*,
force_refresh: bool = False,
) -> list[str]:
"""Fetch Ollama Cloud models by merging live API + models.dev, with disk cache.
Resolution order:
1. Disk cache (if fresh, < 1 hour, and not force_refresh)
2. Live ``/v1/models`` endpoint (primary, freshest source)
3. models.dev registry (secondary; fills gaps for unlisted models)
4. Merge: live models first, then models.dev additions (deduped)
Returns a list of model IDs (never None empty list on total failure).
"""
# 1. Check disk cache
if not force_refresh:
cached = _load_ollama_cloud_cache()
if cached is not None:
return cached["models"]
# 2. Live API probe
if not api_key:
api_key = os.getenv("OLLAMA_API_KEY", "")
if not base_url:
base_url = os.getenv("OLLAMA_BASE_URL", "") or "https://ollama.com/v1"
live_models: list[str] = []
if api_key:
result = fetch_api_models(api_key, base_url, timeout=8.0)
if result:
live_models = result
# 3. models.dev registry
mdev_models: list[str] = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models("ollama-cloud")
except Exception:
pass
# 4. Merge: live first, then models.dev additions (deduped, order-preserving)
if live_models or mdev_models:
seen: set[str] = set()
merged: list[str] = []
for m in live_models:
if m and m not in seen:
seen.add(m)
merged.append(m)
for m in mdev_models:
if m and m not in seen:
seen.add(m)
merged.append(m)
if merged:
_save_ollama_cloud_cache(merged)
return merged
# Total failure — return stale cache if available (ignore TTL)
stale = _load_ollama_cloud_cache(ignore_ttl=True)
if stale is not None:
return stale["models"]
return []
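Usage sketch (assumes OLLAMA_API_KEY is exported; a cold call probes the live endpoint plus models.dev once, then serves the merged list from the one-hour disk cache):

models = fetch_ollama_cloud_models()                   # cached or freshly merged
fresh = fetch_ollama_cloud_models(force_refresh=True)  # bypass the disk cache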
def validate_requested_model(
model_name: str,
provider: Optional[str],
@ -1851,8 +2050,8 @@ def validate_requested_model(
)
return {
"accepted": True,
"persist": True,
"accepted": False,
"persist": False,
"recognized": False,
"message": message,
}
@ -1865,8 +2064,8 @@ def validate_requested_model(
message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"
return {
"accepted": True,
"persist": True,
"accepted": False,
"persist": False,
"recognized": False,
"message": message,
}
@ -1900,12 +2099,11 @@ def validate_requested_model(
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
return {
"accepted": True,
"persist": True,
"accepted": False,
"persist": False,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in the OpenAI Codex model listing. "
f"It may still work if your account has access to it."
f"Model `{requested}` was not found in the OpenAI Codex model listing."
f"{suggestion_text}"
),
}
@ -1944,23 +2142,58 @@ def validate_requested_model(
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
return {
"accepted": False,
"persist": False,
"recognized": False,
"message": (
f"Model `{requested}` was not found in this provider's model listing."
f"{suggestion_text}"
),
}
# api_models is None — couldn't reach API. Accept and persist,
# but warn so typos don't silently break things.
# Bedrock: use our own discovery instead of HTTP /models endpoint.
# Bedrock's bedrock-runtime URL doesn't support /models — it uses the
# AWS SDK control plane (ListFoundationModels + ListInferenceProfiles).
if normalized == "bedrock":
try:
from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region
region = resolve_bedrock_region()
discovered = discover_bedrock_models(region)
discovered_ids = {m["id"] for m in discovered}
if requested in discovered_ids:
return {
"accepted": True,
"persist": True,
"recognized": True,
"message": None,
}
# Not in discovered list — still accept (user may have custom
# inference profiles or cross-account access), but warn.
suggestions = get_close_matches(requested, list(discovered_ids), n=3, cutoff=0.4)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in this provider's model listing. "
f"It may still work if your plan supports it."
f"Note: `{requested}` was not found in Bedrock model discovery for {region}. "
f"It may still work with custom inference profiles or cross-account access."
f"{suggestion_text}"
),
}
except Exception:
pass # Fall through to generic warning
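A minimal sketch of the Bedrock discovery check above (helper names as imported; the region value is illustrative):

from agent.bedrock_adapter import discover_bedrock_models, resolve_bedrock_region
region = resolve_bedrock_region()  # e.g. "us-east-1"
ids = {m["id"] for m in discover_bedrock_models(region)}
# requested in ids -> accepted and persisted; otherwise accepted with a warning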
# api_models is None: couldn't reach the API. Reject (don't persist)
# and explain why, so typos don't silently break things.
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
return {
"accepted": True,
"persist": True,
"accepted": False,
"persist": False,
"recognized": False,
"message": (
f"Could not reach the {provider_label} API to validate `{requested}`. "

View file

@ -143,6 +143,7 @@ def _tts_label(current_provider: str) -> str:
"openai": "OpenAI TTS",
"elevenlabs": "ElevenLabs",
"edge": "Edge TTS",
"xai": "xAI TTS",
"mistral": "Mistral Voxtral TTS",
"neutts": "NeuTTS",
}
@ -257,6 +258,15 @@ def get_nous_subscription_features(
terminal_cfg.get("modal_mode")
)
# use_gateway flags — when True, the user explicitly opted into the
# Tool Gateway via `hermes model`, so direct credentials should NOT
# prevent gateway routing.
web_use_gateway = bool(web_cfg.get("use_gateway"))
tts_use_gateway = bool(tts_cfg.get("use_gateway"))
browser_use_gateway = bool(browser_cfg.get("use_gateway"))
image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
image_use_gateway = bool(image_gen_cfg.get("use_gateway"))
direct_exa = bool(get_env_value("EXA_API_KEY"))
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
@ -269,6 +279,21 @@ def get_nous_subscription_features(
direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
direct_modal = has_direct_modal_credentials()
# When use_gateway is set, suppress direct credentials for managed detection
if web_use_gateway:
direct_firecrawl = False
direct_exa = False
direct_parallel = False
direct_tavily = False
if image_use_gateway:
direct_fal = False
if tts_use_gateway:
direct_openai_tts = False
direct_elevenlabs = False
if browser_use_gateway:
direct_browser_use = False
direct_browserbase = False
managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
@ -439,37 +464,7 @@ def get_nous_subscription_features(
)
def get_nous_subscription_explainer_lines() -> list[str]:
if not managed_nous_tools_enabled():
return []
return [
"Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.",
"Those managed tools bill to your Nous subscription. Modal execution is optional and can bill to your subscription too.",
"Change these later with: hermes setup tools, hermes setup terminal, or hermes status.",
]
def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]:
"""Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`."""
if not managed_nous_tools_enabled():
return set()
features = get_nous_subscription_features(config)
if not features.provider_is_nous:
return set()
tts_cfg = config.get("tts")
if not isinstance(tts_cfg, dict):
tts_cfg = {}
config["tts"] = tts_cfg
current_tts = str(tts_cfg.get("provider") or "edge").strip().lower()
if current_tts not in {"", "edge"}:
return set()
tts_cfg["provider"] = "openai"
return {"tts"}
def apply_nous_managed_defaults(
@ -529,3 +524,255 @@ def apply_nous_managed_defaults(
changed.add("image_gen")
return changed
# ---------------------------------------------------------------------------
# Tool Gateway offer — single Y/n prompt after model selection
# ---------------------------------------------------------------------------
_GATEWAY_TOOL_LABELS = {
"web": "Web search & extract (Firecrawl)",
"image_gen": "Image generation (FAL)",
"tts": "Text-to-speech (OpenAI TTS)",
"browser": "Browser automation (Browser Use)",
}
def _get_gateway_direct_credentials() -> Dict[str, bool]:
"""Return a dict of tool_key -> has_direct_credentials."""
return {
"web": bool(
get_env_value("FIRECRAWL_API_KEY")
or get_env_value("FIRECRAWL_API_URL")
or get_env_value("PARALLEL_API_KEY")
or get_env_value("TAVILY_API_KEY")
or get_env_value("EXA_API_KEY")
),
"image_gen": bool(get_env_value("FAL_KEY")),
"tts": bool(
resolve_openai_audio_api_key()
or get_env_value("ELEVENLABS_API_KEY")
),
"browser": bool(
get_env_value("BROWSER_USE_API_KEY")
or (get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
),
}
_GATEWAY_DIRECT_LABELS = {
"web": "Firecrawl/Exa/Parallel/Tavily key",
"image_gen": "FAL key",
"tts": "OpenAI/ElevenLabs key",
"browser": "Browser Use/Browserbase key",
}
_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "browser")
def get_gateway_eligible_tools(
config: Optional[Dict[str, object]] = None,
) -> tuple[list[str], list[str], list[str]]:
"""Return (unconfigured, has_direct, already_managed) tool key lists.
- unconfigured: tools with no direct credentials (easy switch)
- has_direct: tools where the user has their own API keys
- already_managed: tools already routed through the gateway
All lists are empty when the user is not a paid Nous subscriber or
is not using Nous as their provider.
"""
if not managed_nous_tools_enabled():
return [], [], []
if config is None:
from hermes_cli.config import load_config
config = load_config() or {}
# Quick provider check without the heavy get_nous_subscription_features call
model_cfg = config.get("model")
if not isinstance(model_cfg, dict) or str(model_cfg.get("provider") or "").strip().lower() != "nous":
return [], [], []
direct = _get_gateway_direct_credentials()
# Check which tools the user has explicitly opted into the gateway for.
# This is distinct from managed_by_nous which fires implicitly when
# no direct keys exist — we only skip the prompt for tools where
# use_gateway was explicitly set.
opted_in = {
"web": bool((config.get("web") if isinstance(config.get("web"), dict) else {}).get("use_gateway")),
"image_gen": bool((config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}).get("use_gateway")),
"tts": bool((config.get("tts") if isinstance(config.get("tts"), dict) else {}).get("use_gateway")),
"browser": bool((config.get("browser") if isinstance(config.get("browser"), dict) else {}).get("use_gateway")),
}
unconfigured: list[str] = []
has_direct: list[str] = []
already_managed: list[str] = []
for key in _ALL_GATEWAY_KEYS:
if opted_in.get(key):
already_managed.append(key)
elif direct.get(key):
has_direct.append(key)
else:
unconfigured.append(key)
return unconfigured, has_direct, already_managed
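For example, assuming managed Nous tools are enabled and FAL_KEY is the only direct key in the environment (hypothetical setup):

config = {"model": {"provider": "nous"}}
unconfigured, has_direct, managed = get_gateway_eligible_tools(config)
# unconfigured == ["web", "tts", "browser"]
# has_direct   == ["image_gen"]
# managed      == []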
def apply_gateway_defaults(
config: Dict[str, object],
tool_keys: list[str],
) -> set[str]:
"""Apply Tool Gateway config for the given tool keys.
Sets ``use_gateway: true`` in each tool's config section so the
runtime prefers the gateway even when direct API keys are present.
Returns the set of tools that were actually changed.
"""
changed: set[str] = set()
web_cfg = config.get("web")
if not isinstance(web_cfg, dict):
web_cfg = {}
config["web"] = web_cfg
tts_cfg = config.get("tts")
if not isinstance(tts_cfg, dict):
tts_cfg = {}
config["tts"] = tts_cfg
browser_cfg = config.get("browser")
if not isinstance(browser_cfg, dict):
browser_cfg = {}
config["browser"] = browser_cfg
if "web" in tool_keys:
web_cfg["backend"] = "firecrawl"
web_cfg["use_gateway"] = True
changed.add("web")
if "tts" in tool_keys:
tts_cfg["provider"] = "openai"
tts_cfg["use_gateway"] = True
changed.add("tts")
if "browser" in tool_keys:
browser_cfg["cloud_provider"] = "browser-use"
browser_cfg["use_gateway"] = True
changed.add("browser")
if "image_gen" in tool_keys:
image_cfg = config.get("image_gen")
if not isinstance(image_cfg, dict):
image_cfg = {}
config["image_gen"] = image_cfg
image_cfg["use_gateway"] = True
changed.add("image_gen")
return changed
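Continuing that sketch, opting two tools into the gateway mutates the config in place:

changed = apply_gateway_defaults(config, ["web", "tts"])
# changed == {"web", "tts"}
# config["web"] == {"backend": "firecrawl", "use_gateway": True}
# config["tts"] == {"provider": "openai", "use_gateway": True}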
def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]:
"""If eligible tools exist, prompt the user to enable the Tool Gateway.
Uses prompt_choice() with a description parameter so the curses TUI
shows the tool context alongside the choices.
Returns the set of tools that were enabled, or empty set if the user
declined or no tools were eligible.
"""
unconfigured, has_direct, already_managed = get_gateway_eligible_tools(config)
if not unconfigured and not has_direct:
return set()
try:
from hermes_cli.setup import prompt_choice
except Exception:
return set()
# Build description lines showing full status of all gateway tools
desc_parts: list[str] = [
"",
" The Tool Gateway gives you access to web search, image generation,",
" text-to-speech, and browser automation through your Nous subscription.",
" No need to sign up for separate API keys — just pick the tools you want.",
"",
]
if already_managed:
for k in already_managed:
desc_parts.append(f"{_GATEWAY_TOOL_LABELS[k]} — using Tool Gateway")
if unconfigured:
for k in unconfigured:
desc_parts.append(f"{_GATEWAY_TOOL_LABELS[k]} — not configured")
if has_direct:
for k in has_direct:
desc_parts.append(f"{_GATEWAY_TOOL_LABELS[k]} — using {_GATEWAY_DIRECT_LABELS[k]}")
# Build short choice labels — detail is in the description above
choices: list[str] = []
choice_keys: list[str] = [] # maps choice index -> action
if unconfigured and has_direct:
choices.append("Enable for all tools (existing keys kept, not used)")
choice_keys.append("all")
choices.append("Enable only for tools without existing keys")
choice_keys.append("unconfigured")
choices.append("Skip")
choice_keys.append("skip")
elif unconfigured:
choices.append("Enable Tool Gateway")
choice_keys.append("unconfigured")
choices.append("Skip")
choice_keys.append("skip")
else:
choices.append("Enable Tool Gateway (existing keys kept, not used)")
choice_keys.append("all")
choices.append("Skip")
choice_keys.append("skip")
description = "\n".join(desc_parts) if desc_parts else None
# Default to "Enable" when user has no direct keys (new user),
# default to "Skip" when they have existing keys to preserve.
default_idx = 0 if not has_direct else len(choices) - 1
try:
idx = prompt_choice(
"Your Nous subscription includes the Tool Gateway.",
choices,
default_idx,
description=description,
)
except (KeyboardInterrupt, EOFError, OSError, SystemExit):
return set()
action = choice_keys[idx]
if action == "skip":
return set()
if action == "all":
# Apply to switchable tools + ensure already-managed tools also
# have use_gateway persisted in config for consistency.
to_apply = list(_ALL_GATEWAY_KEYS)
else:
to_apply = unconfigured
changed = apply_gateway_defaults(config, to_apply)
if changed:
from hermes_cli.config import save_config
save_config(config)
# Only report the tools that actually switched (not already-managed ones)
newly_switched = changed - set(already_managed)
for key in sorted(newly_switched):
label = _GATEWAY_TOOL_LABELS.get(key, key)
print(f"{label}: enabled via Nous subscription")
if already_managed and not newly_switched:
print(" (all tools already using Tool Gateway)")
return changed

View file

@ -112,6 +112,7 @@ class LoadedPlugin:
module: Optional[types.ModuleType] = None
tools_registered: List[str] = field(default_factory=list)
hooks_registered: List[str] = field(default_factory=list)
commands_registered: List[str] = field(default_factory=list)
enabled: bool = False
error: Optional[str] = None
@ -211,6 +212,84 @@ class PluginContext:
}
logger.debug("Plugin %s registered CLI command: %s", self.manifest.name, name)
# -- slash command registration -------------------------------------------
def register_command(
self,
name: str,
handler: Callable,
description: str = "",
) -> None:
"""Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.
The handler signature is ``fn(raw_args: str) -> str | None``.
It may also be an async callable; the gateway dispatch handles both.
Unlike ``register_cli_command()`` (which creates ``hermes <subcommand>``
terminal commands), this registers in-session slash commands that users
invoke during a conversation.
Names conflicting with built-in commands are rejected with a warning.
"""
clean = name.lower().strip().lstrip("/").replace(" ", "-")
if not clean:
logger.warning(
"Plugin '%s' tried to register a command with an empty name.",
self.manifest.name,
)
return
# Reject if it conflicts with a built-in command
try:
from hermes_cli.commands import resolve_command
if resolve_command(clean) is not None:
logger.warning(
"Plugin '%s' tried to register command '/%s' which conflicts "
"with a built-in command. Skipping.",
self.manifest.name, clean,
)
return
except Exception:
pass # If commands module isn't available, skip the check
self._manager._plugin_commands[clean] = {
"handler": handler,
"description": description or "Plugin command",
"plugin": self.manifest.name,
}
logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)
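A minimal plugin sketch using this hook (the /lcm name echoes the docstring; the handler body is hypothetical):

import math

def _lcm(raw_args: str) -> str:
    """Least common multiple: /lcm 4 6 -> 12."""
    a, b = (int(x) for x in raw_args.split())
    return str(math.lcm(a, b))

# inside the plugin's setup hook:
#   ctx.register_command("lcm", _lcm, description="Least common multiple")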
# -- tool dispatch -------------------------------------------------------
def dispatch_tool(self, tool_name: str, args: dict, **kwargs) -> str:
"""Dispatch a tool call through the registry, with parent agent context.
This is the public interface for plugin slash commands that need to call
tools like ``delegate_task`` without reaching into framework internals.
The parent agent (if available) is resolved automatically; plugins never
need to access the agent directly.
Args:
tool_name: Registry name of the tool (e.g. ``"delegate_task"``).
args: Tool arguments dict (same as what the model would pass).
**kwargs: Extra keyword args forwarded to the registry dispatch.
Returns:
JSON string from the tool handler (same format as model tool calls).
"""
from tools.registry import registry
# Wire up parent agent context when available (CLI mode).
# In gateway mode _cli_ref is None — tools degrade gracefully
# (workspace hints fall back to TERMINAL_CWD, no spinner).
if "parent_agent" not in kwargs:
cli = self._manager._cli_ref
agent = getattr(cli, "agent", None) if cli else None
if agent is not None:
kwargs["parent_agent"] = agent
return registry.dispatch(tool_name, args, **kwargs)
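From a plugin slash-command handler, a call might look like this sketch (tool name from the docstring; arguments hypothetical):

result_json = ctx.dispatch_tool(
    "delegate_task",
    {"task": "summarize the repository README"},
)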
# -- context engine registration -----------------------------------------
def register_context_engine(self, engine) -> None:
@ -323,6 +402,7 @@ class PluginManager:
self._plugin_tool_names: Set[str] = set()
self._cli_commands: Dict[str, dict] = {}
self._context_engine = None # Set by a plugin via register_context_engine()
self._plugin_commands: Dict[str, dict] = {} # Slash commands registered by plugins
self._discovered: bool = False
self._cli_ref = None # Set by CLI after plugin discovery
# Plugin skill registry: qualified name → metadata dict.
@ -485,6 +565,10 @@ class PluginManager:
for h in p.hooks_registered
}
)
loaded.commands_registered = [
c for c in self._plugin_commands
if self._plugin_commands[c].get("plugin") == manifest.name
]
loaded.enabled = True
except Exception as exc:
@ -598,6 +682,7 @@ class PluginManager:
"enabled": loaded.enabled,
"tools": len(loaded.tools_registered),
"hooks": len(loaded.hooks_registered),
"commands": len(loaded.commands_registered),
"error": loaded.error,
}
)
@ -699,6 +784,20 @@ def get_plugin_context_engine():
return get_plugin_manager()._context_engine
def get_plugin_command_handler(name: str) -> Optional[Callable]:
"""Return the handler for a plugin-registered slash command, or ``None``."""
entry = get_plugin_manager()._plugin_commands.get(name)
return entry["handler"] if entry else None
def get_plugin_commands() -> Dict[str, dict]:
"""Return the full plugin commands dict (name → {handler, description, plugin}).
Safe to call before discovery; returns an empty dict if no plugins are loaded.
"""
return get_plugin_manager()._plugin_commands
def get_plugin_toolsets() -> List[tuple]:
"""Return plugin toolsets as ``(key, label, description)`` tuples.

View file

@ -300,19 +300,10 @@ def _read_config_model(profile_dir: Path) -> tuple:
def _check_gateway_running(profile_dir: Path) -> bool:
"""Check if a gateway is running for a given profile directory."""
pid_file = profile_dir / "gateway.pid"
if not pid_file.exists():
return False
try:
raw = pid_file.read_text().strip()
if not raw:
return False
data = json.loads(raw) if raw.startswith("{") else {"pid": int(raw)}
pid = int(data["pid"])
os.kill(pid, 0) # existence check
return True
except (json.JSONDecodeError, KeyError, ValueError, TypeError,
ProcessLookupError, PermissionError, OSError):
from gateway.status import get_running_pid
return get_running_pid(profile_dir / "gateway.pid", cleanup_stale=False) is not None
except Exception:
return False

View file

@ -64,6 +64,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://portal.qwen.ai/v1",
base_url_env_var="HERMES_QWEN_BASE_URL",
),
"google-gemini-cli": HermesOverlay(
transport="openai_chat",
auth_type="oauth_external",
base_url_override="cloudcode-pa://google",
),
"copilot-acp": HermesOverlay(
transport="codex_responses",
auth_type="external_process",
@ -128,10 +133,15 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_env_var="HF_BASE_URL",
),
"xai": HermesOverlay(
transport="openai_chat",
transport="codex_responses",
base_url_override="https://api.x.ai/v1",
base_url_env_var="XAI_BASE_URL",
),
"nvidia": HermesOverlay(
transport="openai_chat",
base_url_override="https://integrate.api.nvidia.com/v1",
base_url_env_var="NVIDIA_BASE_URL",
),
"xiaomi": HermesOverlay(
transport="openai_chat",
base_url_env_var="XIAOMI_BASE_URL",
@ -141,6 +151,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://api.arcee.ai/api/v1",
base_url_env_var="ARCEE_BASE_URL",
),
"ollama-cloud": HermesOverlay(
transport="openai_chat",
base_url_env_var="OLLAMA_BASE_URL",
),
}
@ -180,6 +194,13 @@ ALIASES: Dict[str, str] = {
# xai
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
# nvidia
"nim": "nvidia",
"nvidia-nim": "nvidia",
"build-nvidia": "nvidia",
"nemotron": "nvidia",
# kimi-for-coding (models.dev ID)
"kimi": "kimi-for-coding",
@ -227,6 +248,11 @@ ALIASES: Dict[str, str] = {
"qwen": "alibaba",
"alibaba-cloud": "alibaba",
# google-gemini-cli (OAuth + Code Assist)
"gemini-cli": "google-gemini-cli",
"gemini-oauth": "google-gemini-cli",
# huggingface
"hf": "huggingface",
"hugging-face": "huggingface",
@ -236,6 +262,12 @@ ALIASES: Dict[str, str] = {
"mimo": "xiaomi",
"xiaomi-mimo": "xiaomi",
# bedrock
"aws": "bedrock",
"aws-bedrock": "bedrock",
"amazon-bedrock": "bedrock",
"amazon": "bedrock",
# arcee
"arcee-ai": "arcee",
"arceeai": "arcee",
@ -244,7 +276,7 @@ ALIASES: Dict[str, str] = {
"lmstudio": "lmstudio",
"lm-studio": "lmstudio",
"lm_studio": "lmstudio",
"ollama": "ollama-cloud",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"vllm": "local",
"llamacpp": "local",
"llama.cpp": "local",
@ -262,6 +294,8 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"copilot-acp": "GitHub Copilot ACP",
"xiaomi": "Xiaomi MiMo",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
"ollama-cloud": "Ollama Cloud",
}
@ -271,6 +305,7 @@ TRANSPORT_TO_API_MODE: Dict[str, str] = {
"openai_chat": "chat_completions",
"anthropic_messages": "anthropic_messages",
"codex_responses": "codex_responses",
"bedrock_converse": "bedrock_converse",
}
@ -388,6 +423,10 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
if pdef is not None:
return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")
# Direct provider checks for providers not in HERMES_OVERLAYS
if provider == "bedrock":
return "bedrock_converse"
# URL-based heuristics for custom / unknown providers
if base_url:
url_lower = base_url.rstrip("/").lower()
@ -395,6 +434,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
return "anthropic_messages"
if "api.openai.com" in url_lower:
return "codex_responses"
if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
return "bedrock_converse"
return "chat_completions"

View file

@ -22,6 +22,7 @@ from hermes_cli.auth import (
resolve_nous_runtime_credentials,
resolve_codex_runtime_credentials,
resolve_qwen_runtime_credentials,
resolve_gemini_oauth_runtime_credentials,
resolve_api_key_provider_credentials,
resolve_external_process_provider_credentials,
has_usable_secret,
@ -41,6 +42,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
tool calls with reasoning (chat/completions returns 400).
"""
normalized = (base_url or "").strip().lower().rstrip("/")
if "api.x.ai" in normalized:
return "codex_responses"
if "api.openai.com" in normalized and "openrouter" not in normalized:
return "codex_responses"
return None
@ -124,7 +127,7 @@ def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
return "chat_completions"
_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages"}
_VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"}
def _parse_api_mode(raw: Any) -> Optional[str]:
@ -154,6 +157,9 @@ def _resolve_runtime_from_pool_entry(
elif provider == "qwen-oauth":
api_mode = "chat_completions"
base_url = base_url or DEFAULT_QWEN_BASE_URL
elif provider == "google-gemini-cli":
api_mode = "chat_completions"
base_url = base_url or "cloudcode-pa://google"
elif provider == "anthropic":
api_mode = "anthropic_messages"
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
@ -163,10 +169,13 @@ def _resolve_runtime_from_pool_entry(
base_url = cfg_base_url or base_url or "https://api.anthropic.com"
elif provider == "openrouter":
base_url = base_url or OPENROUTER_BASE_URL
elif provider == "xai":
api_mode = "codex_responses"
elif provider == "nous":
api_mode = "chat_completions"
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Honour model.base_url from config.yaml when the configured provider
@ -627,6 +636,8 @@ def _resolve_explicit_runtime(
api_mode = "chat_completions"
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
elif provider == "xai":
api_mode = "codex_responses"
else:
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode:
@ -797,6 +808,26 @@ def resolve_runtime_provider(
logger.info("Qwen OAuth credentials failed; "
"falling through to next provider.")
if provider == "google-gemini-cli":
try:
creds = resolve_gemini_oauth_runtime_credentials()
return {
"provider": "google-gemini-cli",
"api_mode": "chat_completions",
"base_url": creds.get("base_url", ""),
"api_key": creds.get("api_key", ""),
"source": creds.get("source", "google-oauth"),
"expires_at_ms": creds.get("expires_at_ms"),
"email": creds.get("email", ""),
"project_id": creds.get("project_id", ""),
"requested_provider": requested_provider,
}
except AuthError:
if requested_provider != "auto":
raise
logger.info("Google Gemini OAuth credentials failed; "
"falling through to next provider.")
if provider == "copilot-acp":
creds = resolve_external_process_provider_credentials(provider)
return {
@ -836,6 +867,77 @@ def resolve_runtime_provider(
"requested_provider": requested_provider,
}
# AWS Bedrock (native Converse API via boto3)
if provider == "bedrock":
from agent.bedrock_adapter import (
has_aws_credentials,
resolve_aws_auth_env_var,
resolve_bedrock_region,
is_anthropic_bedrock_model,
)
# When the user explicitly selected bedrock (not auto-detected),
# trust boto3's credential chain — it handles IMDS, ECS task roles,
# Lambda execution roles, SSO, and other implicit sources that our
# env-var check can't detect.
is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon")
if not is_explicit and not has_aws_credentials():
raise AuthError(
"No AWS credentials found for Bedrock. Configure one of:\n"
" - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n"
" - AWS_PROFILE (for SSO / named profiles)\n"
" - IAM instance role (EC2, ECS, Lambda)\n"
"Or run 'aws configure' to set up credentials.",
code="no_aws_credentials",
)
# Read bedrock-specific config from config.yaml
from hermes_cli.config import load_config as _load_bedrock_config
_bedrock_cfg = _load_bedrock_config().get("bedrock", {})
# Region priority: config.yaml bedrock.region → env var → us-east-1
region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
# Build guardrail config if configured
_gr = _bedrock_cfg.get("guardrail", {})
guardrail_config = None
if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
guardrail_config = {
"guardrailIdentifier": _gr["guardrail_identifier"],
"guardrailVersion": _gr["guardrail_version"],
}
if _gr.get("stream_processing_mode"):
guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
if _gr.get("trace"):
guardrail_config["trace"] = _gr["trace"]
# Dual-path routing: Claude models use AnthropicBedrock SDK for full
# feature parity (prompt caching, thinking budgets, adaptive thinking).
# Non-Claude models use the Converse API for multi-model support.
_current_model = str(model_cfg.get("default") or "").strip()
if is_anthropic_bedrock_model(_current_model):
# Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path
runtime = {
"provider": "bedrock",
"api_mode": "anthropic_messages",
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
"api_key": "aws-sdk",
"source": auth_source,
"region": region,
"bedrock_anthropic": True, # Signal to use AnthropicBedrock client
"requested_provider": requested_provider,
}
else:
# Non-Claude (Nova, DeepSeek, Llama, etc.) → Converse API
runtime = {
"provider": "bedrock",
"api_mode": "bedrock_converse",
"base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
"api_key": "aws-sdk",
"source": auth_source,
"region": region,
"requested_provider": requested_provider,
}
if guardrail_config:
runtime["guardrail_config"] = guardrail_config
return runtime
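A hedged sketch of the two runtime shapes this branch returns (field values illustrative):

# Claude on Bedrock -> AnthropicBedrock SDK:
#   {"provider": "bedrock", "api_mode": "anthropic_messages",
#    "bedrock_anthropic": True, "region": "us-east-1", ...}
# Nova / DeepSeek / Llama -> Converse API:
#   {"provider": "bedrock", "api_mode": "bedrock_converse",
#    "region": "us-east-1", ...}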
# API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
pconfig = PROVIDER_REGISTRY.get(provider)
if pconfig and pconfig.auth_type == "api_key":
@ -852,6 +954,8 @@ def resolve_runtime_provider(
api_mode = "chat_completions"
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
elif provider == "xai":
api_mode = "codex_responses"
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Only honor persisted api_mode when it belongs to the same provider family.

View file

@ -20,10 +20,7 @@ import copy
from pathlib import Path
from typing import Optional, Dict, Any
from hermes_cli.nous_subscription import (
apply_nous_provider_defaults,
get_nous_subscription_features,
)
from hermes_cli.nous_subscription import get_nous_subscription_features
from tools.tool_backend_helpers import managed_nous_tools_enabled
from hermes_constants import get_optional_skills_dir
@ -94,7 +91,7 @@ _DEFAULT_PROVIDER_MODELS = {
"gemini": [
"gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
"gemma-4-31b-it", "gemma-4-26b-it",
"gemma-4-31b-it",
],
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
@ -105,7 +102,7 @@ _DEFAULT_PROVIDER_MODELS = {
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
"opencode-go": ["glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
"opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
"huggingface": [
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@ -213,20 +210,20 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
sys.exit(1)
def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int:
def _curses_prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
"""Single-select menu using curses. Delegates to curses_radiolist."""
from hermes_cli.curses_ui import curses_radiolist
return curses_radiolist(question, choices, selected=default, cancel_returns=-1)
return curses_radiolist(question, choices, selected=default, cancel_returns=-1, description=description)
def prompt_choice(question: str, choices: list, default: int = 0) -> int:
def prompt_choice(question: str, choices: list, default: int = 0, description: str | None = None) -> int:
"""Prompt for a choice from a list with arrow key navigation.
Escape keeps the current default (skips the question).
Ctrl+C exits the wizard.
"""
idx = _curses_prompt_choice(question, choices, default)
idx = _curses_prompt_choice(question, choices, default, description=description)
if idx >= 0:
if idx == default:
print_info(" Skipped (keeping current)")
@ -433,6 +430,8 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Text-to-Speech (MiniMax)", True, None))
elif tts_provider == "mistral" and get_env_value("MISTRAL_API_KEY"):
tool_status.append(("Text-to-Speech (Mistral Voxtral)", True, None))
elif tts_provider == "gemini" and (get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY")):
tool_status.append(("Text-to-Speech (Google Gemini)", True, None))
elif tts_provider == "neutts":
try:
import importlib.util
@ -835,14 +834,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings")
if selected_provider == "nous" and nous_subscription_selected:
changed_defaults = apply_nous_provider_defaults(config)
current_tts = str(config.get("tts", {}).get("provider") or "edge")
if "tts" in changed_defaults:
print_success("TTS provider set to: OpenAI TTS via your Nous subscription")
else:
print_info(f"Keeping your existing TTS provider: {current_tts}")
# Tool Gateway prompt is already shown by _model_flow_nous() above.
save_config(config)
if not quick and selected_provider != "nous":
@ -920,8 +912,10 @@ def _setup_tts_provider(config: dict):
"edge": "Edge TTS",
"elevenlabs": "ElevenLabs",
"openai": "OpenAI TTS",
"xai": "xAI TTS",
"minimax": "MiniMax TTS",
"mistral": "Mistral Voxtral TTS",
"gemini": "Google Gemini TTS",
"neutts": "NeuTTS",
}
current_label = provider_labels.get(current_provider, current_provider)
@ -941,12 +935,14 @@ def _setup_tts_provider(config: dict):
"Edge TTS (free, cloud-based, no setup needed)",
"ElevenLabs (premium quality, needs API key)",
"OpenAI TTS (good quality, needs API key)",
"xAI TTS (Grok voices, needs API key)",
"MiniMax TTS (high quality with voice cloning, needs API key)",
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
]
)
providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"])
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@ -1012,6 +1008,23 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "xai":
existing = get_env_value("XAI_API_KEY")
if not existing:
print()
api_key = prompt("xAI API key for TTS", password=True)
if api_key:
save_env_value("XAI_API_KEY", api_key)
print_success("xAI TTS API key saved")
else:
from hermes_constants import display_hermes_home as _dhh
print_warning(
"No xAI API key provided for TTS. Configure XAI_API_KEY via "
f"hermes setup model or {_dhh()}/.env to use xAI TTS. "
"Falling back to Edge TTS."
)
selected = "edge"
elif selected == "minimax":
existing = get_env_value("MINIMAX_API_KEY")
if not existing:
@ -1036,6 +1049,19 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "gemini":
existing = get_env_value("GEMINI_API_KEY") or get_env_value("GOOGLE_API_KEY")
if not existing:
print()
print_info("Get a free API key at https://aistudio.google.com/app/apikey")
api_key = prompt("Gemini API key for TTS", password=True)
if api_key:
save_env_value("GEMINI_API_KEY", api_key)
print_success("Gemini TTS API key saved")
else:
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
# Save the selection
if "tts" not in config:
config["tts"] = {}
@ -1611,9 +1637,19 @@ def _setup_telegram():
return
print_info("Create a bot via @BotFather on Telegram")
token = prompt("Telegram bot token", password=True)
if not token:
return
import re
while True:
token = prompt("Telegram bot token", password=True)
if not token:
return
if not re.match(r"^\d+:[A-Za-z0-9_-]{30,}$", token):
print_error(
"Invalid token format. Expected: <numeric_id>:<alphanumeric_hash> "
"(e.g., 123456789:ABCdefGHI-jklMNOpqrSTUvwxYZ)"
)
continue
break
save_env_value("TELEGRAM_BOT_TOKEN", token)
print_success("Telegram token saved")
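The token check accepts the standard BotFather shape and rejects everything else, as in this sketch:

import re
pattern = r"^\d+:[A-Za-z0-9_-]{30,}$"
bool(re.match(pattern, "123456789:" + "A" * 35))  # True
bool(re.match(pattern, "not-a-token"))            # False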
@ -1969,52 +2005,6 @@ def _setup_wecom_callback():
_gw_setup()
def _setup_qqbot():
"""Configure QQ Bot gateway."""
print_header("QQ Bot")
existing = get_env_value("QQ_APP_ID")
if existing:
print_info("QQ Bot: already configured")
if not prompt_yes_no("Reconfigure QQ Bot?", False):
return
print_info("Connects Hermes to QQ via the Official QQ Bot API (v2).")
print_info(" Requires a QQ Bot application at q.qq.com")
print_info(" Reference: https://bot.q.qq.com/wiki/develop/api-v2/")
print()
app_id = prompt("QQ Bot App ID")
if not app_id:
print_warning("App ID is required — skipping QQ Bot setup")
return
save_env_value("QQ_APP_ID", app_id.strip())
client_secret = prompt("QQ Bot App Secret", password=True)
if not client_secret:
print_warning("App Secret is required — skipping QQ Bot setup")
return
save_env_value("QQ_CLIENT_SECRET", client_secret)
print_success("QQ Bot credentials saved")
print()
print_info("🔒 Security: Restrict who can DM your bot")
print_info(" Use QQ user OpenIDs (found in event payloads)")
print()
allowed_users = prompt("Allowed user OpenIDs (comma-separated, leave empty for open access)")
if allowed_users:
save_env_value("QQ_ALLOWED_USERS", allowed_users.replace(" ", ""))
print_success("QQ Bot allowlist configured")
else:
print_info("⚠️ No allowlist set — anyone can DM the bot!")
print()
print_info("📬 Home Channel: OpenID for cron job delivery and notifications.")
home_channel = prompt("Home channel OpenID (leave empty to set later)")
if home_channel:
save_env_value("QQ_HOME_CHANNEL", home_channel)
print()
print_success("QQ Bot configured!")
def _setup_bluebubbles():
@ -2083,12 +2073,9 @@ def _setup_bluebubbles():
def _setup_qqbot():
"""Configure QQ Bot (Official API v2) via standard platform setup."""
from hermes_cli.gateway import _PLATFORMS
qq_platform = next((p for p in _PLATFORMS if p["key"] == "qqbot"), None)
if qq_platform:
from hermes_cli.gateway import _setup_standard_platform
_setup_standard_platform(qq_platform)
"""Configure QQ Bot (Official API v2) via gateway setup."""
from hermes_cli.gateway import _setup_qqbot as _gateway_setup_qqbot
_gateway_setup_qqbot()
def _setup_webhooks():
@ -2228,7 +2215,9 @@ def setup_gateway(config: dict):
missing_home.append("Slack")
if get_env_value("BLUEBUBBLES_SERVER_URL") and not get_env_value("BLUEBUBBLES_HOME_CHANNEL"):
missing_home.append("BlueBubbles")
if get_env_value("QQ_APP_ID") and not get_env_value("QQ_HOME_CHANNEL"):
if get_env_value("QQ_APP_ID") and not (
get_env_value("QQBOT_HOME_CHANNEL") or get_env_value("QQ_HOME_CHANNEL")
):
missing_home.append("QQBot")
if missing_home:
@ -2253,8 +2242,10 @@ def setup_gateway(config: dict):
_is_service_running,
supports_systemd_services,
has_conflicting_systemd_units,
has_legacy_hermes_units,
install_linux_gateway_from_setup,
print_systemd_scope_conflict_warning,
print_legacy_unit_warning,
systemd_start,
systemd_restart,
launchd_install,
@ -2272,6 +2263,10 @@ def setup_gateway(config: dict):
print_systemd_scope_conflict_warning()
print()
if supports_systemd and has_legacy_hermes_units():
print_legacy_unit_warning()
print()
if service_running:
if prompt_yes_no(" Restart the gateway to pick up changes?", True):
try:

View file

@ -515,6 +515,90 @@ def do_inspect(identifier: str, console: Optional[Console] = None) -> None:
c.print()
def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> dict:
"""Paginated hub browse for programmatic callers (e.g. TUI gateway).
Returns ``{"items": [...], "page": int, "total_pages": int, "total": int}``.
"""
from tools.skills_hub import GitHubAuth, create_source_router
page_size = max(1, min(page_size, 100))
_TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1}
_PER_SOURCE_LIMIT = {"official": 100, "skills-sh": 100, "well-known": 25, "github": 100, "clawhub": 50,
"claude-marketplace": 50, "lobehub": 50}
auth = GitHubAuth()
sources = create_source_router(auth)
all_results: list = []
for src in sources:
sid = src.source_id()
if source != "all" and sid != source and sid != "official":
continue
try:
limit = _PER_SOURCE_LIMIT.get(sid, 50)
all_results.extend(src.search("", limit=limit))
except Exception:
continue
if not all_results:
return {"items": [], "page": 1, "total_pages": 1, "total": 0}
seen: dict = {}
for r in all_results:
rank = _TRUST_RANK.get(r.trust_level, 0)
if r.name not in seen or rank > _TRUST_RANK.get(seen[r.name].trust_level, 0):
seen[r.name] = r
deduped = list(seen.values())
deduped.sort(key=lambda r: (-_TRUST_RANK.get(r.trust_level, 0), r.source != "official", r.name.lower()))
total = len(deduped)
total_pages = max(1, (total + page_size - 1) // page_size)
page = max(1, min(page, total_pages))
start = (page - 1) * page_size
page_items = deduped[start : min(start + page_size, total)]
return {
"items": [{"name": r.name, "description": r.description, "source": r.source,
"trust": r.trust_level} for r in page_items],
"page": page,
"total_pages": total_pages,
"total": total,
}
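A hedged usage sketch for browse_skills; the page, size, and source values here are arbitrary, and the import path is whichever module this lives in:

result = browse_skills(page=2, page_size=10, source="skills-sh")
for item in result["items"]:
    # each item is {"name", "description", "source", "trust"}
    print(f"[{item['trust']}] {item['name']}: {item['description']}")
print(f"page {result['page']}/{result['total_pages']}, {result['total']} skills")

Note the ceiling division in the pagination: 23 results at page_size=10 give total_pages = (23 + 10 - 1) // 10 = 3, and out-of-range page numbers are clamped rather than raising.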
def inspect_skill(identifier: str) -> Optional[dict]:
"""Skill metadata (+ SKILL.md preview) for programmatic callers."""
from tools.skills_hub import GitHubAuth, create_source_router
class _Q:
def print(self, *a, **k):
pass
c = _Q()
auth = GitHubAuth()
sources = create_source_router(auth)
ident = identifier
if "/" not in ident:
ident = _resolve_short_name(ident, sources, c)
if not ident:
return None
meta, bundle, _ = _resolve_source_meta_and_bundle(ident, sources)
if not meta:
return None
out: dict = {
"name": meta.name,
"description": meta.description,
"source": meta.source,
"identifier": meta.identifier,
"tags": list(meta.tags) if meta.tags else [],
}
if bundle and "SKILL.md" in bundle.files:
content = bundle.files["SKILL.md"]
if isinstance(content, bytes):
content = content.decode("utf-8", errors="replace")
lines = content.split("\n")
preview = "\n".join(lines[:50])
if len(lines) > 50:
preview += f"\n\n... ({len(lines) - 50} more lines)"
out["skill_md_preview"] = preview
return out
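And a companion sketch for inspect_skill, which returns None on any resolution failure; the skill name is a placeholder:

meta = inspect_skill("web-search")  # short names are resolved across all sources
if meta is None:
    print("not found (or the short name did not resolve)")
else:
    print(meta["identifier"], meta["tags"])
    print(meta.get("skill_md_preview", "(no SKILL.md in bundle)"))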
def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None:
"""List installed skills, distinguishing hub, builtin, and local skills."""
from tools.skills_hub import HubLockFile, ensure_hub_dirs
@ -684,6 +768,51 @@ def do_uninstall(name: str, console: Optional[Console] = None,
c.print(f"[bold red]Error:[/] {msg}\n")
def do_reset(name: str, restore: bool = False,
console: Optional[Console] = None,
skip_confirm: bool = False,
invalidate_cache: bool = True) -> None:
"""Reset a bundled skill's manifest tracking (+ optionally restore from bundled)."""
from tools.skills_sync import reset_bundled_skill
c = console or _console
if not skip_confirm and restore:
c.print(f"\n[bold]Restore '{name}' from bundled source?[/]")
c.print("[dim]This will DELETE your current copy and re-copy the bundled version.[/]")
try:
answer = input("Confirm [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
answer = "n"
if answer not in ("y", "yes"):
c.print("[dim]Cancelled.[/]\n")
return
result = reset_bundled_skill(name, restore=restore)
if not result["ok"]:
c.print(f"[bold red]Error:[/] {result['message']}\n")
return
c.print(f"[bold green]{result['message']}[/]")
synced = result.get("synced") or {}
if synced.get("copied"):
c.print(f"[dim]Copied: {', '.join(synced['copied'])}[/]")
if synced.get("updated"):
c.print(f"[dim]Updated: {', '.join(synced['updated'])}[/]")
c.print()
if invalidate_cache:
try:
from agent.prompt_builder import clear_skills_system_prompt_cache
clear_skills_system_prompt_cache(clear_snapshot=True)
except Exception:
pass
else:
c.print("[dim]Change will take effect in your next session.[/]")
c.print("[dim]Use /reset to start a new session now, or --now to apply immediately (invalidates prompt cache).[/]\n")
def do_tap(action: str, repo: str = "", console: Optional[Console] = None) -> None:
"""Manage taps (custom GitHub repo sources)."""
from tools.skills_hub import TapsManager
@ -1007,6 +1136,9 @@ def skills_command(args) -> None:
do_audit(name=getattr(args, "name", None))
elif action == "uninstall":
do_uninstall(args.name)
elif action == "reset":
do_reset(args.name, restore=getattr(args, "restore", False),
skip_confirm=getattr(args, "yes", False))
elif action == "publish":
do_publish(
args.skill_path,
@ -1029,7 +1161,7 @@ def skills_command(args) -> None:
return
do_tap(tap_action, repo=repo)
else:
_console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|publish|snapshot|tap]\n")
_console.print("Usage: hermes skills [browse|search|install|inspect|list|check|update|audit|uninstall|reset|publish|snapshot|tap]\n")
_console.print("Run 'hermes skills <command> --help' for details.\n")
@ -1175,6 +1307,19 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
do_uninstall(args[0], console=c, skip_confirm=skip_confirm,
invalidate_cache=invalidate_cache)
elif action == "reset":
if not args:
c.print("[bold red]Usage:[/] /skills reset <name> [--restore] [--now]\n")
c.print("[dim]Clears the bundled-skills manifest entry so future updates stop marking it as user-modified.[/]")
c.print("[dim]Pass --restore to also replace the current copy with the bundled version.[/]\n")
return
name = args[0]
restore = "--restore" in args
invalidate_cache = "--now" in args
# Slash commands can't prompt — --restore in slash mode is implicit consent.
do_reset(name, restore=restore, console=c, skip_confirm=True,
invalidate_cache=invalidate_cache)
elif action == "publish":
if not args:
c.print("[bold red]Usage:[/] /skills publish <skill-path> [--to github] [--repo owner/repo]\n")
@ -1231,6 +1376,7 @@ def _print_skills_help(console: Console) -> None:
" [cyan]update[/] [name] Update hub skills with upstream changes\n"
" [cyan]audit[/] [name] Re-scan hub skills for security\n"
" [cyan]uninstall[/] <name> Remove a hub-installed skill\n"
" [cyan]reset[/] <name> [--restore] Reset bundled-skill tracking (fix 'user-modified' flag)\n"
" [cyan]publish[/] <path> --repo <r> Publish a skill to GitHub via PR\n"
" [cyan]snapshot[/] export|import Export/import skill configurations\n"
" [cyan]tap[/] list|add|remove Manage skill sources\n",

View file

@ -23,7 +23,7 @@ All fields are optional. Missing values inherit from the ``default`` skin.
banner_dim: "#B8860B" # Dim/muted text (separators, labels)
banner_text: "#FFF8DC" # Body text (tool names, skill names)
ui_accent: "#FFBF00" # General UI accent
ui_label: "#4dd0e1" # UI labels
ui_label: "#DAA520" # UI labels (warm gold; teal clashed w/ default banner gold)
ui_ok: "#4caf50" # Success indicators
ui_error: "#ef5350" # Error indicators
ui_warn: "#ffa726" # Warning indicators
@ -163,7 +163,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"banner_dim": "#B8860B",
"banner_text": "#FFF8DC",
"ui_accent": "#FFBF00",
"ui_label": "#4dd0e1",
"ui_label": "#DAA520",
"ui_ok": "#4caf50",
"ui_error": "#ef5350",
"ui_warn": "#ffa726",
@ -708,7 +708,9 @@ def init_skin_from_config(config: dict) -> None:
Call this once during CLI init with the loaded config dict.
"""
display = config.get("display", {})
display = config.get("display") or {}
if not isinstance(display, dict):
display = {}
skin_name = display.get("skin", "default")
if isinstance(skin_name, str) and skin_name.strip():
set_active_skin(skin_name.strip())
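The 'or {}' guard matters because YAML parses an empty mapping key as None, so the old dict.get default never fired. A minimal repro, assuming PyYAML is what the config loader uses:

import yaml

config = yaml.safe_load("display:\n")      # -> {"display": None}
config.get("display", {})                  # -> None; the default only applies when the key is absent
(config.get("display") or {}).get("skin", "default")  # -> "default", as intended

The extra isinstance check then covers the remaining case of display being set to a scalar or list.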

Some files were not shown because too many files have changed in this diff