mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(delegate): resolve merge conflict with upstream thread capture
This commit is contained in:
commit
fda1325e79
522 changed files with 82059 additions and 4541 deletions
3
.github/workflows/deploy-site.yml
vendored
3
.github/workflows/deploy-site.yml
vendored
|
|
@ -53,6 +53,9 @@ jobs:
|
|||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Build skills index (if not already present)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
|
|
|||
3
.github/workflows/docs-site-checks.yml
vendored
3
.github/workflows/docs-site-checks.yml
vendored
|
|
@ -36,6 +36,9 @@ jobs:
|
|||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Lint docs diagrams
|
||||
run: npm run lint:diagrams
|
||||
working-directory: website
|
||||
|
|
|
|||
301
AGENTS.md
301
AGENTS.md
|
|
@ -5,78 +5,61 @@ Instructions for AI coding assistants and developers working on the hermes-agent
|
|||
## Development Environment
|
||||
|
||||
```bash
|
||||
source venv/bin/activate # ALWAYS activate before running Python
|
||||
# Prefer .venv; fall back to venv if that's what your checkout has.
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
```
|
||||
|
||||
`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
|
||||
`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
|
||||
main checkout).
|
||||
|
||||
## Project Structure
|
||||
|
||||
File counts shift constantly — don't treat the tree below as exhaustive.
|
||||
The canonical source is the filesystem. The notes call out the load-bearing
|
||||
entry points you'll actually edit.
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop
|
||||
├── run_agent.py # AIAgent class — core conversation loop (~12k LOC)
|
||||
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
|
||||
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC)
|
||||
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
|
||||
├── agent/ # Agent internals
|
||||
│ ├── prompt_builder.py # System prompt assembly
|
||||
│ ├── context_compressor.py # Auto context compression
|
||||
│ ├── prompt_caching.py # Anthropic prompt caching
|
||||
│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization)
|
||||
│ ├── model_metadata.py # Model context lengths, token estimation
|
||||
│ ├── models_dev.py # models.dev registry integration (provider-aware context)
|
||||
│ ├── display.py # KawaiiSpinner, tool preview formatting
|
||||
│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
|
||||
│ └── trajectory.py # Trajectory saving helpers
|
||||
├── hermes_cli/ # CLI subcommands and setup
|
||||
│ ├── main.py # Entry point — all `hermes` subcommands
|
||||
│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
|
||||
│ ├── commands.py # Slash command definitions + SlashCommandCompleter
|
||||
│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval)
|
||||
│ ├── setup.py # Interactive setup wizard
|
||||
│ ├── skin_engine.py # Skin/theme engine — CLI visual customization
|
||||
│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform
|
||||
│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform
|
||||
│ ├── skills_hub.py # `/skills` slash command (search, browse, install)
|
||||
│ ├── models.py # Model catalog, provider model lists
|
||||
│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway)
|
||||
│ └── auth.py # Provider credential resolution
|
||||
├── tools/ # Tool implementations (one file per tool)
|
||||
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
|
||||
│ ├── approval.py # Dangerous command detection
|
||||
│ ├── terminal_tool.py # Terminal orchestration
|
||||
│ ├── process_registry.py # Background process management
|
||||
│ ├── file_tools.py # File read/write/search/patch
|
||||
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
|
||||
│ ├── browser_tool.py # Browserbase browser automation
|
||||
│ ├── code_execution_tool.py # execute_code sandbox
|
||||
│ ├── delegate_tool.py # Subagent delegation
|
||||
│ ├── mcp_tool.py # MCP client (~1050 lines)
|
||||
├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths
|
||||
├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
|
||||
├── batch_runner.py # Parallel batch processing
|
||||
├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.)
|
||||
├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine
|
||||
├── tools/ # Tool implementations — auto-discovered via tools/registry.py
|
||||
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
|
||||
├── gateway/ # Messaging platform gateway
|
||||
│ ├── run.py # Main loop, slash commands, message dispatch
|
||||
│ ├── session.py # SessionStore — conversation persistence
|
||||
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
|
||||
├── gateway/ # Messaging gateway — run.py + session.py + platforms/
|
||||
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
│ └── <others>/ # Dashboard, image-gen, disk-cleanup, examples, ...
|
||||
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
|
||||
├── skills/ # Built-in skills bundled with the repo
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
│ ├── src/entry.tsx # TTY gate + render()
|
||||
│ ├── src/app.tsx # Main state machine and UI
|
||||
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
|
||||
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
|
||||
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
|
||||
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
|
||||
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
|
||||
│ └── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
|
||||
├── tui_gateway/ # Python JSON-RPC backend for the TUI
|
||||
│ ├── entry.py # stdio entrypoint
|
||||
│ ├── server.py # RPC handlers and session logic
|
||||
│ ├── render.py # Optional rich/ANSI bridge
|
||||
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── cron/ # Scheduler — jobs.py, scheduler.py
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── tests/ # Pytest suite (~3000 tests)
|
||||
└── batch_runner.py # Parallel batch processing
|
||||
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
|
||||
├── website/ # Docusaurus docs site
|
||||
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
|
||||
**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
|
||||
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
|
||||
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
|
||||
|
||||
## File Dependency Chain
|
||||
|
||||
|
|
@ -94,20 +77,30 @@ run_agent.py, cli.py, batch_runner.py, environments/
|
|||
|
||||
## AIAgent Class (run_agent.py)
|
||||
|
||||
The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
|
||||
session context, budget, credential pool, etc.). The signature below is the
|
||||
minimum subset you'll usually touch — read `run_agent.py` for the full list.
|
||||
|
||||
```python
|
||||
class AIAgent:
|
||||
def __init__(self,
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
max_iterations: int = 90,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
provider: str = None,
|
||||
api_mode: str = None, # "chat_completions" | "codex_responses" | ...
|
||||
model: str = "", # empty → resolved from config/provider later
|
||||
max_iterations: int = 90, # tool-calling iterations (shared with subagents)
|
||||
enabled_toolsets: list = None,
|
||||
disabled_toolsets: list = None,
|
||||
quiet_mode: bool = False,
|
||||
save_trajectories: bool = False,
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
session_id: str = None,
|
||||
skip_context_files: bool = False,
|
||||
skip_memory: bool = False,
|
||||
# ... plus provider, api_mode, callbacks, routing params
|
||||
credential_pool=None,
|
||||
# ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
|
||||
# checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
|
||||
): ...
|
||||
|
||||
def chat(self, message: str) -> str:
|
||||
|
|
@ -120,10 +113,13 @@ class AIAgent:
|
|||
|
||||
### Agent Loop
|
||||
|
||||
The core loop is inside `run_conversation()` — entirely synchronous:
|
||||
The core loop is inside `run_conversation()` — entirely synchronous, with
|
||||
interrupt checks, budget tracking, and a one-turn grace call:
|
||||
|
||||
```python
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
|
||||
or self._budget_grace_call:
|
||||
if self._interrupt_requested: break
|
||||
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
|
||||
if response.tool_calls:
|
||||
for tool_call in response.tool_calls:
|
||||
|
|
@ -134,7 +130,8 @@ while api_call_count < self.max_iterations and self.iteration_budget.remaining >
|
|||
return response.content
|
||||
```
|
||||
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
|
||||
Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -243,6 +240,19 @@ npm run fmt # prettier
|
|||
npm test # vitest
|
||||
```
|
||||
|
||||
### TUI in the Dashboard (`hermes dashboard` → `/chat`)
|
||||
|
||||
The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
|
||||
|
||||
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
|
||||
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
|
||||
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
|
||||
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
|
||||
|
||||
**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
|
||||
|
||||
**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
|
||||
|
||||
---
|
||||
|
||||
## Adding New Tools
|
||||
|
|
@ -280,7 +290,7 @@ The registry handles schema collection, dispatch, availability checking, and err
|
|||
|
||||
**State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.
|
||||
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -288,9 +298,13 @@ The registry handles schema collection, dispatch, availability checking, and err
|
|||
|
||||
### config.yaml options:
|
||||
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
|
||||
2. Bump `_config_version` (currently 5) to trigger migration for existing users
|
||||
2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
|
||||
ONLY if you need to actively migrate/transform existing user config
|
||||
(renaming keys, changing structure). Adding a new key to an existing
|
||||
section is handled automatically by the deep-merge and does NOT require
|
||||
a version bump.
|
||||
|
||||
### .env variables:
|
||||
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
"NEW_API_KEY": {
|
||||
|
|
@ -302,13 +316,29 @@ The registry handles schema collection, dispatch, availability checking, and err
|
|||
},
|
||||
```
|
||||
|
||||
### Config loaders (two separate systems):
|
||||
Non-secret settings (timeouts, thresholds, feature flags, paths, display
|
||||
preferences) belong in `config.yaml`, not `.env`. If internal code needs an
|
||||
env var mirror for backward compatibility, bridge it from `config.yaml` to
|
||||
the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
|
||||
|
||||
### Config loaders (three paths — know which one you're in):
|
||||
|
||||
| Loader | Used by | Location |
|
||||
|--------|---------|----------|
|
||||
| `load_cli_config()` | CLI mode | `cli.py` |
|
||||
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
|
||||
| Direct YAML load | Gateway | `gateway/run.py` |
|
||||
| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
|
||||
| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
|
||||
| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
|
||||
|
||||
If you add a new key and the CLI sees it but the gateway doesn't (or vice
|
||||
versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
|
||||
|
||||
### Working directory:
|
||||
- **CLI** — uses the process's current directory (`os.getcwd()`).
|
||||
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
|
||||
to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
|
||||
removed** — the config loader prints a deprecation warning if it's set in
|
||||
`.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
|
||||
`terminal.cwd` in `config.yaml`.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -401,7 +431,95 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.
|
|||
|
||||
---
|
||||
|
||||
## Plugins
|
||||
|
||||
Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
|
||||
repo-shipped plugins can be discovered alongside user-installed ones in
|
||||
`~/.hermes/plugins/` and pip-installed entry points.
|
||||
|
||||
### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
|
||||
|
||||
`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
|
||||
and pip entry points. Each plugin exposes a `register(ctx)` function that
|
||||
can:
|
||||
|
||||
- Register Python-callback lifecycle hooks:
|
||||
`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
|
||||
`on_session_start`, `on_session_end`
|
||||
- Register new tools via `ctx.register_tool(...)`
|
||||
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
|
||||
plugin's argparse tree is wired into `hermes` at startup so
|
||||
`hermes <pluginname> <subcmd>` works with no change to `main.py`
|
||||
|
||||
Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
|
||||
(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
|
||||
as a side effect of importing `model_tools.py`. Code paths that read plugin
|
||||
state without importing `model_tools.py` first must call `discover_plugins()`
|
||||
explicitly (it's idempotent).
|
||||
|
||||
### Memory-provider plugins (`plugins/memory/<name>/`)
|
||||
|
||||
Separate discovery system for pluggable memory backends. Current built-in
|
||||
providers include **honcho, mem0, supermemory, byterover, hindsight,
|
||||
holographic, openviking, retaindb**.
|
||||
|
||||
Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
|
||||
and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
|
||||
`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
|
||||
`post_setup(hermes_home, config)` for setup-wizard integration.
|
||||
|
||||
**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
|
||||
defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
|
||||
it at argparse setup time and wires it into `hermes <plugin>`. The
|
||||
framework only exposes CLI commands for the **currently active** memory
|
||||
provider (read from `memory.provider` in config.yaml), so disabled
|
||||
providers don't clutter `hermes --help`.
|
||||
|
||||
**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
|
||||
(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
|
||||
If a plugin needs a capability the framework doesn't expose, expand the
|
||||
generic plugin surface (new hook, new ctx method) — never hardcode
|
||||
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
|
||||
honcho argparse from `main.py` for exactly this reason.
|
||||
|
||||
### Dashboard / context-engine / image-gen plugin directories
|
||||
|
||||
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
|
||||
etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
|
||||
Context engines plug into `agent/context_engine.py`; image-gen providers
|
||||
into `agent/image_gen_provider.py`.
|
||||
|
||||
---
|
||||
|
||||
## Skills
|
||||
|
||||
Two parallel surfaces:
|
||||
|
||||
- **`skills/`** — built-in skills shipped and loadable by default.
|
||||
Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
|
||||
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
|
||||
NOT active by default. Installed explicitly via
|
||||
`hermes skills install official/<category>/<skill>`. Adapter lives in
|
||||
`tools/skills_hub.py` (`OptionalSkillSource`). Categories include
|
||||
`autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
|
||||
`devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
|
||||
`research`, `security`, `web-development`.
|
||||
|
||||
When reviewing skill PRs, check which directory they target — heavy-dep or
|
||||
niche skills belong in `optional-skills/`.
|
||||
|
||||
### SKILL.md frontmatter
|
||||
|
||||
Standard fields: `name`, `description`, `version`, `platforms`
|
||||
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
`metadata.hermes.tags`, `metadata.hermes.category`,
|
||||
`metadata.hermes.config` (config.yaml settings the skill needs — stored
|
||||
under `skills.config.<key>`, prompted during setup, injected at load time).
|
||||
|
||||
---
|
||||
|
||||
## Important Policies
|
||||
|
||||
### Prompt Caching Must Not Break
|
||||
|
||||
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
|
||||
|
|
@ -411,9 +529,10 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i
|
|||
|
||||
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
|
||||
|
||||
### Working Directory Behavior
|
||||
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
|
||||
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
|
||||
Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
|
||||
must be **cache-aware**: default to deferred invalidation (change takes
|
||||
effect next session), with an opt-in `--now` flag for immediate
|
||||
invalidation. See `/skills install --now` for the canonical pattern.
|
||||
|
||||
### Background Process Notifications (Gateway)
|
||||
|
||||
|
|
@ -435,7 +554,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
|
|||
`HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).
|
||||
|
||||
The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
|
||||
`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
|
||||
`HERMES_HOME` before any module imports. All `get_hermes_home()` references
|
||||
automatically scope to the active profile.
|
||||
|
||||
### Rules for profile-safe code
|
||||
|
|
@ -492,8 +611,12 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
|
|||
for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
|
||||
has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.
|
||||
|
||||
### DO NOT use `simple_term_menu` for interactive menus
|
||||
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
|
||||
### DO NOT introduce new `simple_term_menu` usage
|
||||
Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
|
||||
the preferred UI is curses (stdlib) because `simple_term_menu` has
|
||||
ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
|
||||
interactive menus must use `hermes_cli/curses_ui.py` — see
|
||||
`hermes_cli/tools_config.py` for the canonical pattern.
|
||||
|
||||
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
|
||||
Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
|
||||
|
|
@ -504,6 +627,30 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
|
|||
### DO NOT hardcode cross-tool references in schema descriptions
|
||||
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
|
||||
|
||||
### The gateway has TWO message guards — both must bypass approval/control commands
|
||||
When an agent is running, messages pass through two sequential guards:
|
||||
(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
|
||||
`_pending_messages` when `session_key in self._active_sessions`, and
|
||||
(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
|
||||
`/queue`, `/status`, `/approve`, `/deny` before they reach
|
||||
`running_agent.interrupt()`. Any new command that must reach the runner
|
||||
while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
|
||||
guards and be dispatched inline, not via `_process_message_background()`
|
||||
(which races session lifecycle).
|
||||
|
||||
### Squash merges from stale branches silently revert recent fixes
|
||||
Before squash-merging a PR, ensure the branch is up to date with `main`
|
||||
(`git fetch origin main && git reset --hard origin/main` in the worktree,
|
||||
then re-apply the PR's commits). A stale branch's version of an unrelated
|
||||
file will silently overwrite recent fixes on main when squashed. Verify
|
||||
with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
|
||||
red flag.
|
||||
|
||||
### Don't wire in dead code without E2E validation
|
||||
Unused code that was never shipped was dead for a reason. Before wiring an
|
||||
unused module into a live code path, E2E test the real resolution chain
|
||||
with actual imports (not mocks) against a temp `HERMES_HOME`.
|
||||
|
||||
### Tests must not write to `~/.hermes/`
|
||||
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
|
||||
|
||||
|
|
@ -559,7 +706,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
|||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
|
||||
```bash
|
||||
source venv/bin/activate
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
python -m pytest tests/ -q -n 4
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
|
|||
We value contributions in this order:
|
||||
|
||||
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
|
||||
2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
|
||||
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
|
||||
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
|
||||
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
|
||||
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
|
||||
|
|
@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl
|
|||
|
||||
## Cross-Platform Compatibility
|
||||
|
||||
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
|
||||
Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:
|
||||
|
||||
### Critical rules
|
||||
|
||||
|
|
@ -597,7 +597,7 @@ refactor/description # Code restructuring
|
|||
|
||||
1. **Run tests**: `pytest tests/ -v`
|
||||
2. **Test manually**: Run `hermes` and exercise the code path you changed
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
|
||||
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
|
||||
|
||||
### PR description
|
||||
|
|
|
|||
16
Dockerfile
16
Dockerfile
|
|
@ -10,9 +10,11 @@ ENV PYTHONUNBUFFERED=1
|
|||
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
|
|
@ -41,9 +43,15 @@ COPY --chown=hermes:hermes . .
|
|||
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
|
||||
RUN cd web && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
# The venv needs to be traversable too.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes
|
||||
# Start as root so the entrypoint can usermod/groupmod + gosu.
|
||||
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
|
||||
|
||||
# ---------- Python virtualenv ----------
|
||||
RUN chown hermes:hermes /opt/hermes
|
||||
USER hermes
|
||||
RUN uv venv && \
|
||||
uv pip install --no-cache-dir -e ".[all]"
|
||||
|
||||
|
|
@ -52,4 +60,4 @@ ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
|||
ENV HERMES_HOME=/opt/data
|
||||
ENV PATH="/opt/data/.local/bin:${PATH}"
|
||||
VOLUME [ "/opt/data" ]
|
||||
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
|
||||
ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
|
||||
|
|
|
|||
10
README.md
10
README.md
|
|
@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
|
|||
| Set a personality | `/personality [name]` | `/personality [name]` |
|
||||
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
|
||||
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
|
||||
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
|
||||
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
|
||||
|
||||
|
|
@ -157,14 +157,10 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
|
|||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
python -m pytest tests/ -q
|
||||
scripts/run_tests.sh
|
||||
```
|
||||
|
||||
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
453
RELEASE_v0.11.0.md
Normal file
453
RELEASE_v0.11.0.md
Normal file
|
|
@ -0,0 +1,453 @@
|
|||
# Hermes Agent v0.11.0 (v2026.4.23)
|
||||
|
||||
**Release Date:** April 23, 2026
|
||||
**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
|
||||
|
||||
> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
|
||||
|
||||
This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
|
||||
|
||||
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
|
||||
|
||||
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
|
||||
|
||||
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
|
||||
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating at parity with WeCom/Weixin. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
|
||||
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
|
||||
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
|
||||
- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
|
||||
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
|
||||
|
||||
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Transport Layer (NEW)
|
||||
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
|
||||
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
|
||||
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
|
||||
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
|
||||
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
|
||||
|
||||
### Provider & Model Support
|
||||
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
|
||||
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
|
||||
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
|
||||
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
|
||||
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
|
||||
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
|
||||
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
|
||||
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
|
||||
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
|
||||
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
|
||||
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
|
||||
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
|
||||
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
|
||||
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
|
||||
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
|
||||
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
|
||||
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
|
||||
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
|
||||
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
|
||||
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
|
||||
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
|
||||
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
|
||||
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
|
||||
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
|
||||
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
|
||||
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
|
||||
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
|
||||
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
|
||||
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
|
||||
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
|
||||
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
|
||||
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
|
||||
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
|
||||
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
|
||||
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
|
||||
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
|
||||
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
|
||||
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
|
||||
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
|
||||
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
|
||||
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
|
||||
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
|
||||
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
|
||||
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
|
||||
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
|
||||
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
|
||||
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
|
||||
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
|
||||
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
|
||||
|
||||
### Session & Memory
|
||||
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
|
||||
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
|
||||
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
|
||||
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
|
||||
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
|
||||
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
|
||||
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ New Ink-based TUI
|
||||
|
||||
A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
|
||||
|
||||
### TUI Foundations
|
||||
- New TUI based on Ink + Python JSON-RPC backend
|
||||
- Prettier + ESLint + vitest tooling for `ui-tui/`
|
||||
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
|
||||
- Persistent `_SlashWorker` subprocess for slash command dispatch
|
||||
|
||||
### UX & Features
|
||||
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
|
||||
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
|
||||
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
|
||||
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
|
||||
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
|
||||
- Sticky composer that freezes during scroll
|
||||
- OSC-52 clipboard support for copy across SSH sessions
|
||||
- Virtualized history rendering for performance
|
||||
- Slash command autocomplete via `complete.slash` RPC
|
||||
- Path autocomplete via `complete.path` RPC
|
||||
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
|
||||
|
||||
### Structural Refactors
|
||||
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
|
||||
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
|
||||
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
### Telegram
|
||||
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
|
||||
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
|
||||
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
|
||||
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
|
||||
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
|
||||
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
|
||||
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
|
||||
- Fix: Markdown special char escaping in `send_exec_approval`
|
||||
- Fix: parentheses in URLs during MarkdownV2 link conversion
|
||||
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
|
||||
- Many platform hint / streaming / session-key fixes
|
||||
|
||||
### Discord
|
||||
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
|
||||
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
|
||||
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
|
||||
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
|
||||
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
|
||||
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
|
||||
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
|
||||
|
||||
### Feishu
|
||||
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
|
||||
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
|
||||
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
|
||||
|
||||
### DingTalk
|
||||
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
|
||||
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
|
||||
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
|
||||
|
||||
### WhatsApp
|
||||
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
|
||||
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
|
||||
|
||||
### WeCom / Weixin
|
||||
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
|
||||
|
||||
### Signal
|
||||
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
|
||||
|
||||
### Slack
|
||||
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
|
||||
|
||||
### BlueBubbles (iMessage)
|
||||
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
|
||||
|
||||
### Gateway Core
|
||||
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
|
||||
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
|
||||
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
|
||||
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
|
||||
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
|
||||
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
|
||||
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
|
||||
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
|
||||
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
|
||||
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
|
||||
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
|
||||
- Fix: unlink stale PID + lock files on cleanup
|
||||
- Fix: force-unlink stale PID file after `--replace` takeover
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin Surface (major expansion)
|
||||
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
|
||||
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
|
||||
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
|
||||
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
|
||||
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
|
||||
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
|
||||
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
|
||||
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
|
||||
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
### Browser
|
||||
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
|
||||
- Camoufox hardening + connection stability across the window
|
||||
|
||||
### Execute Code
|
||||
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
|
||||
|
||||
### Image Generation
|
||||
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
|
||||
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
|
||||
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
|
||||
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
|
||||
|
||||
### TTS / STT / Voice
|
||||
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
|
||||
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
|
||||
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
|
||||
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
|
||||
|
||||
### Webhook / Cron
|
||||
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
|
||||
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
|
||||
|
||||
### Delegate
|
||||
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
### File / Patch
|
||||
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
|
||||
|
||||
### API Server
|
||||
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
|
||||
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
|
||||
|
||||
### Docker / Podman
|
||||
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
|
||||
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
|
||||
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
|
||||
|
||||
### MCP
|
||||
- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill System
|
||||
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
|
||||
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
|
||||
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
|
||||
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
|
||||
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
|
||||
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
|
||||
|
||||
### New Skills
|
||||
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
|
||||
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
|
||||
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
|
||||
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
|
||||
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
|
||||
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
|
||||
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
|
||||
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
|
||||
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
|
||||
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
|
||||
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
|
||||
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
|
||||
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
|
||||
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
|
||||
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
|
||||
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
|
||||
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
|
||||
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
|
||||
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
|
||||
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
|
||||
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
|
||||
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
|
||||
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
|
||||
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
|
||||
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
|
||||
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
|
||||
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
|
||||
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
|
||||
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
|
||||
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
|
||||
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
|
||||
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
|
||||
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
|
||||
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
|
||||
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
|
||||
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
The `fix:` category in this window covers 482 PRs. Highlights:
|
||||
|
||||
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
|
||||
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
|
||||
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
|
||||
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
|
||||
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
|
||||
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
|
||||
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
|
||||
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
|
||||
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
|
||||
- Doctor checks for Kimi China credentials fixed
|
||||
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
|
||||
- Rapid Telegram follow-ups no longer get cut off
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- Atropos + wandb links in user guide
|
||||
- ACP / VS Code / Zed / JetBrains integration docs refresh
|
||||
- Webhook subscription docs updated for direct-delivery mode
|
||||
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
|
||||
- Transport layer developer guide added
|
||||
- Website removed Discussions link from README
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count)
|
||||
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
|
||||
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
|
||||
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
|
||||
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
|
||||
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
|
||||
- **@ethernet8023** — 3 PRs
|
||||
- **@benbarclay** — 3 PRs
|
||||
- **@Aslaaen** — 2 PRs
|
||||
|
||||
### Also contributing
|
||||
@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
|
||||
|
||||
### All Contributors (alphabetical)
|
||||
|
||||
@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
|
||||
@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
|
||||
@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
|
||||
@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
|
||||
@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
|
||||
@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
|
||||
@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
|
||||
@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
|
||||
@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
|
||||
@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
|
||||
@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
|
||||
@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
|
||||
@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
|
||||
@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
|
||||
@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
|
||||
@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
|
||||
@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
|
||||
@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
|
||||
@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
|
||||
@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
|
||||
@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
|
||||
@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
|
||||
@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
|
||||
@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
|
||||
@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
|
||||
@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
|
||||
@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
|
||||
@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
|
||||
@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
|
||||
@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
|
||||
@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
|
||||
@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
|
||||
@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
|
||||
|
||||
Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
|
||||
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
|
||||
|
|
@ -60,7 +60,7 @@ from acp_adapter.events import (
|
|||
make_tool_progress_cb,
|
||||
)
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from acp_adapter.session import SessionManager, SessionState
|
||||
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -287,7 +287,11 @@ class HermesACPAgent(acp.Agent):
|
|||
try:
|
||||
from model_tools import get_tool_definitions
|
||||
|
||||
enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
enabled_toolsets = _expand_acp_enabled_toolsets(
|
||||
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
|
||||
mcp_server_names=[server.name for server in mcp_servers],
|
||||
)
|
||||
state.agent.enabled_toolsets = enabled_toolsets
|
||||
disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
|
||||
state.agent.tools = get_tool_definitions(
|
||||
enabled_toolsets=enabled_toolsets,
|
||||
|
|
@ -754,7 +758,9 @@ class HermesACPAgent(acp.Agent):
|
|||
def _cmd_tools(self, args: str, state: SessionState) -> str:
|
||||
try:
|
||||
from model_tools import get_tool_definitions
|
||||
toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
toolsets = _expand_acp_enabled_toolsets(
|
||||
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
)
|
||||
tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
|
||||
if not tools:
|
||||
return "No tools available."
|
||||
|
|
|
|||
|
|
@ -106,6 +106,24 @@ def _register_task_cwd(task_id: str, cwd: str) -> None:
|
|||
logger.debug("Failed to register ACP task cwd override", exc_info=True)
|
||||
|
||||
|
||||
def _expand_acp_enabled_toolsets(
|
||||
toolsets: List[str] | None = None,
|
||||
mcp_server_names: List[str] | None = None,
|
||||
) -> List[str]:
|
||||
"""Return ACP toolsets plus explicit MCP server toolsets for this session."""
|
||||
expanded: List[str] = []
|
||||
for name in list(toolsets or ["hermes-acp"]):
|
||||
if name and name not in expanded:
|
||||
expanded.append(name)
|
||||
|
||||
for server_name in list(mcp_server_names or []):
|
||||
toolset_name = f"mcp-{server_name}"
|
||||
if server_name and toolset_name not in expanded:
|
||||
expanded.append(toolset_name)
|
||||
|
||||
return expanded
|
||||
|
||||
|
||||
def _clear_task_cwd(task_id: str) -> None:
|
||||
"""Remove task-specific cwd overrides for an ACP session."""
|
||||
if not task_id:
|
||||
|
|
@ -537,9 +555,18 @@ class SessionManager:
|
|||
elif isinstance(model_cfg, str) and model_cfg.strip():
|
||||
default_model = model_cfg.strip()
|
||||
|
||||
configured_mcp_servers = [
|
||||
name
|
||||
for name, cfg in (config.get("mcp_servers") or {}).items()
|
||||
if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
|
||||
]
|
||||
|
||||
kwargs = {
|
||||
"platform": "acp",
|
||||
"enabled_toolsets": ["hermes-acp"],
|
||||
"enabled_toolsets": _expand_acp_enabled_toolsets(
|
||||
["hermes-acp"],
|
||||
mcp_server_names=configured_mcp_servers,
|
||||
),
|
||||
"quiet_mode": True,
|
||||
"session_id": session_id,
|
||||
"model": model or default_model,
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ import copy
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
|
@ -277,8 +279,9 @@ def _is_oauth_token(key: str) -> bool:
|
|||
Positively identifies Anthropic OAuth tokens by their key format:
|
||||
- ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
|
||||
- ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
|
||||
- ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)
|
||||
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
|
||||
and correctly return False.
|
||||
"""
|
||||
if not key:
|
||||
|
|
@ -292,6 +295,9 @@ def _is_oauth_token(key: str) -> bool:
|
|||
# JWTs from Anthropic OAuth flow
|
||||
if key.startswith("eyJ"):
|
||||
return True
|
||||
# Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
|
||||
if key.startswith("cc-"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
|
|
@ -461,8 +467,72 @@ def build_anthropic_bedrock_client(region: str):
|
|||
)
|
||||
|
||||
|
||||
def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
|
||||
"""Read Claude Code OAuth credentials from the macOS Keychain.
|
||||
|
||||
Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
|
||||
service name "Claude Code-credentials" rather than (or in addition to)
|
||||
the JSON file at ~/.claude/.credentials.json.
|
||||
|
||||
The password field contains a JSON string with the same claudeAiOauth
|
||||
structure as the JSON file.
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
import platform
|
||||
import subprocess
|
||||
|
||||
if platform.system() != "Darwin":
|
||||
return None
|
||||
|
||||
try:
|
||||
# Read the "Claude Code-credentials" generic password entry
|
||||
result = subprocess.run(
|
||||
["security", "find-generic-password",
|
||||
"-s", "Claude Code-credentials",
|
||||
"-w"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
except (OSError, subprocess.TimeoutExpired):
|
||||
logger.debug("Keychain: security command not available or timed out")
|
||||
return None
|
||||
|
||||
if result.returncode != 0:
|
||||
logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
|
||||
return None
|
||||
|
||||
raw = result.stdout.strip()
|
||||
if not raw:
|
||||
return None
|
||||
|
||||
try:
|
||||
data = json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Keychain: credentials payload is not valid JSON")
|
||||
return None
|
||||
|
||||
oauth_data = data.get("claudeAiOauth")
|
||||
if oauth_data and isinstance(oauth_data, dict):
|
||||
access_token = oauth_data.get("accessToken", "")
|
||||
if access_token:
|
||||
return {
|
||||
"accessToken": access_token,
|
||||
"refreshToken": oauth_data.get("refreshToken", ""),
|
||||
"expiresAt": oauth_data.get("expiresAt", 0),
|
||||
"source": "macos_keychain",
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
||||
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
|
||||
"""Read refreshable Claude Code OAuth credentials.
|
||||
|
||||
Checks two sources in order:
|
||||
1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
|
||||
2. ~/.claude/.credentials.json file
|
||||
|
||||
This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
|
||||
subscription flow is OAuth/setup-token based with refreshable credentials,
|
||||
|
|
@ -471,6 +541,12 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
|||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
# Try macOS Keychain first (covers Claude Code >=2.1.114)
|
||||
kc_creds = _read_claude_code_credentials_from_keychain()
|
||||
if kc_creds:
|
||||
return kc_creds
|
||||
|
||||
# Fall back to JSON file
|
||||
cred_path = Path.home() / ".claude" / ".credentials.json"
|
||||
if cred_path.exists():
|
||||
try:
|
||||
|
|
@ -641,7 +717,9 @@ def _write_claude_code_credentials(
|
|||
existing["claudeAiOauth"] = oauth_data
|
||||
|
||||
cred_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
|
||||
_tmp_cred = cred_path.with_suffix(".tmp")
|
||||
_tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8")
|
||||
_tmp_cred.replace(cred_path)
|
||||
# Restrict permissions (credentials file)
|
||||
cred_path.chmod(0o600)
|
||||
except (OSError, IOError) as e:
|
||||
|
|
@ -908,6 +986,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_bedrock_model_id(model: str) -> bool:
|
||||
"""Detect AWS Bedrock model IDs that use dots as namespace separators.
|
||||
|
||||
Bedrock model IDs come in two forms:
|
||||
- Bare: ``anthropic.claude-opus-4-7``
|
||||
- Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
|
||||
|
||||
In both cases the dots separate namespace components, not version
|
||||
numbers, and must be preserved verbatim for the Bedrock API.
|
||||
"""
|
||||
lower = model.lower()
|
||||
# Regional inference-profile prefixes
|
||||
if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
|
||||
return True
|
||||
# Bare Bedrock model IDs: provider.model-family
|
||||
if lower.startswith("anthropic."):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
"""Normalize a model name for the Anthropic API.
|
||||
|
||||
|
|
@ -915,11 +1013,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
|||
- Converts dots to hyphens in version numbers (OpenRouter uses dots,
|
||||
Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
|
||||
preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
|
||||
- Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
|
||||
regional inference profiles (``us.anthropic.claude-*``) whose dots
|
||||
are namespace separators, not version separators.
|
||||
"""
|
||||
lower = model.lower()
|
||||
if lower.startswith("anthropic/"):
|
||||
model = model[len("anthropic/"):]
|
||||
if not preserve_dots:
|
||||
# Bedrock model IDs use dots as namespace separators
|
||||
# (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
|
||||
# These must not be converted to hyphens. See issue #12295.
|
||||
if _is_bedrock_model_id(model):
|
||||
return model
|
||||
# OpenRouter uses dots for version separators (claude-opus-4.6),
|
||||
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
|
||||
model = model.replace(".", "-")
|
||||
|
|
@ -1598,4 +1704,3 @@ def build_anthropic_kwargs(
|
|||
return kwargs
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -74,6 +74,12 @@ _PROVIDER_ALIASES = {
|
|||
"minimax_cn": "minimax-cn",
|
||||
"claude": "anthropic",
|
||||
"claude-code": "anthropic",
|
||||
"github": "copilot",
|
||||
"github-copilot": "copilot",
|
||||
"github-model": "copilot",
|
||||
"github-models": "copilot",
|
||||
"github-copilot-acp": "copilot-acp",
|
||||
"copilot-acp-agent": "copilot-acp",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -89,10 +95,11 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
|
|||
if normalized == "main":
|
||||
# Resolve to the user's actual main provider so named custom providers
|
||||
# and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly.
|
||||
main_prov = _read_main_provider()
|
||||
main_prov = (_read_main_provider() or "").strip().lower()
|
||||
if main_prov and main_prov not in ("auto", "main", ""):
|
||||
return main_prov
|
||||
return "custom"
|
||||
normalized = main_prov
|
||||
else:
|
||||
return "custom"
|
||||
return _PROVIDER_ALIASES.get(normalized, normalized)
|
||||
|
||||
|
||||
|
|
@ -151,7 +158,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
|||
# differs from their main chat model, map it here. The vision auto-detect
|
||||
# "exotic provider" branch checks this before falling back to the main model.
|
||||
_PROVIDER_VISION_MODELS: Dict[str, str] = {
|
||||
"xiaomi": "mimo-v2-omni",
|
||||
"xiaomi": "mimo-v2.5",
|
||||
"zai": "glm-5v-turbo",
|
||||
}
|
||||
|
||||
|
|
@ -916,6 +923,19 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _describe_openrouter_unavailable() -> str:
|
||||
"""Return a more precise OpenRouter auth failure reason for logs."""
|
||||
pool_present, entry = _select_pool_entry("openrouter")
|
||||
if pool_present:
|
||||
if entry is None:
|
||||
return "OpenRouter credential pool has no usable entries (credentials may be exhausted)"
|
||||
if not _pool_runtime_api_key(entry):
|
||||
return "OpenRouter credential pool entry is missing a runtime API key"
|
||||
if not str(os.getenv("OPENROUTER_API_KEY") or "").strip():
|
||||
return "OPENROUTER_API_KEY not set"
|
||||
return "no usable OpenRouter credentials found"
|
||||
|
||||
|
||||
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
# Check cross-session rate limit guard before attempting Nous —
|
||||
# if another session already recorded a 429, skip Nous entirely
|
||||
|
|
@ -1329,6 +1349,68 @@ def _is_auth_error(exc: Exception) -> bool:
|
|||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
with _client_cache_lock:
|
||||
stale_keys = [
|
||||
key for key in _client_cache
|
||||
if _normalize_aux_provider(str(key[0])) == normalized
|
||||
]
|
||||
for key in stale_keys:
|
||||
client = _client_cache.get(key, (None, None, None))[0]
|
||||
if client is not None:
|
||||
_force_close_async_httpx(client)
|
||||
try:
|
||||
close_fn = getattr(client, "close", None)
|
||||
if callable(close_fn):
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
_client_cache.pop(key, None)
|
||||
|
||||
|
||||
def _refresh_provider_credentials(provider: str) -> bool:
|
||||
"""Refresh short-lived credentials for OAuth-backed auxiliary providers."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
try:
|
||||
if normalized == "openai-codex":
|
||||
from hermes_cli.auth import resolve_codex_runtime_credentials
|
||||
|
||||
creds = resolve_codex_runtime_credentials(force_refresh=True)
|
||||
if not str(creds.get("api_key", "") or "").strip():
|
||||
return False
|
||||
_evict_cached_clients(normalized)
|
||||
return True
|
||||
if normalized == "nous":
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
|
||||
creds = resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
|
||||
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
||||
force_mint=True,
|
||||
)
|
||||
if not str(creds.get("api_key", "") or "").strip():
|
||||
return False
|
||||
_evict_cached_clients(normalized)
|
||||
return True
|
||||
if normalized == "anthropic":
|
||||
from agent.anthropic_adapter import read_claude_code_credentials, _refresh_oauth_token, resolve_anthropic_token
|
||||
|
||||
creds = read_claude_code_credentials()
|
||||
token = _refresh_oauth_token(creds) if isinstance(creds, dict) and creds.get("refreshToken") else None
|
||||
if not str(token or "").strip():
|
||||
token = resolve_anthropic_token()
|
||||
if not str(token or "").strip():
|
||||
return False
|
||||
_evict_cached_clients(normalized)
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary provider credential refresh failed for %s: %s", normalized, exc)
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def _try_payment_fallback(
|
||||
failed_provider: str,
|
||||
task: str = None,
|
||||
|
|
@ -1627,8 +1709,10 @@ def resolve_provider_client(
|
|||
if provider == "openrouter":
|
||||
client, default = _try_openrouter()
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: openrouter requested "
|
||||
"but OPENROUTER_API_KEY not set")
|
||||
logger.warning(
|
||||
"resolve_provider_client: openrouter requested but %s",
|
||||
_describe_openrouter_unavailable(),
|
||||
)
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
|
|
@ -1721,7 +1805,7 @@ def resolve_provider_client(
|
|||
"but no endpoint credentials found")
|
||||
return None, None
|
||||
|
||||
# ── Named custom providers (config.yaml custom_providers list) ───
|
||||
# ── Named custom providers (config.yaml providers dict / custom_providers list) ───
|
||||
try:
|
||||
from hermes_cli.runtime_provider import _get_named_custom_provider
|
||||
custom_entry = _get_named_custom_provider(provider)
|
||||
|
|
@ -1732,16 +1816,51 @@ def resolve_provider_client(
|
|||
if not custom_key and custom_key_env:
|
||||
custom_key = os.getenv(custom_key_env, "").strip()
|
||||
custom_key = custom_key or "no-key-required"
|
||||
# An explicit per-task api_mode override (from _resolve_task_provider_model)
|
||||
# wins; otherwise fall back to what the provider entry declared.
|
||||
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
|
||||
if custom_base:
|
||||
final_model = _normalize_resolved_model(
|
||||
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
logger.debug(
|
||||
"resolve_provider_client: named custom provider %r (%s)",
|
||||
provider, final_model)
|
||||
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
|
||||
provider, final_model, entry_api_mode or "chat_completions")
|
||||
# anthropic_messages: route through the Anthropic Messages API
|
||||
# via AnthropicAuxiliaryClient. Mirrors the anonymous-custom
|
||||
# branch in _try_custom_endpoint(). See #15033.
|
||||
if entry_api_mode == "anthropic_messages":
|
||||
try:
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
real_client = build_anthropic_client(custom_key, custom_base)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"Named custom provider %r declares api_mode="
|
||||
"anthropic_messages but the anthropic SDK is not "
|
||||
"installed — falling back to OpenAI-wire.",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
sync_anthropic = AnthropicAuxiliaryClient(
|
||||
real_client, final_model, custom_key, custom_base, is_oauth=False,
|
||||
)
|
||||
if async_mode:
|
||||
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
|
||||
return sync_anthropic, final_model
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
# codex_responses or inherited auto-detect (via _wrap_if_needed).
|
||||
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
|
||||
# override). Named-provider entry api_mode=codex_responses also
|
||||
# flows through here.
|
||||
if entry_api_mode == "codex_responses" and not isinstance(
|
||||
client, CodexAuxiliaryClient
|
||||
):
|
||||
client = CodexAuxiliaryClient(client, final_model)
|
||||
else:
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning(
|
||||
|
|
@ -1874,6 +1993,39 @@ def resolve_provider_client(
|
|||
"directly supported", provider)
|
||||
return None, None
|
||||
|
||||
elif pconfig.auth_type == "aws_sdk":
|
||||
# AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via
|
||||
# boto3's credential chain (IAM roles, SSO, env vars, instance metadata).
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region
|
||||
from agent.anthropic_adapter import build_anthropic_bedrock_client
|
||||
except ImportError:
|
||||
logger.warning("resolve_provider_client: bedrock requested but "
|
||||
"boto3 or anthropic SDK not installed")
|
||||
return None, None
|
||||
|
||||
if not has_aws_credentials():
|
||||
logger.debug("resolve_provider_client: bedrock requested but "
|
||||
"no AWS credentials found")
|
||||
return None, None
|
||||
|
||||
region = resolve_bedrock_region()
|
||||
default_model = "anthropic.claude-haiku-4-5-20251001-v1:0"
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
try:
|
||||
real_client = build_anthropic_bedrock_client(region)
|
||||
except ImportError as exc:
|
||||
logger.warning("resolve_provider_client: cannot create Bedrock "
|
||||
"client: %s", exc)
|
||||
return None, None
|
||||
client = AnthropicAuxiliaryClient(
|
||||
real_client, final_model, api_key="aws-sdk",
|
||||
base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
|
||||
)
|
||||
logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
|
||||
# OAuth providers — route through their specific try functions
|
||||
if provider == "nous":
|
||||
|
|
@ -2842,6 +2994,49 @@ def call_llm(
|
|||
return _validate_llm_response(
|
||||
refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Auth refresh retry ───────────────────────────────────────
|
||||
if (_is_auth_error(first_err)
|
||||
and resolved_provider not in ("auto", "", None)
|
||||
and not client_is_nous):
|
||||
if _refresh_provider_credentials(resolved_provider):
|
||||
logger.info(
|
||||
"Auxiliary %s: refreshed %s credentials after auth error, retrying",
|
||||
task or "call", resolved_provider,
|
||||
)
|
||||
retry_client, retry_model = (
|
||||
resolve_vision_provider_client(
|
||||
provider=resolved_provider,
|
||||
model=final_model,
|
||||
async_mode=False,
|
||||
)[1:]
|
||||
if task == "vision"
|
||||
else _get_cached_client(
|
||||
resolved_provider,
|
||||
resolved_model,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
)
|
||||
if retry_client is not None:
|
||||
retry_kwargs = _build_call_kwargs(
|
||||
resolved_provider,
|
||||
retry_model or final_model,
|
||||
messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
tools=tools,
|
||||
timeout=effective_timeout,
|
||||
extra_body=effective_extra_body,
|
||||
base_url=resolved_base_url,
|
||||
)
|
||||
_retry_base = str(getattr(retry_client, "base_url", "") or "")
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
|
||||
retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
|
||||
return _validate_llm_response(
|
||||
retry_client.chat.completions.create(**retry_kwargs), task)
|
||||
|
||||
# ── Payment / credit exhaustion fallback ──────────────────────
|
||||
# When the resolved provider returns 402 or a credit-related error,
|
||||
# try alternative providers instead of giving up. This handles the
|
||||
|
|
@ -3062,6 +3257,48 @@ async def async_call_llm(
|
|||
return _validate_llm_response(
|
||||
await refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Auth refresh retry (mirrors sync call_llm) ───────────────
|
||||
if (_is_auth_error(first_err)
|
||||
and resolved_provider not in ("auto", "", None)
|
||||
and not client_is_nous):
|
||||
if _refresh_provider_credentials(resolved_provider):
|
||||
logger.info(
|
||||
"Auxiliary %s (async): refreshed %s credentials after auth error, retrying",
|
||||
task or "call", resolved_provider,
|
||||
)
|
||||
if task == "vision":
|
||||
_, retry_client, retry_model = resolve_vision_provider_client(
|
||||
provider=resolved_provider,
|
||||
model=final_model,
|
||||
async_mode=True,
|
||||
)
|
||||
else:
|
||||
retry_client, retry_model = _get_cached_client(
|
||||
resolved_provider,
|
||||
resolved_model,
|
||||
async_mode=True,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
)
|
||||
if retry_client is not None:
|
||||
retry_kwargs = _build_call_kwargs(
|
||||
resolved_provider,
|
||||
retry_model or final_model,
|
||||
messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
tools=tools,
|
||||
timeout=effective_timeout,
|
||||
extra_body=effective_extra_body,
|
||||
base_url=resolved_base_url,
|
||||
)
|
||||
_retry_base = str(getattr(retry_client, "base_url", "") or "")
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
|
||||
retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
|
||||
return _validate_llm_response(
|
||||
await retry_client.chat.completions.create(**retry_kwargs), task)
|
||||
|
||||
# ── Payment / connection fallback (mirrors sync call_llm) ─────
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
is_auto = resolved_provider in ("auto", "", None)
|
||||
|
|
|
|||
|
|
@ -87,6 +87,114 @@ def reset_client_cache():
|
|||
_bedrock_control_client_cache.clear()
|
||||
|
||||
|
||||
def invalidate_runtime_client(region: str) -> bool:
|
||||
"""Evict the cached ``bedrock-runtime`` client for a single region.
|
||||
|
||||
Per-region counterpart to :func:`reset_client_cache`. Used by the converse
|
||||
call wrappers to discard clients whose underlying HTTP connection has
|
||||
gone stale, so the next call allocates a fresh client (with a fresh
|
||||
connection pool) instead of reusing a dead socket.
|
||||
|
||||
Returns True if a cached entry was evicted, False if the region was not
|
||||
cached.
|
||||
"""
|
||||
existed = region in _bedrock_runtime_client_cache
|
||||
_bedrock_runtime_client_cache.pop(region, None)
|
||||
return existed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stale-connection detection
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# boto3 caches its HTTPS connection pool inside the client object. When a
|
||||
# pooled connection is killed out from under us (NAT timeout, VPN flap,
|
||||
# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
|
||||
# one of a handful of low-level exceptions — most commonly
|
||||
# ``botocore.exceptions.ConnectionClosedError`` or
|
||||
# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
|
||||
# ``assert`` in a couple of paths (connection pool state checks, chunked
|
||||
# response readers) which bubbles up as a bare ``AssertionError`` with an
|
||||
# empty ``str(exc)``.
|
||||
#
|
||||
# In all of these cases the client is the problem, not the request: retrying
|
||||
# with the same cached client reproduces the failure until the process
|
||||
# restarts. The fix is to evict the region's cached client so the next
|
||||
# attempt builds a new one.
|
||||
|
||||
_STALE_LIB_MODULE_PREFIXES = (
|
||||
"urllib3.",
|
||||
"botocore.",
|
||||
"boto3.",
|
||||
)
|
||||
|
||||
|
||||
def _traceback_frames_modules(exc: BaseException):
|
||||
"""Yield ``__name__``-style module strings for each frame in exc's traceback."""
|
||||
tb = getattr(exc, "__traceback__", None)
|
||||
while tb is not None:
|
||||
frame = tb.tb_frame
|
||||
module = frame.f_globals.get("__name__", "")
|
||||
yield module or ""
|
||||
tb = tb.tb_next
|
||||
|
||||
|
||||
def is_stale_connection_error(exc: BaseException) -> bool:
|
||||
"""Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.
|
||||
|
||||
Matches:
|
||||
* ``botocore.exceptions.ConnectionError`` and subclasses
|
||||
(``ConnectionClosedError``, ``EndpointConnectionError``,
|
||||
``ReadTimeoutError``, ``ConnectTimeoutError``).
|
||||
* ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` /
|
||||
``ConnectionError`` (best-effort import — urllib3 is a transitive
|
||||
dependency of botocore so it is always available in practice).
|
||||
* Bare ``AssertionError`` raised from a frame inside urllib3, botocore,
|
||||
or boto3. These are internal-invariant failures (typically triggered
|
||||
by corrupted connection-pool state after a dropped socket) and are
|
||||
recoverable by swapping the client.
|
||||
|
||||
Non-library ``AssertionError``s (from application code or tests) are
|
||||
intentionally not matched — only library-internal asserts signal stale
|
||||
connection state.
|
||||
"""
|
||||
# botocore: the canonical signal — HTTPClientError is the umbrella for
|
||||
# ConnectionClosedError, ReadTimeoutError, EndpointConnectionError,
|
||||
# ConnectTimeoutError, and ProxyConnectionError. ConnectionError covers
|
||||
# the same family via a different branch of the hierarchy.
|
||||
try:
|
||||
from botocore.exceptions import (
|
||||
ConnectionError as BotoConnectionError,
|
||||
HTTPClientError,
|
||||
)
|
||||
botocore_errors: tuple = (BotoConnectionError, HTTPClientError)
|
||||
except ImportError: # pragma: no cover — botocore always present with boto3
|
||||
botocore_errors = ()
|
||||
if botocore_errors and isinstance(exc, botocore_errors):
|
||||
return True
|
||||
|
||||
# urllib3: low-level transport failures
|
||||
try:
|
||||
from urllib3.exceptions import (
|
||||
ProtocolError,
|
||||
NewConnectionError,
|
||||
ConnectionError as Urllib3ConnectionError,
|
||||
)
|
||||
urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError)
|
||||
except ImportError: # pragma: no cover
|
||||
urllib3_errors = ()
|
||||
if urllib3_errors and isinstance(exc, urllib3_errors):
|
||||
return True
|
||||
|
||||
# Library-internal AssertionError (urllib3 / botocore / boto3)
|
||||
if isinstance(exc, AssertionError):
|
||||
for module in _traceback_frames_modules(exc):
|
||||
if any(module.startswith(prefix) for prefix in _STALE_LIB_MODULE_PREFIXES):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AWS credential detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -787,7 +895,17 @@ def call_converse(
|
|||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
response = client.converse(**kwargs)
|
||||
try:
|
||||
response = client.converse(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse(region=%s, model=%s): "
|
||||
"%s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
return normalize_converse_response(response)
|
||||
|
||||
|
||||
|
|
@ -819,7 +937,17 @@ def call_converse_stream(
|
|||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
response = client.converse_stream(**kwargs)
|
||||
try:
|
||||
response = client.converse_stream(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse_stream(region=%s, "
|
||||
"model=%s): %s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
return normalize_converse_stream_events(response)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,23 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Matches Codex/Harmony tool-call serialization that occasionally leaks into
|
||||
# assistant-message content when the model fails to emit a structured
|
||||
# ``function_call`` item. Accepts the common forms:
|
||||
#
|
||||
# to=functions.exec_command
|
||||
# assistant to=functions.exec_command
|
||||
# <|channel|>commentary to=functions.exec_command
|
||||
#
|
||||
# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
|
||||
# Harmony channel prefix varies by degeneration mode. Case-insensitive to
|
||||
# cover lowercase/uppercase ``assistant`` variants.
|
||||
_TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -787,6 +804,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
|||
if isinstance(out_text, str):
|
||||
final_text = out_text.strip()
|
||||
|
||||
# ── Tool-call leak recovery ──────────────────────────────────
|
||||
# gpt-5.x on the Codex Responses API sometimes degenerates and emits
|
||||
# what should be a structured `function_call` item as plain assistant
|
||||
# text using the Harmony/Codex serialization (``to=functions.foo
|
||||
# {json}`` or ``assistant to=functions.foo {json}``). The model
|
||||
# intended to call a tool, but the intent never made it into
|
||||
# ``response.output`` as a ``function_call`` item, so ``tool_calls``
|
||||
# is empty here. If we pass this through, the parent sees a
|
||||
# confident-looking summary with no audit trail (empty ``tool_trace``)
|
||||
# and no tools actually ran — the Taiwan-embassy-email incident.
|
||||
#
|
||||
# Detection: leaked tokens always contain ``to=functions.<name>`` and
|
||||
# the assistant message has no real tool calls. Treat it as incomplete
|
||||
# so the existing Codex-incomplete continuation path (3 retries,
|
||||
# handled in run_agent.py) gets a chance to re-elicit a proper
|
||||
# ``function_call`` item. The existing loop already handles message
|
||||
# append, dedup, and retry budget.
|
||||
leaked_tool_call_text = False
|
||||
if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
|
||||
leaked_tool_call_text = True
|
||||
logger.warning(
|
||||
"Codex response contains leaked tool-call text in assistant content "
|
||||
"(no structured function_call items). Treating as incomplete so the "
|
||||
"continuation path can re-elicit a proper tool call. Leaked snippet: %r",
|
||||
final_text[:300],
|
||||
)
|
||||
# Clear the text so downstream code doesn't surface the garbage as
|
||||
# a summary. The encrypted reasoning items (if any) are preserved
|
||||
# so the model keeps its chain-of-thought on the retry.
|
||||
final_text = ""
|
||||
|
||||
assistant_message = SimpleNamespace(
|
||||
content=final_text,
|
||||
tool_calls=tool_calls,
|
||||
|
|
@ -798,6 +846,8 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
|||
|
||||
if tool_calls:
|
||||
finish_reason = "tool_calls"
|
||||
elif leaked_tool_call_text:
|
||||
finish_reason = "incomplete"
|
||||
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
||||
finish_reason = "incomplete"
|
||||
elif reasoning_items_raw and not final_text:
|
||||
|
|
|
|||
|
|
@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
|
|||
self._context_probed = False
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
self._last_summary_error = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
|
||||
|
|
@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine):
|
|||
self._last_compression_savings_pct: float = 100.0
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
|
|
@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
self._previous_summary = summary
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
self._summary_model_fallen_back = False
|
||||
self._last_summary_error = None
|
||||
return self._with_summary_prefix(summary)
|
||||
except RuntimeError:
|
||||
# No provider configured — long cooldown, unlikely to self-resolve
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
self._last_summary_error = "no auxiliary LLM provider configured"
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary "
|
||||
"for %d seconds.",
|
||||
|
|
@ -853,6 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||
_transient_cooldown = 60
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
||||
err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(err_text) > 220:
|
||||
err_text = err_text[:217].rstrip() + "..."
|
||||
self._last_summary_error = err_text
|
||||
logging.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
|
|
@ -1099,6 +1107,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
|
||||
return max(cut_idx, head_end + 1)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# ContextEngine: manual /compress preflight
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Return True if there is a non-empty middle region to compact.
|
||||
|
||||
Overrides the ABC default so the gateway ``/compress`` guard can
|
||||
skip the LLM call when the transcript is still entirely inside
|
||||
the protected head/tail.
|
||||
"""
|
||||
compress_start = self._align_boundary_forward(messages, self.protect_first_n)
|
||||
compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
|
||||
return compress_start < compress_end
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Main compression entry point
|
||||
# ------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ class ContextEngine(ABC):
|
|||
self,
|
||||
messages: List[Dict[str, Any]],
|
||||
current_tokens: int = None,
|
||||
focus_topic: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Compact the message list and return the new message list.
|
||||
|
||||
|
|
@ -86,6 +87,12 @@ class ContextEngine(ABC):
|
|||
context budget. The implementation is free to summarize, build a
|
||||
DAG, or do anything else — as long as the returned list is a valid
|
||||
OpenAI-format message sequence.
|
||||
|
||||
Args:
|
||||
focus_topic: Optional topic string from manual ``/compress <focus>``.
|
||||
Engines that support guided compression should prioritise
|
||||
preserving information related to this topic. Engines that
|
||||
don't support it may simply ignore this argument.
|
||||
"""
|
||||
|
||||
# -- Optional: pre-flight check ----------------------------------------
|
||||
|
|
@ -98,6 +105,21 @@ class ContextEngine(ABC):
|
|||
"""
|
||||
return False
|
||||
|
||||
# -- Optional: manual /compress preflight ------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick check: is there anything in ``messages`` that can be compacted?
|
||||
|
||||
Used by the gateway ``/compress`` command as a preflight guard —
|
||||
returning False lets the gateway report "nothing to compress yet"
|
||||
without making an LLM call.
|
||||
|
||||
Default returns True (always attempt). Engines with a cheap way
|
||||
to introspect their own head/tail boundaries should override this
|
||||
to return False when the transcript is still entirely protected.
|
||||
"""
|
||||
return True
|
||||
|
||||
# -- Optional: session lifecycle ---------------------------------------
|
||||
|
||||
def on_session_start(self, session_id: str, **kwargs) -> None:
|
||||
|
|
|
|||
|
|
@ -46,6 +46,47 @@ def _resolve_args() -> list[str]:
|
|||
return shlex.split(raw)
|
||||
|
||||
|
||||
def _resolve_home_dir() -> str:
|
||||
"""Return a stable HOME for child ACP processes."""
|
||||
|
||||
try:
|
||||
from hermes_constants import get_subprocess_home
|
||||
|
||||
profile_home = get_subprocess_home()
|
||||
if profile_home:
|
||||
return profile_home
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
home = os.environ.get("HOME", "").strip()
|
||||
if home:
|
||||
return home
|
||||
|
||||
expanded = os.path.expanduser("~")
|
||||
if expanded and expanded != "~":
|
||||
return expanded
|
||||
|
||||
try:
|
||||
import pwd
|
||||
|
||||
resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
|
||||
if resolved:
|
||||
return resolved
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Last resort: /tmp (writable on any POSIX system). Avoids crashing the
|
||||
# subprocess with no HOME; callers can set HERMES_HOME explicitly if they
|
||||
# need a different writable dir.
|
||||
return "/tmp"
|
||||
|
||||
|
||||
def _build_subprocess_env() -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
env["HOME"] = _resolve_home_dir()
|
||||
return env
|
||||
|
||||
|
||||
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
|
|
@ -382,6 +423,7 @@ class CopilotACPClient:
|
|||
text=True,
|
||||
bufsize=1,
|
||||
cwd=self._acp_cwd,
|
||||
env=_build_subprocess_env(),
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
raise RuntimeError(
|
||||
|
|
|
|||
|
|
@ -455,6 +455,61 @@ class CredentialPool:
|
|||
logger.debug("Failed to sync from credentials file: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Nous pool entry from auth.json if tokens differ.
|
||||
|
||||
Nous OAuth refresh tokens are single-use. When another process
|
||||
(e.g. a concurrent cron) refreshes the token via
|
||||
``resolve_nous_runtime_credentials``, it writes fresh tokens to
|
||||
auth.json under ``_auth_store_lock``. The pool entry's tokens
|
||||
become stale. This method detects that and adopts the newer pair,
|
||||
avoiding a "refresh token reuse" revocation on the Nous Portal.
|
||||
"""
|
||||
if self.provider != "nous" or entry.source != "device_code":
|
||||
return entry
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "nous")
|
||||
if not state:
|
||||
return entry
|
||||
store_refresh = state.get("refresh_token", "")
|
||||
store_access = state.get("access_token", "")
|
||||
if store_refresh and store_refresh != entry.refresh_token:
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
}
|
||||
if state.get("expires_at"):
|
||||
field_updates["expires_at"] = state["expires_at"]
|
||||
if state.get("agent_key"):
|
||||
field_updates["agent_key"] = state["agent_key"]
|
||||
if state.get("agent_key_expires_at"):
|
||||
field_updates["agent_key_expires_at"] = state["agent_key_expires_at"]
|
||||
if state.get("inference_base_url"):
|
||||
field_updates["inference_base_url"] = state["inference_base_url"]
|
||||
extra_updates = dict(entry.extra)
|
||||
for extra_key in ("obtained_at", "expires_in", "agent_key_id",
|
||||
"agent_key_expires_in", "agent_key_reused",
|
||||
"agent_key_obtained_at"):
|
||||
val = state.get(extra_key)
|
||||
if val is not None:
|
||||
extra_updates[extra_key] = val
|
||||
updated = replace(entry, extra=extra_updates, **field_updates)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync Nous entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
|
||||
"""Write refreshed pool entry tokens back to auth.json providers.
|
||||
|
||||
|
|
@ -561,6 +616,9 @@ class CredentialPool:
|
|||
last_refresh=refreshed.get("last_refresh"),
|
||||
)
|
||||
elif self.provider == "nous":
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
nous_state = {
|
||||
"access_token": entry.access_token,
|
||||
"refresh_token": entry.refresh_token,
|
||||
|
|
@ -635,6 +693,26 @@ class CredentialPool:
|
|||
# Credentials file had a valid (non-expired) token — use it directly
|
||||
logger.debug("Credentials file has valid token, using without refresh")
|
||||
return synced
|
||||
# For nous: another process may have consumed the refresh token
|
||||
# between our proactive sync and the HTTP call. Re-sync from
|
||||
# auth.json and adopt the fresh tokens if available.
|
||||
if self.provider == "nous":
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced.refresh_token != entry.refresh_token:
|
||||
logger.debug("Nous refresh failed but auth.json has newer tokens — adopting")
|
||||
updated = replace(
|
||||
synced,
|
||||
last_status=STATUS_OK,
|
||||
last_status_at=None,
|
||||
last_error_code=None,
|
||||
last_error_reason=None,
|
||||
last_error_message=None,
|
||||
last_error_reset_at=None,
|
||||
)
|
||||
self._replace_entry(synced, updated)
|
||||
self._persist()
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
|
|
@ -698,6 +776,17 @@ class CredentialPool:
|
|||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For nous entries, sync from auth.json before status checks.
|
||||
# Another process may have successfully refreshed via
|
||||
# resolve_nous_runtime_credentials(), making this entry's
|
||||
# exhausted status stale.
|
||||
if (self.provider == "nous"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
|
|
@ -739,8 +828,11 @@ class CredentialPool:
|
|||
|
||||
if self._strategy == STRATEGY_LEAST_USED and len(available) > 1:
|
||||
entry = min(available, key=lambda e: e.request_count)
|
||||
# Increment usage counter so subsequent selections distribute load
|
||||
updated = replace(entry, request_count=entry.request_count + 1)
|
||||
self._replace_entry(entry, updated)
|
||||
self._current_id = entry.id
|
||||
return entry
|
||||
return updated
|
||||
|
||||
if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1:
|
||||
entry = available[0]
|
||||
|
|
@ -1056,6 +1148,18 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
"inference_base_url": state.get("inference_base_url"),
|
||||
"agent_key": state.get("agent_key"),
|
||||
"agent_key_expires_at": state.get("agent_key_expires_at"),
|
||||
# Carry the mint/refresh timestamps into the pool so
|
||||
# freshness-sensitive consumers (self-heal hooks, pool
|
||||
# pruning by age) can distinguish just-minted credentials
|
||||
# from stale ones. Without these, fresh device_code
|
||||
# entries get obtained_at=None and look older than they
|
||||
# are (#15099).
|
||||
"obtained_at": state.get("obtained_at"),
|
||||
"expires_in": state.get("expires_in"),
|
||||
"agent_key_id": state.get("agent_key_id"),
|
||||
"agent_key_expires_in": state.get("agent_key_expires_in"),
|
||||
"agent_key_reused": state.get("agent_key_reused"),
|
||||
"agent_key_obtained_at": state.get("agent_key_obtained_at"),
|
||||
"tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
|
||||
"label": seeded_label,
|
||||
},
|
||||
|
|
@ -1066,9 +1170,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
# env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN). They don't live in
|
||||
# the auth store or credential pool, so we resolve them here.
|
||||
try:
|
||||
from hermes_cli.copilot_auth import resolve_copilot_token
|
||||
from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token
|
||||
token, source = resolve_copilot_token()
|
||||
if token:
|
||||
api_token = get_copilot_api_token(token)
|
||||
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
|
|
@ -1080,7 +1185,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": token,
|
||||
"access_token": api_token,
|
||||
"base_url": pconfig.inference_base_url if pconfig else "",
|
||||
"label": source,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ class FailoverReason(enum.Enum):
|
|||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
|
|
@ -194,6 +195,29 @@ _MODEL_NOT_FOUND_PATTERNS = [
|
|||
"unsupported model",
|
||||
]
|
||||
|
||||
# OpenRouter aggregator policy-block patterns.
|
||||
#
|
||||
# When a user's OpenRouter account privacy setting (or a per-request
|
||||
# `provider.data_collection: deny` preference) excludes the only endpoint
|
||||
# serving a model, OpenRouter returns 404 with a *specific* message that is
|
||||
# distinct from "model not found":
|
||||
#
|
||||
# "No endpoints available matching your guardrail restrictions and
|
||||
# data policy. Configure: https://openrouter.ai/settings/privacy"
|
||||
#
|
||||
# We classify this as `provider_policy_blocked` rather than
|
||||
# `model_not_found` because:
|
||||
# - The model *exists* — model_not_found is misleading in logs
|
||||
# - Provider fallback won't help: the account-level setting applies to
|
||||
# every call on the same OpenRouter account
|
||||
# - The error body already contains the fix URL, so the user gets
|
||||
# actionable guidance without us rewriting the message
|
||||
_PROVIDER_POLICY_BLOCKED_PATTERNS = [
|
||||
"no endpoints available matching your guardrail",
|
||||
"no endpoints available matching your data policy",
|
||||
"no endpoints found matching your data policy",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
_AUTH_PATTERNS = [
|
||||
"invalid api key",
|
||||
|
|
@ -319,6 +343,11 @@ def classify_api_error(
|
|||
"""
|
||||
status_code = _extract_status_code(error)
|
||||
error_type = type(error).__name__
|
||||
# Copilot/GitHub Models RateLimitError may not set .status_code; force 429
|
||||
# so downstream rate-limit handling (classifier reason, pool rotation,
|
||||
# fallback gating) fires correctly instead of misclassifying as generic.
|
||||
if status_code is None and error_type == "RateLimitError":
|
||||
status_code = 429
|
||||
body = _extract_error_body(error)
|
||||
error_code = _extract_error_code(body)
|
||||
|
||||
|
|
@ -523,6 +552,17 @@ def _classify_by_status(
|
|||
return _classify_402(error_msg, result_fn)
|
||||
|
||||
if status_code == 404:
|
||||
# OpenRouter policy-block 404 — distinct from "model not found".
|
||||
# The model exists; the user's account privacy setting excludes the
|
||||
# only endpoint serving it. Falling back to another provider won't
|
||||
# help (same account setting applies). The error body already
|
||||
# contains the fix URL, so just surface it.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
|
|
@ -640,6 +680,12 @@ def _classify_400(
|
|||
)
|
||||
|
||||
# Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
|
|
@ -812,6 +858,15 @@ def _classify_by_message(
|
|||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Provider policy-block (aggregator-side guardrail) — check before
|
||||
# model_not_found so we don't mis-label as a missing model.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
|
|
|
|||
|
|
@ -44,6 +44,97 @@ def is_native_gemini_base_url(base_url: str) -> bool:
|
|||
return not normalized.endswith("/openai")
|
||||
|
||||
|
||||
def probe_gemini_tier(
|
||||
api_key: str,
|
||||
base_url: str = DEFAULT_GEMINI_BASE_URL,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
timeout: float = 10.0,
|
||||
) -> str:
|
||||
"""Probe a Google AI Studio API key and return its tier.
|
||||
|
||||
Returns one of:
|
||||
|
||||
- ``"free"`` -- key is on the free tier (unusable with Hermes)
|
||||
- ``"paid"`` -- key is on a paid tier
|
||||
- ``"unknown"`` -- probe failed; callers should proceed without blocking.
|
||||
"""
|
||||
key = (api_key or "").strip()
|
||||
if not key:
|
||||
return "unknown"
|
||||
|
||||
normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
|
||||
if not normalized_base:
|
||||
normalized_base = DEFAULT_GEMINI_BASE_URL
|
||||
if normalized_base.lower().endswith("/openai"):
|
||||
normalized_base = normalized_base[: -len("/openai")]
|
||||
|
||||
url = f"{normalized_base}/models/{model}:generateContent"
|
||||
payload = {
|
||||
"contents": [{"role": "user", "parts": [{"text": "hi"}]}],
|
||||
"generationConfig": {"maxOutputTokens": 1},
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=timeout) as client:
|
||||
resp = client.post(
|
||||
url,
|
||||
params={"key": key},
|
||||
json=payload,
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("probe_gemini_tier: network error: %s", exc)
|
||||
return "unknown"
|
||||
|
||||
headers_lower = {k.lower(): v for k, v in resp.headers.items()}
|
||||
rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
|
||||
if rpd_header:
|
||||
try:
|
||||
rpd_val = int(rpd_header)
|
||||
except (TypeError, ValueError):
|
||||
rpd_val = None
|
||||
# Published free-tier daily caps (Dec 2025):
|
||||
# gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
|
||||
# Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
|
||||
if rpd_val is not None and rpd_val <= 1000:
|
||||
return "free"
|
||||
if rpd_val is not None and rpd_val > 1000:
|
||||
return "paid"
|
||||
|
||||
if resp.status_code == 429:
|
||||
body_text = ""
|
||||
try:
|
||||
body_text = resp.text or ""
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if "free_tier" in body_text.lower():
|
||||
return "free"
|
||||
return "paid"
|
||||
|
||||
if 200 <= resp.status_code < 300:
|
||||
return "paid"
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def is_free_tier_quota_error(error_message: str) -> bool:
|
||||
"""Return True when a Gemini 429 message indicates free-tier exhaustion."""
|
||||
if not error_message:
|
||||
return False
|
||||
return "free_tier" in error_message.lower()
|
||||
|
||||
|
||||
_FREE_TIER_GUIDANCE = (
|
||||
"\n\nYour Google API key is on the free tier (<= 250 requests/day for "
|
||||
"gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
|
||||
"so the free tier is exhausted in a handful of messages and cannot sustain "
|
||||
"an agent session. Enable billing on your Google Cloud project and "
|
||||
"regenerate the key in a billing-enabled project: "
|
||||
"https://aistudio.google.com/apikey"
|
||||
)
|
||||
|
||||
|
||||
class GeminiAPIError(Exception):
|
||||
"""Error shape compatible with Hermes retry/error classification."""
|
||||
|
||||
|
|
@ -650,6 +741,12 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
|
|||
else:
|
||||
message = f"Gemini returned HTTP {status}: {body_text[:500]}"
|
||||
|
||||
# Free-tier quota exhaustion -> append actionable guidance so users who
|
||||
# bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
|
||||
# that the free tier cannot sustain an agent session.
|
||||
if status == 429 and is_free_tier_quota_error(err_message or body_text):
|
||||
message = message + _FREE_TIER_GUIDANCE
|
||||
|
||||
return GeminiAPIError(
|
||||
message,
|
||||
code=code,
|
||||
|
|
@ -704,6 +801,13 @@ class GeminiNativeClient:
|
|||
http_client: Optional[httpx.Client] = None,
|
||||
**_: Any,
|
||||
) -> None:
|
||||
if not (api_key or "").strip():
|
||||
raise RuntimeError(
|
||||
"Gemini native client requires an API key, but none was provided. "
|
||||
"Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
|
||||
"(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
|
||||
"to configure the Google provider."
|
||||
)
|
||||
self.api_key = api_key
|
||||
normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
|
||||
if normalized_base.endswith("/openai"):
|
||||
|
|
|
|||
|
|
@ -73,6 +73,20 @@ def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
|
|||
]
|
||||
continue
|
||||
cleaned[key] = value
|
||||
|
||||
# Gemini's Schema validator requires every ``enum`` entry to be a string,
|
||||
# even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
|
||||
# OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
|
||||
# ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
|
||||
# so we only drop the ``enum`` when it would collide with Gemini's rule.
|
||||
# Keeping ``type: integer`` plus the human-readable description gives the
|
||||
# model enough guidance; the tool handler still validates the value.
|
||||
enum_val = cleaned.get("enum")
|
||||
type_val = cleaned.get("type")
|
||||
if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
|
||||
if any(not isinstance(item, str) for item in enum_val):
|
||||
cleaned.pop("enum", None)
|
||||
|
||||
return cleaned
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ from __future__ import annotations
|
|||
import json
|
||||
import logging
|
||||
import re
|
||||
import inspect
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
|
|
@ -312,7 +313,39 @@ class MemoryManager:
|
|||
)
|
||||
return "\n\n".join(parts)
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
@staticmethod
|
||||
def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
|
||||
"""Return how to pass metadata to a provider's memory-write hook."""
|
||||
try:
|
||||
signature = inspect.signature(provider.on_memory_write)
|
||||
except (TypeError, ValueError):
|
||||
return "keyword"
|
||||
|
||||
params = list(signature.parameters.values())
|
||||
if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
|
||||
return "keyword"
|
||||
if "metadata" in signature.parameters:
|
||||
return "keyword"
|
||||
|
||||
accepted = [
|
||||
p for p in params
|
||||
if p.kind in (
|
||||
inspect.Parameter.POSITIONAL_ONLY,
|
||||
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
||||
inspect.Parameter.KEYWORD_ONLY,
|
||||
)
|
||||
]
|
||||
if len(accepted) >= 4:
|
||||
return "positional"
|
||||
return "legacy"
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Notify external providers when the built-in memory tool writes.
|
||||
|
||||
Skips the builtin provider itself (it's the source of the write).
|
||||
|
|
@ -321,7 +354,15 @@ class MemoryManager:
|
|||
if provider.name == "builtin":
|
||||
continue
|
||||
try:
|
||||
provider.on_memory_write(action, target, content)
|
||||
metadata_mode = self._provider_memory_write_metadata_mode(provider)
|
||||
if metadata_mode == "keyword":
|
||||
provider.on_memory_write(
|
||||
action, target, content, metadata=dict(metadata or {})
|
||||
)
|
||||
elif metadata_mode == "positional":
|
||||
provider.on_memory_write(action, target, content, dict(metadata or {}))
|
||||
else:
|
||||
provider.on_memory_write(action, target, content)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Memory provider '%s' on_memory_write failed: %s",
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ Optional hooks (override to opt in):
|
|||
on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
|
||||
on_session_end(messages) — end-of-session extraction
|
||||
on_pre_compress(messages) -> str — extract before context compression
|
||||
on_memory_write(action, target, content) — mirror built-in memory writes
|
||||
on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
|
||||
on_delegation(task, result, **kwargs) — parent-side observation of subagent work
|
||||
"""
|
||||
|
||||
|
|
@ -34,7 +34,7 @@ from __future__ import annotations
|
|||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -220,12 +220,21 @@ class MemoryProvider(ABC):
|
|||
should all have ``env_var`` set and this method stays no-op).
|
||||
"""
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Called when the built-in memory tool writes an entry.
|
||||
|
||||
action: 'add', 'replace', or 'remove'
|
||||
target: 'memory' or 'user'
|
||||
content: the entry content
|
||||
metadata: structured provenance for the write, when available. Common
|
||||
keys include ``write_origin``, ``execution_context``, ``session_id``,
|
||||
``parent_session_id``, ``platform``, and ``tool_name``.
|
||||
|
||||
Use to mirror built-in memory writes to your backend.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ and run_agent.py for pre-flight context checks.
|
|||
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
|
@ -21,6 +22,25 @@ from hermes_constants import OPENROUTER_MODELS_URL
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_requests_verify() -> bool | str:
|
||||
"""Resolve SSL verify setting for `requests` calls from env vars.
|
||||
|
||||
The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
|
||||
by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
|
||||
and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
|
||||
that a single env var can cover both `requests` and `httpx` callsites
|
||||
inside the same process.
|
||||
|
||||
Returns either a filesystem path to a CA bundle, or True to defer to
|
||||
the requests default (certifi).
|
||||
"""
|
||||
for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"):
|
||||
val = os.getenv(env_var)
|
||||
if val and os.path.isfile(val):
|
||||
return val
|
||||
return True
|
||||
|
||||
# Provider names that can appear as a "provider:" prefix before a model ID.
|
||||
# Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b")
|
||||
# are preserved so the full model name reaches cache lookups and server queries.
|
||||
|
|
@ -123,6 +143,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
|
||||
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
|
||||
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
|
||||
"gpt-5.5": 400000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
|
|
@ -183,12 +207,12 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"moonshotai/Kimi-K2.6": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"MiniMaxAI/MiniMax-M2.5": 204800,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 256000,
|
||||
"mimo-v2-pro": 1000000,
|
||||
"mimo-v2-omni": 256000,
|
||||
"mimo-v2-flash": 256000,
|
||||
"mimo-v2.5-pro": 1000000,
|
||||
"mimo-v2.5": 1000000,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 262144,
|
||||
"mimo-v2-pro": 1048576,
|
||||
"mimo-v2.5-pro": 1048576,
|
||||
"mimo-v2.5": 1048576,
|
||||
"mimo-v2-omni": 262144,
|
||||
"mimo-v2-flash": 262144,
|
||||
"zai-org/GLM-5": 202752,
|
||||
}
|
||||
|
||||
|
|
@ -491,7 +515,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
|
|||
return _model_metadata_cache
|
||||
|
||||
try:
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
|
|
@ -558,6 +582,7 @@ def fetch_endpoint_model_metadata(
|
|||
server_url.rstrip("/") + "/api/v1/models",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
verify=_resolve_requests_verify(),
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
|
|
@ -606,7 +631,7 @@ def fetch_endpoint_model_metadata(
|
|||
for candidate in candidates:
|
||||
url = candidate.rstrip("/") + "/models"
|
||||
try:
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
cache: Dict[str, Dict[str, Any]] = {}
|
||||
|
|
@ -637,9 +662,10 @@ def fetch_endpoint_model_metadata(
|
|||
try:
|
||||
# Try /v1/props first (current llama.cpp); fall back to /props for older builds
|
||||
base = candidate.rstrip("/").replace("/v1", "")
|
||||
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
|
||||
_verify = _resolve_requests_verify()
|
||||
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify)
|
||||
if not props_resp.ok:
|
||||
props_resp = requests.get(base + "/props", headers=headers, timeout=5)
|
||||
props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify)
|
||||
if props_resp.ok:
|
||||
props = props_resp.json()
|
||||
gen_settings = props.get("default_generation_settings", {})
|
||||
|
|
@ -711,6 +737,22 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
|
|||
return cache.get(key)
|
||||
|
||||
|
||||
def _invalidate_cached_context_length(model: str, base_url: str) -> None:
|
||||
"""Drop a stale cache entry so it gets re-resolved on the next lookup."""
|
||||
key = f"{model}@{base_url}"
|
||||
cache = _load_context_cache()
|
||||
if key not in cache:
|
||||
return
|
||||
del cache[key]
|
||||
path = _get_context_cache_path()
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(path, "w") as f:
|
||||
yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to invalidate context length cache entry %s: %s", key, e)
|
||||
|
||||
|
||||
def get_next_probe_tier(current_length: int) -> Optional[int]:
|
||||
"""Return the next lower probe tier, or None if already at minimum."""
|
||||
for tier in CONTEXT_PROBE_TIERS:
|
||||
|
|
@ -988,7 +1030,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
|||
"x-api-key": api_key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
}
|
||||
resp = requests.get(url, headers=headers, timeout=10)
|
||||
resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
data = resp.json()
|
||||
|
|
@ -1002,6 +1044,116 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
|||
return None
|
||||
|
||||
|
||||
# Known ChatGPT Codex OAuth context windows (observed via live
|
||||
# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
|
||||
# `context_window` values, which are what Codex actually enforces — the
|
||||
# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
|
||||
# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
|
||||
#
|
||||
# Used as a fallback when the live probe fails (no token, network error).
|
||||
# Longest keys first so substring match picks the most specific entry.
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
|
||||
"gpt-5.1-codex-max": 272_000,
|
||||
"gpt-5.1-codex-mini": 272_000,
|
||||
"gpt-5.3-codex": 272_000,
|
||||
"gpt-5.2-codex": 272_000,
|
||||
"gpt-5.4-mini": 272_000,
|
||||
"gpt-5.5": 272_000,
|
||||
"gpt-5.4": 272_000,
|
||||
"gpt-5.2": 272_000,
|
||||
"gpt-5": 272_000,
|
||||
}
|
||||
|
||||
|
||||
_codex_oauth_context_cache: Dict[str, int] = {}
|
||||
_codex_oauth_context_cache_time: float = 0.0
|
||||
_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
|
||||
"""Probe the ChatGPT Codex /models endpoint for per-slug context windows.
|
||||
|
||||
Codex OAuth imposes its own context limits that differ from the direct
|
||||
OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
|
||||
`context_window` field in each model entry is the authoritative source.
|
||||
|
||||
Returns a ``{slug: context_window}`` dict. Empty on failure.
|
||||
"""
|
||||
global _codex_oauth_context_cache, _codex_oauth_context_cache_time
|
||||
now = time.time()
|
||||
if (
|
||||
_codex_oauth_context_cache
|
||||
and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL
|
||||
):
|
||||
return _codex_oauth_context_cache
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
"https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
|
||||
headers={"Authorization": f"Bearer {access_token}"},
|
||||
timeout=10,
|
||||
verify=_resolve_requests_verify(),
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
logger.debug(
|
||||
"Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
|
||||
resp.status_code,
|
||||
)
|
||||
return {}
|
||||
data = resp.json()
|
||||
except Exception as exc:
|
||||
logger.debug("Codex /models probe failed: %s", exc)
|
||||
return {}
|
||||
|
||||
entries = data.get("models", []) if isinstance(data, dict) else []
|
||||
result: Dict[str, int] = {}
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
slug = item.get("slug")
|
||||
ctx = item.get("context_window")
|
||||
if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0:
|
||||
result[slug.strip()] = ctx
|
||||
|
||||
if result:
|
||||
_codex_oauth_context_cache = result
|
||||
_codex_oauth_context_cache_time = now
|
||||
return result
|
||||
|
||||
|
||||
def _resolve_codex_oauth_context_length(
|
||||
model: str, access_token: str = ""
|
||||
) -> Optional[int]:
|
||||
"""Resolve a Codex OAuth model's real context window.
|
||||
|
||||
Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
|
||||
have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
|
||||
"""
|
||||
model_bare = _strip_provider_prefix(model).strip()
|
||||
if not model_bare:
|
||||
return None
|
||||
|
||||
if access_token:
|
||||
live = _fetch_codex_oauth_context_lengths(access_token)
|
||||
if model_bare in live:
|
||||
return live[model_bare]
|
||||
# Case-insensitive match in case casing drifts
|
||||
model_lower = model_bare.lower()
|
||||
for slug, ctx in live.items():
|
||||
if slug.lower() == model_lower:
|
||||
return ctx
|
||||
|
||||
# Fallback: longest-key-first substring match over hardcoded defaults.
|
||||
model_lower = model_bare.lower()
|
||||
for slug, ctx in sorted(
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True
|
||||
):
|
||||
if slug in model_lower:
|
||||
return ctx
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_nous_context_length(model: str) -> Optional[int]:
|
||||
"""Resolve Nous Portal model context length via OpenRouter metadata.
|
||||
|
||||
|
|
@ -1047,6 +1199,7 @@ def get_model_context_length(
|
|||
Resolution order:
|
||||
0. Explicit config override (model.context_length or custom_providers per-model)
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
1b. AWS Bedrock static table (must precede custom-endpoint probe)
|
||||
2. Active endpoint metadata (/models for explicit custom endpoints)
|
||||
3. Local server query (for local endpoints)
|
||||
4. Anthropic /v1/models API (API-key users only, not OAuth)
|
||||
|
|
@ -1069,7 +1222,41 @@ def get_model_context_length(
|
|||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
if cached is not None:
|
||||
return cached
|
||||
# Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
|
||||
# resolved gpt-5.x to the direct-API value (e.g. 1.05M) via
|
||||
# models.dev and persisted it. Codex OAuth caps at 272K for every
|
||||
# slug, so any cached Codex entry at or above 400K is a leftover
|
||||
# from the old resolution path. Drop it and fall through to the
|
||||
# live /models probe in step 5 below.
|
||||
if provider == "openai-codex" and cached >= 400_000:
|
||||
logger.info(
|
||||
"Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); "
|
||||
"re-resolving via live /models probe",
|
||||
model, base_url, f"{cached:,}",
|
||||
)
|
||||
_invalidate_cached_context_length(model, base_url)
|
||||
else:
|
||||
return cached
|
||||
|
||||
# 1b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels API doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py that reflects
|
||||
# AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
|
||||
# Anthropic API). This must run BEFORE the custom-endpoint probe at
|
||||
# step 2 — bedrock-runtime.<region>.amazonaws.com is not in
|
||||
# _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
|
||||
# fail the /models probe (Bedrock doesn't expose that shape), and fall
|
||||
# back to the 128K default before reaching the original step 4b branch.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
|
||||
# 2. Active endpoint metadata for truly custom/unknown endpoints.
|
||||
# Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
|
||||
|
|
@ -1116,19 +1303,7 @@ def get_model_context_length(
|
|||
if ctx:
|
||||
return ctx
|
||||
|
||||
# 4b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
# 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)
|
||||
|
||||
# 5. Provider-aware lookups (before generic OpenRouter cache)
|
||||
# These are provider-specific and take priority over the generic OR cache,
|
||||
|
|
@ -1142,10 +1317,32 @@ def get_model_context_length(
|
|||
if inferred:
|
||||
effective_provider = inferred
|
||||
|
||||
# 5a. Copilot live /models API — max_prompt_tokens from the user's account.
|
||||
# This catches account-specific models (e.g. claude-opus-4.6-1m) that
|
||||
# don't exist in models.dev. For models that ARE in models.dev, this
|
||||
# returns the provider-enforced limit which is what users can actually use.
|
||||
if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
|
||||
try:
|
||||
from hermes_cli.models import get_copilot_model_context
|
||||
ctx = get_copilot_model_context(model, api_key=api_key)
|
||||
if ctx:
|
||||
return ctx
|
||||
except Exception:
|
||||
pass # Fall through to models.dev
|
||||
|
||||
if effective_provider == "nous":
|
||||
ctx = _resolve_nous_context_length(model)
|
||||
if ctx:
|
||||
return ctx
|
||||
if effective_provider == "openai-codex":
|
||||
# Codex OAuth enforces lower context limits than the direct OpenAI
|
||||
# API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
|
||||
# on Codex). Authoritative source is Codex's own /models endpoint.
|
||||
codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
|
||||
if codex_ctx:
|
||||
if base_url:
|
||||
save_context_length(model, base_url, codex_ctx)
|
||||
return codex_ctx
|
||||
if effective_provider:
|
||||
from agent.models_dev import lookup_models_dev_context
|
||||
ctx = lookup_models_dev_context(effective_provider, model)
|
||||
|
|
|
|||
190
agent/moonshot_schema.py
Normal file
190
agent/moonshot_schema.py
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
|
||||
|
||||
Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
|
||||
tool calling. Requests that violate it fail with HTTP 400:
|
||||
|
||||
tools.function.parameters is not a valid moonshot flavored json schema,
|
||||
details: <...>
|
||||
|
||||
Known rejection modes documented at
|
||||
https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
|
||||
and MoonshotAI/kimi-cli#1595:
|
||||
|
||||
1. Every property schema must carry a ``type``. Standard JSON Schema allows
|
||||
type to be omitted (the value is then unconstrained); Moonshot refuses.
|
||||
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
|
||||
the parent. Presence of both causes "type should be defined in anyOf
|
||||
items instead of the parent schema".
|
||||
|
||||
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
|
||||
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
|
||||
applies at MCP registration time for all providers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Keys whose values are maps of name → schema (not schemas themselves).
|
||||
# When we recurse, we walk the values of these maps as schemas, but we do
|
||||
# NOT apply the missing-type repair to the map itself.
|
||||
_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
|
||||
|
||||
# Keys whose values are lists of schemas.
|
||||
_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
|
||||
|
||||
# Keys whose values are a single nested schema.
|
||||
_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
|
||||
|
||||
|
||||
def _repair_schema(node: Any, is_schema: bool = True) -> Any:
    """Recursively apply Moonshot repairs to a schema node.

    ``is_schema=True`` marks ``node`` as a JSON Schema node, which receives
    the missing-type and anyOf-parent repairs.  ``is_schema=False`` marks a
    container map (e.g. the value of ``properties``): only its values are
    walked as schemas; the map itself is left untouched.
    """
    if isinstance(node, list):
        # Lists only appear under schema-list keys (anyOf/oneOf/allOf/...),
        # so every element is itself a schema.
        return [_repair_schema(entry, is_schema=True) for entry in node]
    if not isinstance(node, dict):
        return node

    # Rebuild the dict key by key, picking the recursion mode per key.
    rebuilt: Dict[str, Any] = {}
    for key, value in node.items():
        if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
            # name → schema map: the map itself carries no type of its own,
            # but each of its values is a schema.
            rebuilt[key] = {
                name: _repair_schema(sub, is_schema=True)
                for name, sub in value.items()
            }
        elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
            rebuilt[key] = [_repair_schema(sub, is_schema=True) for sub in value]
        elif key in _SCHEMA_NODE_KEYS and isinstance(value, dict):
            # items / contains / not / additionalProperties / propertyNames:
            # a single nested schema.  additionalProperties may also be a
            # bool — non-dict values fall through to the pass-through branch.
            rebuilt[key] = _repair_schema(value, is_schema=True)
        else:
            # Scalars (description, title, format, enum values, ...) and
            # boolean additionalProperties pass through unchanged.
            rebuilt[key] = value

    if not is_schema:
        return rebuilt

    # Rule 2: when anyOf is present, type belongs only on the children.
    if isinstance(rebuilt.get("anyOf"), list):
        rebuilt.pop("type", None)
        return rebuilt

    # Rule 1: property schemas without type need one.  $ref nodes are
    # exempt — their type comes from the referenced definition.
    if "$ref" in rebuilt:
        return rebuilt
    return _fill_missing_type(rebuilt)
|
||||
|
||||
|
||||
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Infer a reasonable ``type`` if this schema node has none."""
|
||||
if "type" in node and node["type"] not in (None, ""):
|
||||
return node
|
||||
|
||||
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
|
||||
# → type of first enum value, else fall back to ``string`` (safest scalar).
|
||||
if "properties" in node or "required" in node or "additionalProperties" in node:
|
||||
inferred = "object"
|
||||
elif "items" in node or "prefixItems" in node:
|
||||
inferred = "array"
|
||||
elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
|
||||
sample = node["enum"][0]
|
||||
if isinstance(sample, bool):
|
||||
inferred = "boolean"
|
||||
elif isinstance(sample, int):
|
||||
inferred = "integer"
|
||||
elif isinstance(sample, float):
|
||||
inferred = "number"
|
||||
else:
|
||||
inferred = "string"
|
||||
else:
|
||||
inferred = "string"
|
||||
|
||||
return {**node, "type": inferred}
|
||||
|
||||
|
||||
def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
    """Normalize tool parameters to a Moonshot-compatible object schema.

    The input is never mutated: a deep copy is repaired and returned.
    Non-dict input (or a repair that somehow yields a non-dict) collapses
    to an empty object schema.
    """
    empty_object = {"type": "object", "properties": {}}
    if not isinstance(parameters, dict):
        return empty_object

    result = _repair_schema(copy.deepcopy(parameters), is_schema=True)
    if not isinstance(result, dict):
        return empty_object

    # The top level must be an object schema.
    if result.get("type") != "object":
        result["type"] = "object"
    result.setdefault("properties", {})

    return result
|
||||
|
||||
|
||||
def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters.

    Returns the original list object when no tool needed repair, so callers
    can detect "unchanged" by identity.
    """
    if not tools:
        return tools

    rewritten: List[Dict[str, Any]] = []
    changed = False
    for entry in tools:
        fn = entry.get("function") if isinstance(entry, dict) else None
        if not isinstance(fn, dict):
            # Not a function tool (or malformed) — pass through untouched.
            rewritten.append(entry)
            continue
        params = fn.get("parameters")
        repaired = sanitize_moonshot_tool_parameters(params)
        if repaired is params:
            rewritten.append(entry)
        else:
            changed = True
            rewritten.append({**entry, "function": {**fn, "parameters": repaired}})

    return rewritten if changed else tools
|
||||
|
||||
|
||||
def is_moonshot_model(model: str | None) -> bool:
    """True for any Kimi / Moonshot model slug, regardless of aggregator prefix.

    Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) as well as
    aggregator-prefixed slugs (``nous/moonshotai/kimi-k2.6``,
    ``openrouter/moonshotai/...``).  Detection goes by model name because
    aggregators route to Moonshot's inference under their own base URL,
    not ``api.moonshot.ai``.
    """
    if not model:
        return False

    slug = model.strip().lower()

    # The last path segment covers aggregator-prefixed slugs.
    last_segment = slug.rsplit("/", 1)[-1]
    if last_segment == "kimi" or last_segment.startswith("kimi-"):
        return True

    # Vendor-prefixed forms commonly seen on aggregators.
    return "moonshot" in slug or "/kimi" in slug or slug.startswith("kimi")
|
||||
|
|
@ -1,154 +1,29 @@
|
|||
"""Shared slash command helpers for skills and built-in prompt-style modes.
|
||||
"""Shared slash command helpers for skills.
|
||||
|
||||
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
|
||||
can invoke skills via /skill-name commands and prompt-only built-ins like
|
||||
/plan.
|
||||
can invoke skills via /skill-name commands.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_preprocessing import (
|
||||
expand_inline_shell as _expand_inline_shell,
|
||||
load_skills_config as _load_skills_config,
|
||||
substitute_template_vars as _substitute_template_vars,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
||||
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only — no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def _load_skills_config() -> dict:
    """Load the ``skills`` section of config.yaml (best-effort).

    Any failure — missing config module, unreadable config, wrong shape —
    degrades to an empty dict and is only logged at debug level.
    """
    try:
        from hermes_cli.config import load_config

        section = (load_config() or {}).get("skills")
    except Exception:
        logger.debug("Could not read skills config", exc_info=True)
        return {}
    return section if isinstance(section, dict) else {}
|
||||
|
||||
|
||||
def _substitute_template_vars(
    content: str,
    skill_dir: Path | None,
    session_id: str | None,
) -> str:
    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.

    A token is substituted only when a concrete value exists for it;
    anything unresolved is left verbatim so the author can spot it.
    """
    if not content:
        return content

    # Precompute each token's replacement; None means "leave the token".
    replacements = {
        "HERMES_SKILL_DIR": str(skill_dir) if skill_dir else None,
        "HERMES_SESSION_ID": str(session_id) if session_id else None,
    }

    def _sub(match: re.Match) -> str:
        value = replacements.get(match.group(1))
        return value if value is not None else match.group(0)

    return _SKILL_TEMPLATE_RE.sub(_sub, content)
|
||||
|
||||
|
||||
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
    """Execute a single inline-shell snippet and return its stdout (trimmed).

    Runs ``command`` through ``bash -c`` with ``cwd`` as the working
    directory.  Failures return a short ``[inline-shell ...]`` marker
    instead of raising, so one bad snippet can't wreck the whole skill
    message.  Stderr is used only when stdout is empty, and the result is
    capped at ``_INLINE_SHELL_MAX_OUTPUT`` characters.
    """
    try:
        completed = subprocess.run(
            ["bash", "-c", command],
            cwd=str(cwd) if cwd else None,
            capture_output=True,
            text=True,
            timeout=max(1, int(timeout)),  # never pass a non-positive timeout
            check=False,
        )
    except subprocess.TimeoutExpired:
        return f"[inline-shell timeout after {timeout}s: {command}]"
    except FileNotFoundError:
        # Fix: this was an f-string with no placeholder (F541); a plain
        # string literal produces the identical message.  Matches the
        # agent.skill_preprocessing copy of this helper.
        return "[inline-shell error: bash not found]"
    except Exception as exc:
        return f"[inline-shell error: {exc}]"

    output = (completed.stdout or "").rstrip("\n")
    if not output and completed.stderr:
        output = completed.stderr.rstrip("\n")
    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
    return output
|
||||
|
||||
|
||||
def _expand_inline_shell(
    content: str,
    skill_dir: Path | None,
    timeout: int,
) -> str:
    """Replace every !`cmd` snippet in ``content`` with its stdout.

    Each snippet runs with the skill directory as CWD so relative paths in
    the snippet work the way the author expects.  Empty snippets expand to
    the empty string.
    """
    if "!`" not in content:
        # Fast path: nothing to expand.
        return content

    def _render(match: re.Match) -> str:
        snippet = match.group(1).strip()
        return _run_inline_shell(snippet, skill_dir, timeout) if snippet else ""

    return _INLINE_SHELL_RE.sub(_render, content)
|
||||
|
||||
|
||||
def build_plan_path(
    user_instruction: str = "",
    *,
    now: datetime | None = None,
) -> Path:
    """Return the default workspace-relative markdown path for a /plan invocation.

    Relative paths are intentional: file tools are task/backend-aware and
    resolve them against the active working directory for local, docker, ssh,
    modal, daytona, and similar terminal backends.  That keeps the plan with
    the active workspace instead of the Hermes host's global home directory.

    Fix: a whitespace-only ``user_instruction`` used to raise IndexError —
    ``"  ".strip().splitlines()`` is ``[]``, so indexing ``[0]`` blew up.
    It now falls back to the default ``conversation-plan`` slug.
    """
    # First non-empty consideration: slug comes from the first line only.
    instruction_lines = (user_instruction or "").strip().splitlines()
    slug_source = instruction_lines[0] if instruction_lines else ""
    # Collapse every run of non-alphanumerics into a single hyphen.
    slug = re.sub(r"[^a-z0-9]+", "-", slug_source.lower()).strip("-")
    if slug:
        # Keep at most 8 words / 48 chars so filenames stay readable.
        slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
    slug = slug or "conversation-plan"
    timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
    return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
|
||||
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
|
||||
raw_identifier = (skill_identifier or "").strip()
|
||||
|
|
@ -167,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
|
|||
else:
|
||||
normalized = raw_identifier.lstrip("/")
|
||||
|
||||
loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
|
||||
loaded_skill = json.loads(
|
||||
skill_view(normalized, task_id=task_id, preprocess=False)
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
|
@ -345,7 +222,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
|||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
|
||||
from agent.skill_utils import get_external_skills_dirs
|
||||
from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
|
||||
disabled = _get_disabled_skill_names()
|
||||
seen_names: set = set()
|
||||
|
||||
|
|
@ -356,7 +233,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
|||
dirs_to_scan.extend(get_external_skills_dirs())
|
||||
|
||||
for scan_dir in dirs_to_scan:
|
||||
for skill_md in scan_dir.rglob("SKILL.md"):
|
||||
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
|
||||
continue
|
||||
try:
|
||||
|
|
|
|||
131
agent/skill_preprocessing.py
Normal file
131
agent/skill_preprocessing.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
"""Shared SKILL.md preprocessing helpers."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only -- no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def load_skills_config() -> dict:
    """Return the ``skills`` section of config.yaml, or ``{}`` on any failure.

    Best-effort: import errors, read errors, and a non-mapping section all
    degrade to an empty dict, logged at debug level only.
    """
    try:
        from hermes_cli.config import load_config

        cfg = load_config() or {}
        section = cfg.get("skills")
        if isinstance(section, dict):
            return section
    except Exception:
        logger.debug("Could not read skills config", exc_info=True)
    return {}
|
||||
|
||||
|
||||
def substitute_template_vars(
    content: str,
    skill_dir: Path | None,
    session_id: str | None,
) -> str:
    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.

    Only tokens with a concrete value are substituted — unresolved tokens
    stay in place so the author can spot them.
    """
    if not content:
        return content

    def _value_for(token_match: re.Match) -> str:
        token = token_match.group(1)
        if token == "HERMES_SKILL_DIR" and skill_dir:
            return str(skill_dir)
        if token == "HERMES_SESSION_ID" and session_id:
            return str(session_id)
        # No concrete value — keep the literal ${...} token.
        return token_match.group(0)

    return _SKILL_TEMPLATE_RE.sub(_value_for, content)
|
||||
|
||||
|
||||
def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
    """Execute a single inline-shell snippet and return its stdout (trimmed).

    Never raises: every failure produces a short ``[inline-shell ...]``
    marker so one bad snippet can't wreck the whole skill message.  Stderr
    is used only when stdout is empty, and the result is capped at
    ``_INLINE_SHELL_MAX_OUTPUT`` characters.
    """
    effective_timeout = max(1, int(timeout))  # guard against 0 / negatives
    try:
        proc = subprocess.run(
            ["bash", "-c", command],
            cwd=str(cwd) if cwd else None,
            capture_output=True,
            text=True,
            timeout=effective_timeout,
            check=False,
        )
    except subprocess.TimeoutExpired:
        return f"[inline-shell timeout after {timeout}s: {command}]"
    except FileNotFoundError:
        return "[inline-shell error: bash not found]"
    except Exception as exc:
        return f"[inline-shell error: {exc}]"

    result = (proc.stdout or "").rstrip("\n")
    if not result and proc.stderr:
        # Fall back to stderr so a failing command still shows something.
        result = proc.stderr.rstrip("\n")
    if len(result) > _INLINE_SHELL_MAX_OUTPUT:
        result = result[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
    return result
|
||||
|
||||
|
||||
def expand_inline_shell(
    content: str,
    skill_dir: Path | None,
    timeout: int,
) -> str:
    """Replace every !`cmd` snippet in ``content`` with its stdout.

    Snippets run with the skill directory as CWD so relative paths behave
    the way the author expects.  Empty snippets expand to nothing.
    """
    if "!`" not in content:
        return content

    def _expand(match: re.Match) -> str:
        command = match.group(1).strip()
        if not command:
            return ""
        return run_inline_shell(command, skill_dir, timeout)

    return _INLINE_SHELL_RE.sub(_expand, content)
|
||||
|
||||
|
||||
def preprocess_skill_content(
    content: str,
    skill_dir: Path | None,
    session_id: str | None = None,
    skills_cfg: dict | None = None,
) -> str:
    """Apply configured SKILL.md template and inline-shell preprocessing.

    ``skills_cfg`` may be supplied to avoid re-reading config.yaml; when it
    isn't a dict the config is loaded fresh.  Template-variable substitution
    defaults on; inline-shell expansion defaults off.
    """
    if not content:
        return content

    config = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()

    if config.get("template_vars", True):
        content = substitute_template_vars(content, skill_dir, session_id)

    if config.get("inline_shell", False):
        shell_timeout = int(config.get("inline_shell_timeout", 10) or 10)
        content = expand_inline_shell(content, skill_dir, shell_timeout)

    return content
|
||||
|
|
@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
|
|||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
|
@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport):
|
|||
|
||||
# Tools
|
||||
if tools:
|
||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||
# tool parameters here keeps aggregator routes (Nous, OpenRouter,
|
||||
# etc.) compatible, in addition to direct moonshot.ai endpoints.
|
||||
if is_moonshot_model(model):
|
||||
tools = sanitize_moonshot_tools(tools)
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > provider default
|
||||
|
|
|
|||
|
|
@ -61,6 +61,20 @@ class ToolCall:
|
|||
"""Codex response_item_id from provider_data."""
|
||||
return (self.provider_data or {}).get("response_item_id")
|
||||
|
||||
@property
def extra_content(self) -> Optional[Dict[str, Any]]:
    """Gemini ``extra_content`` (thought_signature) from provider_data.

    Gemini 3 thinking models attach ``extra_content`` carrying a
    ``thought_signature`` to each tool call; the signature must be replayed
    on subsequent API calls or the API rejects the request with HTTP 400.
    The chat_completions transport stores it under
    ``provider_data["extra_content"]``; exposing it here lets
    ``_build_assistant_message`` fetch it uniformly via
    ``getattr(tc, "extra_content")``.
    """
    data = self.provider_data or {}
    return data.get("extra_content")
|
||||
|
||||
|
||||
@dataclass
|
||||
class Usage:
|
||||
|
|
|
|||
|
|
@ -951,13 +951,9 @@ class BatchRunner:
|
|||
root_logger.setLevel(original_level)
|
||||
|
||||
# Aggregate all batch statistics and update checkpoint
|
||||
all_completed_prompts = list(completed_prompts_set)
|
||||
total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
|
||||
|
||||
|
||||
for batch_result in results:
|
||||
# Add newly completed prompts
|
||||
all_completed_prompts.extend(batch_result.get("completed_prompts", []))
|
||||
|
||||
# Aggregate tool stats
|
||||
for tool_name, stats in batch_result.get("tool_stats", {}).items():
|
||||
if tool_name not in total_tool_stats:
|
||||
|
|
@ -977,7 +973,7 @@ class BatchRunner:
|
|||
|
||||
# Save final checkpoint (best-effort; incremental writes already happened)
|
||||
try:
|
||||
checkpoint_data["completed_prompts"] = all_completed_prompts
|
||||
checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
|
||||
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
|
||||
except Exception as ckpt_err:
|
||||
print(f"âš ï¸ Warning: Failed to save final checkpoint: {ckpt_err}")
|
||||
|
|
|
|||
|
|
@ -326,6 +326,16 @@ compression:
|
|||
# To pin a specific model/provider for compression summaries, use the
|
||||
# auxiliary section below (auxiliary.compression.provider / model).
|
||||
|
||||
# =============================================================================
|
||||
# Anthropic prompt caching TTL
|
||||
# =============================================================================
|
||||
# When prompt caching is active (Claude via OpenRouter or native Anthropic),
|
||||
# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
|
||||
# "1h". Other values are ignored and "5m" is used.
|
||||
#
|
||||
prompt_caching:
|
||||
cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
|
||||
|
||||
# =============================================================================
|
||||
# Auxiliary Models (Advanced — Experimental)
|
||||
# =============================================================================
|
||||
|
|
@ -507,6 +517,13 @@ agent:
|
|||
# finish, then interrupts anything still running after this timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
# restart_drain_timeout: 60
|
||||
|
||||
# Max app-level retry attempts for API errors (connection drops, provider
|
||||
# timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
|
||||
# to 1 if you use fallback providers and want fast failover on flaky
|
||||
# primaries (default 3). The OpenAI SDK does its own low-level retries
|
||||
# underneath this wrapper — this is the Hermes-level loop.
|
||||
# api_max_retries: 3
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
|
|
|
|||
330
cli.py
330
cli.py
|
|
@ -1688,7 +1688,6 @@ def _looks_like_slash_command(text: str) -> bool:
|
|||
from agent.skill_commands import (
|
||||
scan_skill_commands,
|
||||
build_skill_invocation_message,
|
||||
build_plan_path,
|
||||
build_preloaded_skills_prompt,
|
||||
)
|
||||
|
||||
|
|
@ -3084,6 +3083,8 @@ class HermesCLI:
|
|||
format_runtime_provider_error,
|
||||
)
|
||||
|
||||
_primary_exc = None
|
||||
runtime = None
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=self.requested_provider,
|
||||
|
|
@ -3091,7 +3092,34 @@ class HermesCLI:
|
|||
explicit_base_url=self._explicit_base_url,
|
||||
)
|
||||
except Exception as exc:
|
||||
message = format_runtime_provider_error(exc)
|
||||
_primary_exc = exc
|
||||
|
||||
# Primary provider auth failed — try fallback providers before giving up.
|
||||
if runtime is None and _primary_exc is not None:
|
||||
from hermes_cli.auth import AuthError
|
||||
if isinstance(_primary_exc, AuthError):
|
||||
_fb_chain = self._fallback_model if isinstance(self._fallback_model, list) else []
|
||||
for _fb in _fb_chain:
|
||||
_fb_provider = (_fb.get("provider") or "").strip().lower()
|
||||
_fb_model = (_fb.get("model") or "").strip()
|
||||
if not _fb_provider or not _fb_model:
|
||||
continue
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=_fb_provider)
|
||||
logger.warning(
|
||||
"Primary provider auth failed (%s). Falling through to fallback: %s/%s",
|
||||
_primary_exc, _fb_provider, _fb_model,
|
||||
)
|
||||
_cprint(f"⚠️ Primary auth failed — switching to fallback: {_fb_provider} / {_fb_model}")
|
||||
self.requested_provider = _fb_provider
|
||||
self.model = _fb_model
|
||||
_primary_exc = None
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if runtime is None:
|
||||
message = format_runtime_provider_error(_primary_exc) if _primary_exc else "Provider resolution failed."
|
||||
ChatConsole().print(f"[bold red]{message}[/]")
|
||||
return False
|
||||
|
||||
|
|
@ -3254,6 +3282,23 @@ class HermesCLI:
|
|||
_cprint(f"\033[1;31mSession not found: {self.session_id}{_RST}")
|
||||
_cprint(f"{_DIM}Use a session ID from a previous CLI run (hermes sessions list).{_RST}")
|
||||
return False
|
||||
# If the requested session is the (empty) head of a compression
|
||||
# chain, walk to the descendant that actually holds the messages.
|
||||
# See #15000 and SessionDB.resolve_resume_session_id.
|
||||
try:
|
||||
resolved_id = self._session_db.resolve_resume_session_id(self.session_id)
|
||||
except Exception:
|
||||
resolved_id = self.session_id
|
||||
if resolved_id and resolved_id != self.session_id:
|
||||
ChatConsole().print(
|
||||
f"[{_DIM}]Session {_escape(self.session_id)} was compressed into "
|
||||
f"{_escape(resolved_id)}; resuming the descendant with your "
|
||||
f"transcript.[/]"
|
||||
)
|
||||
self.session_id = resolved_id
|
||||
resolved_meta = self._session_db.get_session(self.session_id)
|
||||
if resolved_meta:
|
||||
session_meta = resolved_meta
|
||||
restored = self._session_db.get_messages_as_conversation(self.session_id)
|
||||
if restored:
|
||||
restored = [m for m in restored if m.get("role") != "session_meta"]
|
||||
|
|
@ -3472,6 +3517,22 @@ class HermesCLI:
|
|||
)
|
||||
return False
|
||||
|
||||
# If the requested session is the (empty) head of a compression chain,
|
||||
# walk to the descendant that actually holds the messages. See #15000.
|
||||
try:
|
||||
resolved_id = self._session_db.resolve_resume_session_id(self.session_id)
|
||||
except Exception:
|
||||
resolved_id = self.session_id
|
||||
if resolved_id and resolved_id != self.session_id:
|
||||
self._console_print(
|
||||
f"[dim]Session {self.session_id} was compressed into "
|
||||
f"{resolved_id}; resuming the descendant with your transcript.[/]"
|
||||
)
|
||||
self.session_id = resolved_id
|
||||
resolved_meta = self._session_db.get_session(self.session_id)
|
||||
if resolved_meta:
|
||||
session_meta = resolved_meta
|
||||
|
||||
restored = self._session_db.get_messages_as_conversation(self.session_id)
|
||||
if restored:
|
||||
restored = [m for m in restored if m.get("role") != "session_meta"]
|
||||
|
|
@ -4686,6 +4747,22 @@ class HermesCLI:
|
|||
_cprint(" Use /history or `hermes sessions list` to see available sessions.")
|
||||
return
|
||||
|
||||
# If the target is the empty head of a compression chain, redirect to
|
||||
# the descendant that actually holds the transcript. See #15000.
|
||||
try:
|
||||
resolved_id = self._session_db.resolve_resume_session_id(target_id)
|
||||
except Exception:
|
||||
resolved_id = target_id
|
||||
if resolved_id and resolved_id != target_id:
|
||||
_cprint(
|
||||
f" Session {target_id} was compressed into {resolved_id}; "
|
||||
f"resuming the descendant with your transcript."
|
||||
)
|
||||
target_id = resolved_id
|
||||
resolved_meta = self._session_db.get_session(target_id)
|
||||
if resolved_meta:
|
||||
session_meta = resolved_meta
|
||||
|
||||
if target_id == self.session_id:
|
||||
_cprint(" Already on that session.")
|
||||
return
|
||||
|
|
@ -5378,79 +5455,6 @@ class HermesCLI:
|
|||
except Exception:
|
||||
return False
|
||||
|
||||
def _show_model_and_providers(self):
|
||||
"""Show current model + provider and list all authenticated providers.
|
||||
|
||||
Shows current model + provider, then lists all authenticated
|
||||
providers with their available models.
|
||||
"""
|
||||
from hermes_cli.models import (
|
||||
curated_models_for_provider, list_available_providers,
|
||||
normalize_provider, _PROVIDER_LABELS,
|
||||
get_pricing_for_provider, format_model_pricing_table,
|
||||
)
|
||||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
||||
|
||||
# Resolve current provider
|
||||
raw_provider = normalize_provider(self.provider)
|
||||
if raw_provider == "auto":
|
||||
try:
|
||||
current = _resolve_provider(
|
||||
self.requested_provider,
|
||||
explicit_api_key=self._explicit_api_key,
|
||||
explicit_base_url=self._explicit_base_url,
|
||||
)
|
||||
except Exception:
|
||||
current = "openrouter"
|
||||
else:
|
||||
current = raw_provider
|
||||
current_label = _PROVIDER_LABELS.get(current, current)
|
||||
|
||||
print(f"\n Current: {self.model} via {current_label}")
|
||||
print()
|
||||
|
||||
# Show all authenticated providers with their models
|
||||
providers = list_available_providers()
|
||||
authed = [p for p in providers if p["authenticated"]]
|
||||
unauthed = [p for p in providers if not p["authenticated"]]
|
||||
|
||||
if authed:
|
||||
print(" Authenticated providers & models:")
|
||||
for p in authed:
|
||||
is_active = p["id"] == current
|
||||
marker = " ← active" if is_active else ""
|
||||
print(f" [{p['id']}]{marker}")
|
||||
curated = curated_models_for_provider(p["id"])
|
||||
# Fetch pricing for providers that support it (openrouter, nous)
|
||||
pricing_map = get_pricing_for_provider(p["id"]) if p["id"] in ("openrouter", "nous") else {}
|
||||
if curated and pricing_map:
|
||||
cur_model = self.model if is_active else ""
|
||||
for line in format_model_pricing_table(curated, pricing_map, current_model=cur_model):
|
||||
print(line)
|
||||
elif curated:
|
||||
for mid, desc in curated:
|
||||
current_marker = " ← current" if (is_active and mid == self.model) else ""
|
||||
print(f" {mid}{current_marker}")
|
||||
elif p["id"] == "custom":
|
||||
from hermes_cli.models import _get_custom_base_url
|
||||
custom_url = _get_custom_base_url()
|
||||
if custom_url:
|
||||
print(f" endpoint: {custom_url}")
|
||||
if is_active:
|
||||
print(f" model: {self.model} ← current")
|
||||
print(" (use hermes model to change)")
|
||||
else:
|
||||
print(" (use hermes model to change)")
|
||||
print()
|
||||
|
||||
if unauthed:
|
||||
names = ", ".join(p["label"] for p in unauthed)
|
||||
print(f" Not configured: {names}")
|
||||
print(" Run: hermes setup")
|
||||
print()
|
||||
|
||||
print(" To change model or provider, use: hermes model")
|
||||
|
||||
def _output_console(self):
|
||||
"""Use prompt_toolkit-safe Rich rendering once the TUI is live."""
|
||||
if getattr(self, "_app", None):
|
||||
|
|
@ -6026,16 +6030,12 @@ class HermesCLI:
|
|||
self._handle_resume_command(cmd_original)
|
||||
elif canonical == "model":
|
||||
self._handle_model_switch(cmd_original)
|
||||
elif canonical == "provider":
|
||||
self._show_model_and_providers()
|
||||
elif canonical == "gquota":
|
||||
self._handle_gquota_command(cmd_original)
|
||||
|
||||
elif canonical == "personality":
|
||||
# Use original case (handler lowercases the personality name itself)
|
||||
self._handle_personality_command(cmd_original)
|
||||
elif canonical == "plan":
|
||||
self._handle_plan_command(cmd_original)
|
||||
elif canonical == "retry":
|
||||
retry_msg = self.retry_last()
|
||||
if retry_msg and hasattr(self, '_pending_input'):
|
||||
|
|
@ -6165,6 +6165,8 @@ class HermesCLI:
|
|||
self._handle_skin_command(cmd_original)
|
||||
elif canonical == "voice":
|
||||
self._handle_voice_command(cmd_original)
|
||||
elif canonical == "busy":
|
||||
self._handle_busy_command(cmd_original)
|
||||
else:
|
||||
# Check for user-defined quick commands (bypass agent loop, no LLM call)
|
||||
base_cmd = cmd_lower.split()[0]
|
||||
|
|
@ -6270,32 +6272,6 @@ class HermesCLI:
|
|||
|
||||
return True
|
||||
|
||||
def _handle_plan_command(self, cmd: str):
|
||||
"""Handle /plan [request] — load the bundled plan skill."""
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
user_instruction = parts[1].strip() if len(parts) > 1 else ""
|
||||
|
||||
plan_path = build_plan_path(user_instruction)
|
||||
msg = build_skill_invocation_message(
|
||||
"/plan",
|
||||
user_instruction,
|
||||
task_id=self.session_id,
|
||||
runtime_note=(
|
||||
"Save the markdown plan with write_file to this exact relative path "
|
||||
f"inside the active workspace/backend cwd: {plan_path}"
|
||||
),
|
||||
)
|
||||
|
||||
if not msg:
|
||||
ChatConsole().print("[bold red]Failed to load the bundled /plan skill[/]")
|
||||
return
|
||||
|
||||
_cprint(f" 📝 Plan mode queued via skill. Markdown plan target: {plan_path}")
|
||||
if hasattr(self, '_pending_input'):
|
||||
self._pending_input.put(msg)
|
||||
else:
|
||||
ChatConsole().print("[bold red]Plan mode unavailable: input queue not initialized[/]")
|
||||
|
||||
def _handle_background_command(self, cmd: str):
|
||||
"""Handle /background <prompt> — run a prompt in a separate background session.
|
||||
|
||||
|
|
@ -6685,6 +6661,13 @@ class HermesCLI:
|
|||
print(f" ⚠ Port {_port} is not reachable at {cdp_url}")
|
||||
|
||||
os.environ["BROWSER_CDP_URL"] = cdp_url
|
||||
# Eagerly start the CDP supervisor so pending_dialogs + frame_tree
|
||||
# show up in the next browser_snapshot. No-op if already started.
|
||||
try:
|
||||
from tools.browser_tool import _ensure_cdp_supervisor # type: ignore[import-not-found]
|
||||
_ensure_cdp_supervisor("default")
|
||||
except Exception:
|
||||
pass
|
||||
print()
|
||||
print("🌐 Browser connected to live Chrome via CDP")
|
||||
print(f" Endpoint: {cdp_url}")
|
||||
|
|
@ -6706,7 +6689,8 @@ class HermesCLI:
|
|||
if current:
|
||||
os.environ.pop("BROWSER_CDP_URL", None)
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
from tools.browser_tool import cleanup_all_browsers, _stop_cdp_supervisor
|
||||
_stop_cdp_supervisor("default")
|
||||
cleanup_all_browsers()
|
||||
except Exception:
|
||||
pass
|
||||
|
|
@ -6919,6 +6903,36 @@ class HermesCLI:
|
|||
else:
|
||||
_cprint(f" {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_busy_command(self, cmd: str):
|
||||
"""Handle /busy — control what Enter does while Hermes is working.
|
||||
|
||||
Usage:
|
||||
/busy Show current busy input mode
|
||||
/busy status Show current busy input mode
|
||||
/busy queue Queue input for the next turn instead of interrupting
|
||||
/busy interrupt Interrupt the current run on Enter (default)
|
||||
"""
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
if len(parts) < 2 or parts[1].strip().lower() == "status":
|
||||
_cprint(f" {_ACCENT}Busy input mode: {self.busy_input_mode}{_RST}")
|
||||
_cprint(f" {_DIM}Enter while busy: {'queues for next turn' if self.busy_input_mode == 'queue' else 'interrupts current run'}{_RST}")
|
||||
_cprint(f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}")
|
||||
return
|
||||
|
||||
arg = parts[1].strip().lower()
|
||||
if arg not in {"queue", "interrupt"}:
|
||||
_cprint(f" {_DIM}(._.) Unknown argument: {arg}{_RST}")
|
||||
_cprint(f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}")
|
||||
return
|
||||
|
||||
self.busy_input_mode = arg
|
||||
if save_config_value("display.busy_input_mode", arg):
|
||||
behavior = "Enter will queue follow-up input while Hermes is busy." if arg == "queue" else "Enter will interrupt the current run while Hermes is busy."
|
||||
_cprint(f" {_ACCENT}✓ Busy input mode set to '{arg}' (saved to config){_RST}")
|
||||
_cprint(f" {_DIM}{behavior}{_RST}")
|
||||
else:
|
||||
_cprint(f" {_ACCENT}✓ Busy input mode set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_fast_command(self, cmd: str):
|
||||
"""Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
|
||||
if not self._fast_command_available():
|
||||
|
|
@ -6997,51 +7011,52 @@ class HermesCLI:
|
|||
focus_topic = parts[1].strip()
|
||||
|
||||
original_count = len(self.conversation_history)
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
original_history = list(self.conversation_history)
|
||||
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||
if focus_topic:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
|
||||
f"focus: \"{focus_topic}\"...")
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
with self._busy_command("Compressing context..."):
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
original_history = list(self.conversation_history)
|
||||
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||
if focus_topic:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
|
||||
f"focus: \"{focus_topic}\"...")
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
# _compress_context ends the old session and creates a new child
|
||||
# session on the agent (run_agent.py::_compress_context). Sync the
|
||||
# CLI's session_id so /status, /resume, exit summary, and title
|
||||
# generation all point at the live continuation session, not the
|
||||
# ended parent. Without this, subsequent end_session() calls target
|
||||
# the already-closed parent and the child is orphaned.
|
||||
if (
|
||||
getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
summary = summarize_manual_compression(
|
||||
original_history,
|
||||
self.conversation_history,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
icon = "🗜️" if summary["noop"] else "✅"
|
||||
print(f" {icon} {summary['headline']}")
|
||||
print(f" {summary['token_line']}")
|
||||
if summary["note"]:
|
||||
print(f" {summary['note']}")
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
# _compress_context ends the old session and creates a new child
|
||||
# session on the agent (run_agent.py::_compress_context). Sync the
|
||||
# CLI's session_id so /status, /resume, exit summary, and title
|
||||
# generation all point at the live continuation session, not the
|
||||
# ended parent. Without this, subsequent end_session() calls target
|
||||
# the already-closed parent and the child is orphaned.
|
||||
if (
|
||||
getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
summary = summarize_manual_compression(
|
||||
original_history,
|
||||
self.conversation_history,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
icon = "🗜️" if summary["noop"] else "✅"
|
||||
print(f" {icon} {summary['headline']}")
|
||||
print(f" {summary['token_line']}")
|
||||
if summary["note"]:
|
||||
print(f" {summary['note']}")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
|
||||
def _handle_debug_command(self):
|
||||
"""Handle /debug — upload debug report + logs and print paste URLs."""
|
||||
|
|
@ -9543,9 +9558,20 @@ class HermesCLI:
|
|||
|
||||
@kb.add('c-d')
|
||||
def handle_ctrl_d(event):
|
||||
"""Handle Ctrl+D - exit."""
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
"""Ctrl+D: delete char under cursor (standard readline behaviour).
|
||||
Only exit when the input is empty — same as bash/zsh. Pending
|
||||
attached images count as input and block the EOF-exit so the
|
||||
user doesn't lose them silently.
|
||||
"""
|
||||
buf = event.app.current_buffer
|
||||
if buf.text:
|
||||
buf.delete()
|
||||
elif self._attached_images:
|
||||
# Empty text but pending attachments — no-op, don't exit.
|
||||
return
|
||||
else:
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
|
||||
_modal_prompt_active = Condition(
|
||||
lambda: bool(self._secret_state or self._sudo_state)
|
||||
|
|
|
|||
58
cron/jobs.py
58
cron/jobs.py
|
|
@ -371,6 +371,39 @@ def save_jobs(jobs: List[Dict[str, Any]]):
|
|||
raise
|
||||
|
||||
|
||||
def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
|
||||
"""Normalize and validate a cron job workdir.
|
||||
|
||||
Rules:
|
||||
- Empty / None → None (feature off, preserves old behaviour).
|
||||
- ``~`` is expanded. Relative paths are rejected — cron jobs run detached
|
||||
from any shell cwd, so relative paths have no stable meaning.
|
||||
- The path must exist and be a directory at create/update time. We do
|
||||
NOT re-check at run time (a user might briefly unmount the dir; the
|
||||
scheduler will just fall back to old behaviour with a logged warning).
|
||||
|
||||
Returns the absolute path string, or None when disabled.
|
||||
Raises ValueError on invalid input.
|
||||
"""
|
||||
if workdir is None:
|
||||
return None
|
||||
raw = str(workdir).strip()
|
||||
if not raw:
|
||||
return None
|
||||
expanded = Path(raw).expanduser()
|
||||
if not expanded.is_absolute():
|
||||
raise ValueError(
|
||||
f"Cron workdir must be an absolute path (got {raw!r}). "
|
||||
f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous."
|
||||
)
|
||||
resolved = expanded.resolve()
|
||||
if not resolved.exists():
|
||||
raise ValueError(f"Cron workdir does not exist: {resolved}")
|
||||
if not resolved.is_dir():
|
||||
raise ValueError(f"Cron workdir is not a directory: {resolved}")
|
||||
return str(resolved)
|
||||
|
||||
|
||||
def create_job(
|
||||
prompt: str,
|
||||
schedule: str,
|
||||
|
|
@ -384,6 +417,8 @@ def create_job(
|
|||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a new cron job.
|
||||
|
|
@ -403,6 +438,15 @@ def create_job(
|
|||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
workdir: Optional absolute path. When set, the job runs as if launched
|
||||
from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
|
||||
that directory are injected into the system prompt, and the
|
||||
terminal/file/code_exec tools use it as their working directory
|
||||
(via TERMINAL_CWD). When unset, the old behaviour is preserved
|
||||
(no context files injected, tools use the scheduler's cwd).
|
||||
|
||||
Returns:
|
||||
The created job dict
|
||||
|
|
@ -433,6 +477,9 @@ def create_job(
|
|||
normalized_base_url = normalized_base_url or None
|
||||
normalized_script = str(script).strip() if isinstance(script, str) else None
|
||||
normalized_script = normalized_script or None
|
||||
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
normalized_workdir = _normalize_workdir(workdir)
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
job = {
|
||||
|
|
@ -464,6 +511,8 @@ def create_job(
|
|||
# Delivery configuration
|
||||
"deliver": deliver,
|
||||
"origin": origin, # Tracks where job was created for "origin" delivery
|
||||
"enabled_toolsets": normalized_toolsets,
|
||||
"workdir": normalized_workdir,
|
||||
}
|
||||
|
||||
jobs = load_jobs()
|
||||
|
|
@ -497,6 +546,15 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
|
|||
if job["id"] != job_id:
|
||||
continue
|
||||
|
||||
# Validate / normalize workdir if present in updates. Empty string or
|
||||
# None both mean "clear the field" (restore old behaviour).
|
||||
if "workdir" in updates:
|
||||
_wd = updates["workdir"]
|
||||
if _wd in (None, "", False):
|
||||
updates["workdir"] = None
|
||||
else:
|
||||
updates["workdir"] = _normalize_workdir(_wd)
|
||||
|
||||
updated = _apply_skill_fields({**job, **updates})
|
||||
schedule_changed = "schedule" in updates
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
|
||||
"""Resolve the toolset list for a cron job.
|
||||
|
||||
Precedence:
|
||||
1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
|
||||
Keeps the agent's job-scoped toolset override intact — #6130.
|
||||
2. Per-platform ``hermes tools`` config for the ``cron`` platform.
|
||||
Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
|
||||
so users can gate cron toolsets globally without recreating every job.
|
||||
3. ``None`` on any lookup failure — AIAgent loads the full default set
|
||||
(legacy behavior before this change, preserved as the safety net).
|
||||
|
||||
_DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
|
||||
``_get_platform_tools`` for unconfigured platforms, so fresh installs
|
||||
get cron WITHOUT ``moa`` by default (issue reported by Norbert —
|
||||
surprise $4.63 run).
|
||||
"""
|
||||
per_job = job.get("enabled_toolsets")
|
||||
if per_job:
|
||||
return per_job
|
||||
try:
|
||||
from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load
|
||||
return sorted(_get_platform_tools(cfg or {}, "cron"))
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Cron toolset resolution failed, falling back to full default toolset: %s",
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
# Valid delivery platforms — used to validate user-supplied platform names
|
||||
# in cron delivery targets, preventing env var enumeration via crafted names.
|
||||
_KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
|
|
@ -764,6 +795,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
chat_name=origin.get("chat_name", "") if origin else "",
|
||||
)
|
||||
|
||||
# Per-job working directory. When set (and validated at create/update
|
||||
# time), we point TERMINAL_CWD at it so:
|
||||
# - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md /
|
||||
# .cursorrules from the job's project dir, AND
|
||||
# - the terminal, file, and code-exec tools run commands from there.
|
||||
#
|
||||
# tick() serializes workdir-jobs outside the parallel pool, so mutating
|
||||
# os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less
|
||||
# jobs we leave TERMINAL_CWD untouched — preserves the original behaviour
|
||||
# (skip_context_files=True, tools use whatever cwd the scheduler has).
|
||||
_job_workdir = (job.get("workdir") or "").strip() or None
|
||||
if _job_workdir and not Path(_job_workdir).is_dir():
|
||||
# Directory was removed between create-time validation and now. Log
|
||||
# and drop back to old behaviour rather than crashing the job.
|
||||
logger.warning(
|
||||
"Job '%s': configured workdir %r no longer exists — running without it",
|
||||
job_id, _job_workdir,
|
||||
)
|
||||
_job_workdir = None
|
||||
_prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_")
|
||||
if _job_workdir:
|
||||
os.environ["TERMINAL_CWD"] = _job_workdir
|
||||
logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
|
||||
|
||||
try:
|
||||
# Re-read .env and config.yaml fresh every run so provider/key
|
||||
# changes take effect without a gateway restart.
|
||||
|
|
@ -840,6 +895,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
from hermes_cli.auth import AuthError
|
||||
try:
|
||||
runtime_kwargs = {
|
||||
"requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
|
|
@ -847,6 +903,28 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
if job.get("base_url"):
|
||||
runtime_kwargs["explicit_base_url"] = job.get("base_url")
|
||||
runtime = resolve_runtime_provider(**runtime_kwargs)
|
||||
except AuthError as auth_exc:
|
||||
# Primary provider auth failed — try fallback chain before giving up.
|
||||
logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc)
|
||||
fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model")
|
||||
fb_list = (fb if isinstance(fb, list) else [fb]) if fb else []
|
||||
runtime = None
|
||||
for entry in fb_list:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
try:
|
||||
fb_kwargs = {"requested": entry.get("provider")}
|
||||
if entry.get("base_url"):
|
||||
fb_kwargs["explicit_base_url"] = entry["base_url"]
|
||||
if entry.get("api_key"):
|
||||
fb_kwargs["explicit_api_key"] = entry["api_key"]
|
||||
runtime = resolve_runtime_provider(**fb_kwargs)
|
||||
logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider"))
|
||||
break
|
||||
except Exception as fb_exc:
|
||||
logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc)
|
||||
if runtime is None:
|
||||
raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
|
||||
except Exception as exc:
|
||||
message = format_runtime_provider_error(exc)
|
||||
raise RuntimeError(message) from exc
|
||||
|
|
@ -886,9 +964,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
provider_sort=pr.get("sort"),
|
||||
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
|
||||
disabled_toolsets=["cronjob", "messaging", "clarify"],
|
||||
quiet_mode=True,
|
||||
skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd
|
||||
# When a workdir is configured, inject AGENTS.md / CLAUDE.md /
|
||||
# .cursorrules from that directory; otherwise preserve the old
|
||||
# behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
|
||||
skip_context_files=not bool(_job_workdir),
|
||||
skip_memory=True, # Cron system prompts would corrupt user representations
|
||||
platform="cron",
|
||||
session_id=_cron_session_id,
|
||||
|
|
@ -1027,6 +1109,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||
return False, output, "", error_msg
|
||||
|
||||
finally:
|
||||
# Restore TERMINAL_CWD to whatever it was before this job ran. We
|
||||
# only ever mutate it when the job has a workdir; see the setup block
|
||||
# at the top of run_job for the serialization guarantee.
|
||||
if _job_workdir:
|
||||
if _prior_terminal_cwd == "_UNSET_":
|
||||
os.environ.pop("TERMINAL_CWD", None)
|
||||
else:
|
||||
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
|
||||
# Clean up ContextVar session/delivery state for this job.
|
||||
clear_session_vars(_ctx_tokens)
|
||||
if _session_db:
|
||||
|
|
@ -1154,14 +1244,28 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
|||
mark_job_run(job["id"], False, str(e))
|
||||
return False
|
||||
|
||||
# Run all due jobs concurrently, each in its own ContextVar copy
|
||||
# so session/delivery state stays isolated per-thread.
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
|
||||
_futures = []
|
||||
for job in due_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results = [f.result() for f in _futures]
|
||||
# Partition due jobs: those with a per-job workdir mutate
|
||||
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
|
||||
# so they MUST run sequentially to avoid corrupting each other. Jobs
|
||||
# without a workdir leave env untouched and stay parallel-safe.
|
||||
workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
|
||||
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
|
||||
|
||||
_results: list = []
|
||||
|
||||
# Sequential pass for workdir jobs.
|
||||
for job in workdir_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_results.append(_ctx.run(_process_job, job))
|
||||
|
||||
# Parallel pass for the rest — same behaviour as before.
|
||||
if parallel_jobs:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
|
||||
_futures = []
|
||||
for job in parallel_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results.extend(f.result() for f in _futures)
|
||||
|
||||
return sum(_results)
|
||||
finally:
|
||||
|
|
|
|||
52
docker-compose.yml
Normal file
52
docker-compose.yml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
#
|
||||
# docker-compose.yml for Hermes Agent
|
||||
#
|
||||
# Usage:
|
||||
# HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
|
||||
#
|
||||
# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
|
||||
# files created inside the container stay readable/writable on the host.
|
||||
# The entrypoint remaps the internal `hermes` user to these values via
|
||||
# usermod/groupmod + gosu.
|
||||
#
|
||||
# Security notes:
|
||||
# - The dashboard service binds to 127.0.0.1 by default. It stores API
|
||||
# keys; exposing it on LAN without auth is unsafe. If you want remote
|
||||
# access, use an SSH tunnel or put it behind a reverse proxy that
|
||||
# adds authentication — do NOT pass --insecure --host 0.0.0.0.
|
||||
# - The gateway's API server is off unless you uncomment API_SERVER_KEY
|
||||
# and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
|
||||
# this on an internet-facing host.
|
||||
#
|
||||
services:
|
||||
gateway:
|
||||
build: .
|
||||
image: hermes-agent
|
||||
container_name: hermes
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# To expose the OpenAI-compatible API server beyond localhost,
|
||||
# uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
|
||||
# - API_SERVER_HOST=0.0.0.0
|
||||
# - API_SERVER_KEY=${API_SERVER_KEY}
|
||||
command: ["gateway", "run"]
|
||||
|
||||
dashboard:
|
||||
image: hermes-agent
|
||||
container_name: hermes-dashboard
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- gateway
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`.
|
||||
command: ["dashboard", "--host", "127.0.0.1", "--no-open"]
|
||||
|
|
@ -22,9 +22,18 @@ if [ "$(id -u)" = "0" ]; then
|
|||
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
|
||||
# files created by previous runs (under the old UID) become inaccessible.
|
||||
# Always chown -R when UID was remapped; otherwise only if top-level is wrong.
|
||||
actual_hermes_uid=$(id -u hermes)
|
||||
if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
|
||||
needs_chown=false
|
||||
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
|
||||
needs_chown=true
|
||||
elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
needs_chown=true
|
||||
fi
|
||||
if [ "$needs_chown" = true ]; then
|
||||
echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
|
||||
# In rootless Podman the container's "root" is mapped to an unprivileged
|
||||
# host UID — chown will fail. That's fine: the volume is already owned
|
||||
# by the mapped user on the host side.
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ class SessionResetPolicy:
|
|||
mode=mode if mode is not None else "both",
|
||||
at_hour=at_hour if at_hour is not None else 4,
|
||||
idle_minutes=idle_minutes if idle_minutes is not None else 1440,
|
||||
notify=notify if notify is not None else True,
|
||||
notify=_coerce_bool(notify, True),
|
||||
notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
|
||||
)
|
||||
|
||||
|
|
@ -178,7 +178,7 @@ class PlatformConfig:
|
|||
home_channel = HomeChannel.from_dict(data["home_channel"])
|
||||
|
||||
return cls(
|
||||
enabled=data.get("enabled", False),
|
||||
enabled=_coerce_bool(data.get("enabled"), False),
|
||||
token=data.get("token"),
|
||||
api_key=data.get("api_key"),
|
||||
home_channel=home_channel,
|
||||
|
|
@ -435,7 +435,7 @@ class GatewayConfig:
|
|||
reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
|
||||
quick_commands=quick_commands,
|
||||
sessions_dir=sessions_dir,
|
||||
always_log_local=data.get("always_log_local", True),
|
||||
always_log_local=_coerce_bool(data.get("always_log_local"), True),
|
||||
stt_enabled=_coerce_bool(stt_enabled, True),
|
||||
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
|
||||
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
|
||||
|
|
@ -687,6 +687,11 @@ def load_gateway_config() -> GatewayConfig:
|
|||
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
|
||||
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
|
||||
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
|
||||
if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
|
||||
gac = telegram_cfg["group_allowed_chats"]
|
||||
if isinstance(gac, list):
|
||||
gac = ",".join(str(v) for v in gac)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
|
||||
if "disable_link_previews" in telegram_cfg:
|
||||
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
|
|
|
|||
|
|
@ -1204,10 +1204,12 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
|
||||
If the client disconnects mid-stream, ``agent.interrupt()`` is
|
||||
called so the agent stops issuing upstream LLM calls, then the
|
||||
asyncio task is cancelled. When ``store=True`` the full response
|
||||
is persisted to the ResponseStore in a ``finally`` block so GET
|
||||
/v1/responses/{id} and ``previous_response_id`` chaining work the
|
||||
same as the batch path.
|
||||
asyncio task is cancelled. When ``store=True`` an initial
|
||||
``in_progress`` snapshot is persisted immediately after
|
||||
``response.created`` and disconnects update it to an
|
||||
``incomplete`` snapshot so GET /v1/responses/{id} and
|
||||
``previous_response_id`` chaining still have something to
|
||||
recover from.
|
||||
"""
|
||||
import queue as _q
|
||||
|
||||
|
|
@ -1269,6 +1271,60 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
final_response_text = ""
|
||||
agent_error: Optional[str] = None
|
||||
usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
terminal_snapshot_persisted = False
|
||||
|
||||
def _persist_response_snapshot(
|
||||
response_env: Dict[str, Any],
|
||||
*,
|
||||
conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> None:
|
||||
if not store:
|
||||
return
|
||||
if conversation_history_snapshot is None:
|
||||
conversation_history_snapshot = list(conversation_history)
|
||||
conversation_history_snapshot.append({"role": "user", "content": user_message})
|
||||
self._response_store.put(response_id, {
|
||||
"response": response_env,
|
||||
"conversation_history": conversation_history_snapshot,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
def _persist_incomplete_if_needed() -> None:
|
||||
"""Persist an ``incomplete`` snapshot if no terminal one was written.
|
||||
|
||||
Called from both the client-disconnect (``ConnectionResetError``)
|
||||
and server-cancellation (``asyncio.CancelledError``) paths so
|
||||
GET /v1/responses/{id} and ``previous_response_id`` chaining keep
|
||||
working after abrupt stream termination.
|
||||
"""
|
||||
if not store or terminal_snapshot_persisted:
|
||||
return
|
||||
incomplete_text = "".join(final_text_parts) or final_response_text
|
||||
incomplete_items: List[Dict[str, Any]] = list(emitted_items)
|
||||
if incomplete_text:
|
||||
incomplete_items.append({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": incomplete_text}],
|
||||
})
|
||||
incomplete_env = _envelope("incomplete")
|
||||
incomplete_env["output"] = incomplete_items
|
||||
incomplete_env["usage"] = {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
incomplete_history = list(conversation_history)
|
||||
incomplete_history.append({"role": "user", "content": user_message})
|
||||
if incomplete_text:
|
||||
incomplete_history.append({"role": "assistant", "content": incomplete_text})
|
||||
_persist_response_snapshot(
|
||||
incomplete_env,
|
||||
conversation_history_snapshot=incomplete_history,
|
||||
)
|
||||
|
||||
try:
|
||||
# response.created — initial envelope, status=in_progress
|
||||
|
|
@ -1278,6 +1334,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
"type": "response.created",
|
||||
"response": created_env,
|
||||
})
|
||||
_persist_response_snapshot(created_env)
|
||||
last_activity = time.monotonic()
|
||||
|
||||
async def _open_message_item() -> None:
|
||||
|
|
@ -1534,6 +1591,18 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
_failed_history = list(conversation_history)
|
||||
_failed_history.append({"role": "user", "content": user_message})
|
||||
if final_response_text or agent_error:
|
||||
_failed_history.append({
|
||||
"role": "assistant",
|
||||
"content": final_response_text or agent_error,
|
||||
})
|
||||
_persist_response_snapshot(
|
||||
failed_env,
|
||||
conversation_history_snapshot=_failed_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.failed", {
|
||||
"type": "response.failed",
|
||||
"response": failed_env,
|
||||
|
|
@ -1546,30 +1615,24 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
_persist_response_snapshot(
|
||||
completed_env,
|
||||
conversation_history_snapshot=full_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.completed", {
|
||||
"type": "response.completed",
|
||||
"response": completed_env,
|
||||
})
|
||||
|
||||
# Persist for future chaining / GET retrieval, mirroring
|
||||
# the batch path behavior.
|
||||
if store:
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
self._response_store.put(response_id, {
|
||||
"response": completed_env,
|
||||
"conversation_history": full_history,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
|
||||
_persist_incomplete_if_needed()
|
||||
# Client disconnected — interrupt the agent so it stops
|
||||
# making upstream LLM calls, then cancel the task.
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
|
|
@ -1585,6 +1648,22 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
logger.info("SSE client disconnected; interrupted agent task %s", response_id)
|
||||
except asyncio.CancelledError:
|
||||
# Server-side cancellation (e.g. shutdown, request timeout) —
|
||||
# persist an incomplete snapshot so GET /v1/responses/{id} and
|
||||
# previous_response_id chaining still work, then re-raise so the
|
||||
# runtime's cancellation semantics are respected.
|
||||
_persist_incomplete_if_needed()
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
if agent is not None:
|
||||
try:
|
||||
agent.interrupt("SSE task cancelled")
|
||||
except Exception:
|
||||
pass
|
||||
if not agent_task.done():
|
||||
agent_task.cancel()
|
||||
logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
|
||||
raise
|
||||
|
||||
return response
|
||||
|
||||
|
|
|
|||
|
|
@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None:
|
|||
return None
|
||||
|
||||
|
||||
def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
def _split_host_port(value: str) -> tuple[str, int | None]:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
return "", None
|
||||
if "://" in raw:
|
||||
parsed = urlsplit(raw)
|
||||
return (parsed.hostname or "").lower().rstrip("."), parsed.port
|
||||
if raw.startswith("[") and "]" in raw:
|
||||
host, _, rest = raw[1:].partition("]")
|
||||
port = None
|
||||
if rest.startswith(":") and rest[1:].isdigit():
|
||||
port = int(rest[1:])
|
||||
return host.lower().rstrip("."), port
|
||||
if raw.count(":") == 1:
|
||||
host, _, maybe_port = raw.rpartition(":")
|
||||
if maybe_port.isdigit():
|
||||
return host.lower().rstrip("."), int(maybe_port)
|
||||
return raw.lower().strip("[]").rstrip("."), None
|
||||
|
||||
|
||||
def _no_proxy_entries() -> list[str]:
|
||||
entries: list[str] = []
|
||||
for key in ("NO_PROXY", "no_proxy"):
|
||||
raw = os.environ.get(key, "")
|
||||
entries.extend(part.strip() for part in raw.split(",") if part.strip())
|
||||
return entries
|
||||
|
||||
|
||||
def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
|
||||
token = str(entry or "").strip().lower()
|
||||
if not token:
|
||||
return False
|
||||
if token == "*":
|
||||
return True
|
||||
|
||||
token_host, token_port = _split_host_port(token)
|
||||
if token_port is not None and port is not None and token_port != port:
|
||||
return False
|
||||
if token_port is not None and port is None:
|
||||
return False
|
||||
if not token_host:
|
||||
return False
|
||||
|
||||
try:
|
||||
network = ipaddress.ip_network(token_host, strict=False)
|
||||
try:
|
||||
return ipaddress.ip_address(host) in network
|
||||
except ValueError:
|
||||
return False
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
try:
|
||||
token_ip = ipaddress.ip_address(token_host)
|
||||
try:
|
||||
return ipaddress.ip_address(host) == token_ip
|
||||
except ValueError:
|
||||
return False
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if token_host.startswith("*."):
|
||||
suffix = token_host[1:]
|
||||
return host.endswith(suffix)
|
||||
if token_host.startswith("."):
|
||||
return host == token_host[1:] or host.endswith(token_host)
|
||||
return host == token_host or host.endswith(f".{token_host}")
|
||||
|
||||
|
||||
def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
|
||||
"""Return True when NO_PROXY/no_proxy matches at least one target host.
|
||||
|
||||
Supports exact hosts, domain suffixes, wildcard suffixes, IP literals,
|
||||
CIDR ranges, optional host:port entries, and ``*``.
|
||||
"""
|
||||
entries = _no_proxy_entries()
|
||||
if not entries or not target_hosts:
|
||||
return False
|
||||
if isinstance(target_hosts, str):
|
||||
candidates = [target_hosts]
|
||||
else:
|
||||
candidates = list(target_hosts)
|
||||
for candidate in candidates:
|
||||
host, port = _split_host_port(str(candidate))
|
||||
if not host:
|
||||
continue
|
||||
if any(_no_proxy_entry_matches(entry, host, port) for entry in entries):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def resolve_proxy_url(
|
||||
platform_env_var: str | None = None,
|
||||
*,
|
||||
target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
|
||||
) -> str | None:
|
||||
"""Return a proxy URL from env vars, or macOS system proxy.
|
||||
|
||||
Check order:
|
||||
|
|
@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
|||
1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
|
||||
2. macOS system proxy via ``scutil --proxy`` (auto-detect)
|
||||
|
||||
Returns *None* if no proxy is found.
|
||||
Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
|
||||
of ``target_hosts``.
|
||||
"""
|
||||
if platform_env_var:
|
||||
value = (os.environ.get(platform_env_var) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = (os.environ.get(key) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
return normalize_proxy_url(_detect_macos_system_proxy())
|
||||
detected = normalize_proxy_url(_detect_macos_system_proxy())
|
||||
if detected and should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return detected
|
||||
|
||||
|
||||
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str:
|
|||
|
||||
class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
platform = Platform.BLUEBUBBLES
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
|
|
@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
|||
# Text sending
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
|
||||
# Use the base splitter but skip pagination indicators — iMessage
|
||||
# bubbles flow naturally without "(1/3)" suffixes.
|
||||
chunks = BasePlatformAdapter.truncate_message(content, max_length)
|
||||
return [re.sub(r"\s*\(\d+/\d+\)$", "", c) for c in chunks]
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
|
@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
|||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
text = strip_markdown(content or "")
|
||||
text = self.format_message(content)
|
||||
if not text:
|
||||
return SendResult(success=False, error="BlueBubbles send requires text")
|
||||
chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
|
||||
# Split on paragraph breaks first (double newlines) so each thought
|
||||
# becomes its own iMessage bubble, then truncate any that are still
|
||||
# too long.
|
||||
paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
|
||||
chunks: List[str] = []
|
||||
for para in (paragraphs or [text]):
|
||||
if len(para) <= self.MAX_MESSAGE_LENGTH:
|
||||
chunks.append(para)
|
||||
else:
|
||||
chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
|
||||
last = SendResult(success=True)
|
||||
for chunk in chunks:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from typing import Callable, Dict, Optional, Any
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
|
||||
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
|
||||
|
||||
try:
|
||||
import discord
|
||||
|
|
@ -802,8 +803,27 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
if not self._client:
|
||||
return
|
||||
try:
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
|
||||
sync_policy = self._get_discord_command_sync_policy()
|
||||
if sync_policy == "off":
|
||||
logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
|
||||
return
|
||||
|
||||
if sync_policy == "bulk":
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
|
||||
return
|
||||
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
|
||||
logger.info(
|
||||
"[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
|
||||
self.name,
|
||||
summary["total"],
|
||||
summary["unchanged"],
|
||||
summary["updated"],
|
||||
summary["recreated"],
|
||||
summary["created"],
|
||||
summary["deleted"],
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("[%s] Slash command sync timed out after 30s", self.name)
|
||||
except asyncio.CancelledError:
|
||||
|
|
@ -811,6 +831,183 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)
|
||||
|
||||
def _get_discord_command_sync_policy(self) -> str:
|
||||
raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
|
||||
if raw in _DISCORD_COMMAND_SYNC_POLICIES:
|
||||
return raw
|
||||
if raw:
|
||||
logger.warning(
|
||||
"[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
|
||||
self.name,
|
||||
raw,
|
||||
)
|
||||
return "safe"
|
||||
|
||||
def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Reduce command payloads to the semantic fields Hermes manages."""
|
||||
contexts = payload.get("contexts")
|
||||
integration_types = payload.get("integration_types")
|
||||
return {
|
||||
"type": int(payload.get("type", 1) or 1),
|
||||
"name": str(payload.get("name", "") or ""),
|
||||
"description": str(payload.get("description", "") or ""),
|
||||
"default_member_permissions": self._normalize_permissions(
|
||||
payload.get("default_member_permissions")
|
||||
),
|
||||
"dm_permission": bool(payload.get("dm_permission", True)),
|
||||
"nsfw": bool(payload.get("nsfw", False)),
|
||||
"contexts": sorted(int(c) for c in contexts) if contexts else None,
|
||||
"integration_types": (
|
||||
sorted(int(i) for i in integration_types) if integration_types else None
|
||||
),
|
||||
"options": [
|
||||
self._canonicalize_app_command_option(item)
|
||||
for item in payload.get("options", []) or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _normalize_permissions(value: Any) -> Optional[str]:
|
||||
"""Discord emits default_member_permissions as str server-side but discord.py
|
||||
sets it as int locally. Normalize to str-or-None so the comparison is stable."""
|
||||
if value is None:
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
|
||||
"""Build a canonical-ready dict from an AppCommand.
|
||||
|
||||
discord.py's AppCommand.to_dict() does NOT include nsfw,
|
||||
dm_permission, or default_member_permissions (they live only on the
|
||||
attributes). Pull them from the attributes so the canonicalizer sees
|
||||
the real server-side values instead of defaults — otherwise any
|
||||
command using non-default permissions would diff on every startup.
|
||||
"""
|
||||
payload = dict(command.to_dict())
|
||||
nsfw = getattr(command, "nsfw", None)
|
||||
if nsfw is not None:
|
||||
payload["nsfw"] = bool(nsfw)
|
||||
guild_only = getattr(command, "guild_only", None)
|
||||
if guild_only is not None:
|
||||
payload["dm_permission"] = not bool(guild_only)
|
||||
default_permissions = getattr(command, "default_member_permissions", None)
|
||||
if default_permissions is not None:
|
||||
payload["default_member_permissions"] = getattr(
|
||||
default_permissions, "value", default_permissions
|
||||
)
|
||||
return payload
|
||||
|
||||
def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {
|
||||
"type": int(payload.get("type", 0) or 0),
|
||||
"name": str(payload.get("name", "") or ""),
|
||||
"description": str(payload.get("description", "") or ""),
|
||||
"required": bool(payload.get("required", False)),
|
||||
"autocomplete": bool(payload.get("autocomplete", False)),
|
||||
"choices": [
|
||||
{
|
||||
"name": str(choice.get("name", "") or ""),
|
||||
"value": choice.get("value"),
|
||||
}
|
||||
for choice in payload.get("choices", []) or []
|
||||
if isinstance(choice, dict)
|
||||
],
|
||||
"channel_types": list(payload.get("channel_types", []) or []),
|
||||
"min_value": payload.get("min_value"),
|
||||
"max_value": payload.get("max_value"),
|
||||
"min_length": payload.get("min_length"),
|
||||
"max_length": payload.get("max_length"),
|
||||
"options": [
|
||||
self._canonicalize_app_command_option(item)
|
||||
for item in payload.get("options", []) or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
}
|
||||
|
||||
def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Fields supported by discord.py's edit_global_command route."""
|
||||
canonical = self._canonicalize_app_command_payload(payload)
|
||||
return {
|
||||
"name": canonical["name"],
|
||||
"description": canonical["description"],
|
||||
"options": canonical["options"],
|
||||
}
|
||||
|
||||
async def _safe_sync_slash_commands(self) -> Dict[str, int]:
|
||||
"""Diff existing global commands and only mutate the commands that changed."""
|
||||
if not self._client:
|
||||
return {
|
||||
"total": 0,
|
||||
"unchanged": 0,
|
||||
"updated": 0,
|
||||
"recreated": 0,
|
||||
"created": 0,
|
||||
"deleted": 0,
|
||||
}
|
||||
|
||||
tree = self._client.tree
|
||||
app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
|
||||
if not app_id:
|
||||
raise RuntimeError("Discord application ID is unavailable for slash command sync")
|
||||
|
||||
desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
|
||||
desired_by_key = {
|
||||
(int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
|
||||
for payload in desired_payloads
|
||||
}
|
||||
existing_commands = await tree.fetch_commands()
|
||||
existing_by_key = {
|
||||
(
|
||||
int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
|
||||
str(command.name or "").lower(),
|
||||
): command
|
||||
for command in existing_commands
|
||||
}
|
||||
|
||||
unchanged = 0
|
||||
updated = 0
|
||||
recreated = 0
|
||||
created = 0
|
||||
deleted = 0
|
||||
http = self._client.http
|
||||
|
||||
for key, desired in desired_by_key.items():
|
||||
current = existing_by_key.pop(key, None)
|
||||
if current is None:
|
||||
await http.upsert_global_command(app_id, desired)
|
||||
created += 1
|
||||
continue
|
||||
|
||||
current_existing_payload = self._existing_command_to_payload(current)
|
||||
current_payload = self._canonicalize_app_command_payload(current_existing_payload)
|
||||
desired_payload = self._canonicalize_app_command_payload(desired)
|
||||
if current_payload == desired_payload:
|
||||
unchanged += 1
|
||||
continue
|
||||
|
||||
if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
|
||||
await http.delete_global_command(app_id, current.id)
|
||||
await http.upsert_global_command(app_id, desired)
|
||||
recreated += 1
|
||||
continue
|
||||
|
||||
await http.edit_global_command(app_id, current.id, desired)
|
||||
updated += 1
|
||||
|
||||
for current in existing_by_key.values():
|
||||
await http.delete_global_command(app_id, current.id)
|
||||
deleted += 1
|
||||
|
||||
return {
|
||||
"total": len(desired_payloads),
|
||||
"unchanged": unchanged,
|
||||
"updated": updated,
|
||||
"recreated": recreated,
|
||||
"created": created,
|
||||
"deleted": deleted,
|
||||
}
|
||||
|
||||
async def _add_reaction(self, message: Any, emoji: str) -> bool:
|
||||
"""Add an emoji reaction to a Discord message."""
|
||||
if not message or not hasattr(message, "add_reaction"):
|
||||
|
|
@ -2049,10 +2246,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
async def slash_usage(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/usage")
|
||||
|
||||
@tree.command(name="provider", description="Show available providers")
|
||||
async def slash_provider(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/provider")
|
||||
|
||||
@tree.command(name="help", description="Show available commands")
|
||||
async def slash_help(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/help")
|
||||
|
|
@ -2522,7 +2715,12 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
|
||||
|
||||
def _discord_free_response_channels(self) -> set:
|
||||
"""Return Discord channel IDs where no bot mention is required."""
|
||||
"""Return Discord channel IDs where no bot mention is required.
|
||||
|
||||
A single ``"*"`` entry (either from a list or a comma-separated
|
||||
string) is preserved in the returned set so callers can short-circuit
|
||||
on wildcard membership, consistent with ``allowed_channels``.
|
||||
"""
|
||||
raw = self.config.extra.get("free_response_channels")
|
||||
if raw is None:
|
||||
raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
|
||||
|
|
@ -3015,14 +3213,14 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
|
||||
if allowed_channels_raw:
|
||||
allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
|
||||
if not (channel_ids & allowed_channels):
|
||||
if "*" not in allowed_channels and not (channel_ids & allowed_channels):
|
||||
logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
# Check ignored channels - never respond even when mentioned
|
||||
ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
|
||||
ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
|
||||
if channel_ids & ignored_channels:
|
||||
if "*" in ignored_channels or (channel_ids & ignored_channels):
|
||||
logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
|
|
@ -3036,7 +3234,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
|
||||
current_channel_id = str(message.channel.id)
|
||||
is_voice_linked_channel = current_channel_id in voice_linked_ids
|
||||
is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel
|
||||
is_free_channel = (
|
||||
"*" in free_channels
|
||||
or bool(channel_ids & free_channels)
|
||||
or is_voice_linked_channel
|
||||
)
|
||||
|
||||
# Skip the mention check if the message is in a thread where
|
||||
# the bot has previously participated (auto-created or replied in).
|
||||
|
|
@ -3669,6 +3871,15 @@ if DISCORD_AVAILABLE:
|
|||
|
||||
self.resolved = True
|
||||
model_id = interaction.data["values"][0]
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Switching Model",
|
||||
description=f"Switching to `{model_id}`...",
|
||||
color=discord.Color.blue(),
|
||||
),
|
||||
view=None,
|
||||
)
|
||||
|
||||
try:
|
||||
result_text = await self.on_model_selected(
|
||||
|
|
@ -3679,14 +3890,13 @@ if DISCORD_AVAILABLE:
|
|||
except Exception as exc:
|
||||
result_text = f"Error switching model: {exc}"
|
||||
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
await interaction.edit_original_response(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Model Switched",
|
||||
description=result_text,
|
||||
color=discord.Color.green(),
|
||||
),
|
||||
view=self,
|
||||
view=None,
|
||||
)
|
||||
|
||||
async def _on_back(self, interaction: discord.Interaction):
|
||||
|
|
|
|||
|
|
@ -703,7 +703,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
|
||||
}
|
||||
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
|
||||
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
|
||||
fallback_ips = self._fallback_ips()
|
||||
if not fallback_ips:
|
||||
|
|
@ -714,6 +713,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
", ".join(fallback_ips),
|
||||
)
|
||||
|
||||
proxy_targets = ["api.telegram.org", *fallback_ips]
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
|
||||
if fallback_ips and not proxy_url and not disable_fallback:
|
||||
logger.info(
|
||||
"[%s] Telegram fallback IPs active: %s",
|
||||
|
|
|
|||
|
|
@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
|
|||
_SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
|
||||
|
||||
|
||||
def _resolve_proxy_url() -> str | None:
|
||||
def _resolve_proxy_url(target_hosts=None) -> str | None:
|
||||
# Delegate to shared implementation (env vars + macOS system proxy detection)
|
||||
from gateway.platforms.base import resolve_proxy_url
|
||||
return resolve_proxy_url("TELEGRAM_PROXY")
|
||||
return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)
|
||||
|
||||
|
||||
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
|
|
@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
|||
|
||||
def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
|
||||
self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
|
||||
proxy_url = _resolve_proxy_url()
|
||||
proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
|
||||
if proxy_url and "proxy" not in transport_kwargs:
|
||||
transport_kwargs["proxy"] = proxy_url
|
||||
self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
|
||||
|
|
|
|||
373
gateway/run.py
373
gateway/run.py
|
|
@ -14,6 +14,7 @@ Usage:
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -297,50 +298,16 @@ from gateway.restart import (
|
|||
)
|
||||
|
||||
|
||||
def _normalize_whatsapp_identifier(value: str) -> str:
|
||||
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier."""
|
||||
return (
|
||||
str(value or "")
|
||||
.strip()
|
||||
.replace("+", "", 1)
|
||||
.split(":", 1)[0]
|
||||
.split("@", 1)[0]
|
||||
)
|
||||
from gateway.whatsapp_identity import (
|
||||
canonical_whatsapp_identifier as _canonical_whatsapp_identifier, # noqa: F401
|
||||
expand_whatsapp_aliases as _expand_whatsapp_auth_aliases,
|
||||
normalize_whatsapp_identifier as _normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
def _expand_whatsapp_auth_aliases(identifier: str) -> set:
|
||||
"""Resolve WhatsApp phone/LID aliases using bridge session mapping files."""
|
||||
normalized = _normalize_whatsapp_identifier(identifier)
|
||||
if not normalized:
|
||||
return set()
|
||||
|
||||
session_dir = _hermes_home / "whatsapp" / "session"
|
||||
resolved = set()
|
||||
queue = [normalized]
|
||||
|
||||
while queue:
|
||||
current = queue.pop(0)
|
||||
if not current or current in resolved:
|
||||
continue
|
||||
|
||||
resolved.add(current)
|
||||
for suffix in ("", "_reverse"):
|
||||
mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
|
||||
if not mapping_path.exists():
|
||||
continue
|
||||
try:
|
||||
mapped = _normalize_whatsapp_identifier(
|
||||
json.loads(mapping_path.read_text(encoding="utf-8"))
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
if mapped and mapped not in resolved:
|
||||
queue.append(mapped)
|
||||
|
||||
return resolved
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Sentinel placed into _running_agents immediately when a session starts
|
||||
# processing, *before* any await. Prevents a second message for the same
|
||||
# session from bypassing the "already running" guard during the async gap
|
||||
|
|
@ -349,16 +316,30 @@ _AGENT_PENDING_SENTINEL = object()
|
|||
|
||||
|
||||
def _resolve_runtime_agent_kwargs() -> dict:
|
||||
"""Resolve provider credentials for gateway-created AIAgent instances."""
|
||||
"""Resolve provider credentials for gateway-created AIAgent instances.
|
||||
|
||||
If the primary provider fails with an authentication error, attempt to
|
||||
resolve credentials using the fallback provider chain from config.yaml
|
||||
before giving up.
|
||||
"""
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
from hermes_cli.auth import AuthError
|
||||
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
)
|
||||
except AuthError as auth_exc:
|
||||
# Primary provider auth failed (expired token, revoked key, etc.).
|
||||
# Try the fallback provider chain before raising.
|
||||
logger.warning("Primary provider auth failed: %s — trying fallback", auth_exc)
|
||||
fb_config = _try_resolve_fallback_provider()
|
||||
if fb_config is not None:
|
||||
return fb_config
|
||||
raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
|
||||
except Exception as exc:
|
||||
raise RuntimeError(format_runtime_provider_error(exc)) from exc
|
||||
|
||||
|
|
@ -373,6 +354,48 @@ def _resolve_runtime_agent_kwargs() -> dict:
|
|||
}
|
||||
|
||||
|
||||
def _try_resolve_fallback_provider() -> dict | None:
|
||||
"""Attempt to resolve credentials from the fallback_model/fallback_providers config."""
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
try:
|
||||
import yaml as _y
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if not cfg_path.exists():
|
||||
return None
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
fb = cfg.get("fallback_providers") or cfg.get("fallback_model")
|
||||
if not fb:
|
||||
return None
|
||||
# Normalize to list
|
||||
fb_list = fb if isinstance(fb, list) else [fb]
|
||||
for entry in fb_list:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=entry.get("provider"),
|
||||
explicit_base_url=entry.get("base_url"),
|
||||
explicit_api_key=entry.get("api_key"),
|
||||
)
|
||||
logger.info("Fallback provider resolved: %s", runtime.get("provider"))
|
||||
return {
|
||||
"api_key": runtime.get("api_key"),
|
||||
"base_url": runtime.get("base_url"),
|
||||
"provider": runtime.get("provider"),
|
||||
"api_mode": runtime.get("api_mode"),
|
||||
"command": runtime.get("command"),
|
||||
"args": list(runtime.get("args") or []),
|
||||
"credential_pool": runtime.get("credential_pool"),
|
||||
}
|
||||
except Exception as fb_exc:
|
||||
logger.debug("Fallback entry %s failed: %s", entry.get("provider"), fb_exc)
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _build_media_placeholder(event) -> str:
|
||||
"""Build a text placeholder for media-only events so they aren't dropped.
|
||||
|
||||
|
|
@ -1551,27 +1574,23 @@ class GatewayRunner:
|
|||
)
|
||||
return True
|
||||
|
||||
# --- Normal busy case (agent actively running a task) ---
|
||||
# The user sent a message while the agent is working. Interrupt the
|
||||
# agent immediately so it stops the current tool-calling loop and
|
||||
# processes the new message. The pending message is stored in the
|
||||
# adapter so the base adapter picks it up once the interrupted run
|
||||
# returns. A brief ack tells the user what's happening (debounced
|
||||
# to avoid spam when they fire multiple messages quickly).
|
||||
|
||||
# Normal busy case (agent actively running a task)
|
||||
adapter = self.adapters.get(event.source.platform)
|
||||
if not adapter:
|
||||
return False # let default path handle it
|
||||
|
||||
# Store the message so it's processed as the next turn after the
|
||||
# interrupt causes the current run to exit.
|
||||
# current run finishes (or is interrupted).
|
||||
from gateway.platforms.base import merge_pending_message_event
|
||||
merge_pending_message_event(adapter._pending_messages, session_key, event)
|
||||
|
||||
# Interrupt the running agent — this aborts in-flight tool calls and
|
||||
# causes the agent loop to exit at the next check point.
|
||||
is_queue_mode = self._busy_input_mode == "queue"
|
||||
|
||||
# If not in queue mode, interrupt the running agent immediately.
|
||||
# This aborts in-flight tool calls and causes the agent loop to exit
|
||||
# at the next check point.
|
||||
running_agent = self._running_agents.get(session_key)
|
||||
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
|
||||
if not is_queue_mode and running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
|
||||
try:
|
||||
running_agent.interrupt(event.text)
|
||||
except Exception:
|
||||
|
|
@ -1583,7 +1602,7 @@ class GatewayRunner:
|
|||
now = time.time()
|
||||
last_ack = self._busy_ack_ts.get(session_key, 0)
|
||||
if now - last_ack < _BUSY_ACK_COOLDOWN:
|
||||
return True # interrupt sent, ack already delivered recently
|
||||
return True # interrupt sent (if not queue), ack already delivered recently
|
||||
|
||||
self._busy_ack_ts[session_key] = now
|
||||
|
||||
|
|
@ -1608,10 +1627,16 @@ class GatewayRunner:
|
|||
pass
|
||||
|
||||
status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
|
||||
message = (
|
||||
f"⚡ Interrupting current task{status_detail}. "
|
||||
f"I'll respond to your message shortly."
|
||||
)
|
||||
if is_queue_mode:
|
||||
message = (
|
||||
f"⏳ Queued for the next turn{status_detail}. "
|
||||
f"I'll respond once the current task finishes."
|
||||
)
|
||||
else:
|
||||
message = (
|
||||
f"⚡ Interrupting current task{status_detail}. "
|
||||
f"I'll respond to your message shortly."
|
||||
)
|
||||
|
||||
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
try:
|
||||
|
|
@ -2307,6 +2332,17 @@ class GatewayRunner:
|
|||
for key, entry in _expired_entries:
|
||||
try:
|
||||
await self._async_flush_memories(entry.session_id, key)
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_parts = key.split(":")
|
||||
_platform = _parts[2] if len(_parts) > 2 else ""
|
||||
_invoke_hook(
|
||||
"on_session_finalize",
|
||||
session_id=entry.session_id,
|
||||
platform=_platform,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
# Shut down memory provider and close tool resources
|
||||
# on the cached agent. Idle agents live in
|
||||
# _agent_cache (not _running_agents), so look there.
|
||||
|
|
@ -2560,6 +2596,40 @@ class GatewayRunner:
|
|||
return
|
||||
|
||||
async def _stop_impl() -> None:
|
||||
def _kill_tool_subprocesses(phase: str) -> None:
|
||||
"""Kill tool subprocesses + tear down terminal envs + browsers.
|
||||
|
||||
Called twice in the shutdown path: once eagerly after a
|
||||
drain timeout forces agent interrupt (so we reclaim bash/
|
||||
sleep children before systemd TimeoutStopSec escalates to
|
||||
SIGKILL on the cgroup — #8202), and once as a final
|
||||
catch-all at the end of _stop_impl() for the graceful
|
||||
path or anything respawned mid-teardown.
|
||||
|
||||
All steps are best-effort; exceptions are swallowed so
|
||||
one subsystem's failure doesn't block the rest.
|
||||
"""
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
_killed = process_registry.kill_all()
|
||||
if _killed:
|
||||
logger.info(
|
||||
"Shutdown (%s): killed %d tool subprocess(es)",
|
||||
phase, _killed,
|
||||
)
|
||||
except Exception as _e:
|
||||
logger.debug("process_registry.kill_all (%s) error: %s", phase, _e)
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception as _e:
|
||||
logger.debug("cleanup_all_environments (%s) error: %s", phase, _e)
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
cleanup_all_browsers()
|
||||
except Exception as _e:
|
||||
logger.debug("cleanup_all_browsers (%s) error: %s", phase, _e)
|
||||
|
||||
logger.info(
|
||||
"Stopping gateway%s...",
|
||||
" for restart" if self._restart_requested else "",
|
||||
|
|
@ -2621,6 +2691,16 @@ class GatewayRunner:
|
|||
self._update_runtime_status("draining")
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
# Kill lingering tool subprocesses NOW, before we spend more
|
||||
# budget on adapter disconnect / session DB close. Under
|
||||
# systemd (TimeoutStopSec bounded by drain_timeout+headroom),
|
||||
# deferring this to the end of stop() risks systemd escalating
|
||||
# to SIGKILL on the cgroup first — at which point bash/sleep
|
||||
# children left behind by an interrupted terminal tool get
|
||||
# killed by systemd instead of us (issue #8202). The final
|
||||
# catch-all cleanup below still runs for the graceful path.
|
||||
_kill_tool_subprocesses("post-interrupt")
|
||||
|
||||
if self._restart_requested and self._restart_detached:
|
||||
try:
|
||||
await self._launch_detached_restart_command()
|
||||
|
|
@ -2656,22 +2736,13 @@ class GatewayRunner:
|
|||
self._shutdown_event.set()
|
||||
|
||||
# Global cleanup: kill any remaining tool subprocesses not tied
|
||||
# to a specific agent (catch-all for zombie prevention).
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
process_registry.kill_all()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
cleanup_all_browsers()
|
||||
except Exception:
|
||||
pass
|
||||
# to a specific agent (catch-all for zombie prevention). On the
|
||||
# drain-timeout path we already did this earlier after agent
|
||||
# interrupt — this second call catches (a) the graceful path
|
||||
# where drain succeeded without interrupt, and (b) anything
|
||||
# that got respawned between the earlier call and adapter
|
||||
# disconnect (defense in depth; safe to call repeatedly).
|
||||
_kill_tool_subprocesses("final-cleanup")
|
||||
|
||||
# Close SQLite session DBs so the WAL write lock is released.
|
||||
# Without this, --replace and similar restart flows leave the
|
||||
|
|
@ -2932,6 +3003,7 @@ class GatewayRunner:
|
|||
Platform.QQBOT: "QQ_ALLOWED_USERS",
|
||||
}
|
||||
platform_group_env_map = {
|
||||
Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS",
|
||||
Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS",
|
||||
}
|
||||
platform_allow_all_map = {
|
||||
|
|
@ -2988,7 +3060,7 @@ class GatewayRunner:
|
|||
# Check platform-specific and global allowlists
|
||||
platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
|
||||
group_allowlist = ""
|
||||
if source.chat_type == "group":
|
||||
if source.chat_type in {"group", "forum"}:
|
||||
group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip()
|
||||
global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()
|
||||
|
||||
|
|
@ -2997,7 +3069,7 @@ class GatewayRunner:
|
|||
return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
|
||||
|
||||
# Some platforms authorize group traffic by chat ID rather than sender ID.
|
||||
if group_allowlist and source.chat_type == "group" and source.chat_id:
|
||||
if group_allowlist and source.chat_type in {"group", "forum"} and source.chat_id:
|
||||
allowed_group_ids = {
|
||||
chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip()
|
||||
}
|
||||
|
|
@ -3108,7 +3180,50 @@ class GatewayRunner:
|
|||
|
||||
# Internal events (e.g. background-process completion notifications)
|
||||
# are system-generated and must skip user authorization.
|
||||
if getattr(event, "internal", False):
|
||||
is_internal = bool(getattr(event, "internal", False))
|
||||
|
||||
# Fire pre_gateway_dispatch plugin hook for user-originated messages.
|
||||
# Plugins receive the MessageEvent and may return a dict influencing flow:
|
||||
# {"action": "skip", "reason": ...} -> drop (no reply, plugin handled)
|
||||
# {"action": "rewrite", "text": ...} -> replace event.text, continue
|
||||
# {"action": "allow"} / None -> normal dispatch
|
||||
# Hook runs BEFORE auth so plugins can handle unauthorized senders
|
||||
# (e.g. customer handover ingest) without triggering the pairing flow.
|
||||
if not is_internal:
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_hook_results = _invoke_hook(
|
||||
"pre_gateway_dispatch",
|
||||
event=event,
|
||||
gateway=self,
|
||||
session_store=self.session_store,
|
||||
)
|
||||
except Exception as _hook_exc:
|
||||
logger.warning("pre_gateway_dispatch invocation failed: %s", _hook_exc)
|
||||
_hook_results = []
|
||||
|
||||
for _result in _hook_results:
|
||||
if not isinstance(_result, dict):
|
||||
continue
|
||||
_action = _result.get("action")
|
||||
if _action == "skip":
|
||||
logger.info(
|
||||
"pre_gateway_dispatch skip: reason=%s platform=%s chat=%s",
|
||||
_result.get("reason"),
|
||||
source.platform.value if source.platform else "unknown",
|
||||
source.chat_id or "unknown",
|
||||
)
|
||||
return None
|
||||
if _action == "rewrite":
|
||||
_new_text = _result.get("text")
|
||||
if isinstance(_new_text, str):
|
||||
event = dataclasses.replace(event, text=_new_text)
|
||||
source = event.source
|
||||
break
|
||||
if _action == "allow":
|
||||
break
|
||||
|
||||
if is_internal:
|
||||
pass
|
||||
elif source.user_id is None:
|
||||
# Messages with no user identity (Telegram service messages,
|
||||
|
|
@ -3405,7 +3520,7 @@ class GatewayRunner:
|
|||
# running-agent guard. Reject gracefully rather than falling
|
||||
# through to interrupt + discard. Without this, commands
|
||||
# like /model, /reasoning, /voice, /insights, /title,
|
||||
# /resume, /retry, /undo, /compress, /usage, /provider,
|
||||
# /resume, /retry, /undo, /compress, /usage,
|
||||
# /reload-mcp, /sethome, /reset (all registered as Discord
|
||||
# slash commands) would interrupt the agent AND get
|
||||
# silently discarded by the slash-command safety net,
|
||||
|
|
@ -3476,6 +3591,10 @@ class GatewayRunner:
|
|||
if self._queue_during_drain_enabled()
|
||||
else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
|
||||
)
|
||||
if self._busy_input_mode == "queue":
|
||||
logger.debug("PRIORITY queue follow-up for session %s", _quick_key[:20])
|
||||
self._queue_or_replace_pending_event(_quick_key, event)
|
||||
return None
|
||||
logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
|
||||
running_agent.interrupt(event.text)
|
||||
if _quick_key in self._pending_messages:
|
||||
|
|
@ -3592,34 +3711,9 @@ class GatewayRunner:
|
|||
if canonical == "model":
|
||||
return await self._handle_model_command(event)
|
||||
|
||||
if canonical == "provider":
|
||||
return await self._handle_provider_command(event)
|
||||
|
||||
if canonical == "personality":
|
||||
return await self._handle_personality_command(event)
|
||||
|
||||
if canonical == "plan":
|
||||
try:
|
||||
from agent.skill_commands import build_plan_path, build_skill_invocation_message
|
||||
|
||||
user_instruction = event.get_command_args().strip()
|
||||
plan_path = build_plan_path(user_instruction)
|
||||
event.text = build_skill_invocation_message(
|
||||
"/plan",
|
||||
user_instruction,
|
||||
task_id=_quick_key,
|
||||
runtime_note=(
|
||||
"Save the markdown plan with write_file to this exact relative path "
|
||||
f"inside the active workspace/backend cwd: {plan_path}"
|
||||
),
|
||||
)
|
||||
if not event.text:
|
||||
return "Failed to load the bundled /plan skill."
|
||||
canonical = None
|
||||
except Exception as e:
|
||||
logger.exception("Failed to prepare /plan command")
|
||||
return f"Failed to enter plan mode: {e}"
|
||||
|
||||
if canonical == "retry":
|
||||
return await self._handle_retry_command(event)
|
||||
|
||||
|
|
@ -5742,63 +5836,6 @@ class GatewayRunner:
|
|||
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _handle_provider_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /provider command - show available providers."""
|
||||
import yaml
|
||||
from hermes_cli.models import (
|
||||
list_available_providers,
|
||||
normalize_provider,
|
||||
_PROVIDER_LABELS,
|
||||
)
|
||||
|
||||
# Resolve current provider from config
|
||||
current_provider = "openrouter"
|
||||
model_cfg = {}
|
||||
config_path = _hermes_home / 'config.yaml'
|
||||
try:
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
current_provider = model_cfg.get("provider", current_provider)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
current_provider = normalize_provider(current_provider)
|
||||
if current_provider == "auto":
|
||||
try:
|
||||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
||||
current_provider = _resolve_provider(current_provider)
|
||||
except Exception:
|
||||
current_provider = "openrouter"
|
||||
|
||||
# Detect custom endpoint from config base_url
|
||||
if current_provider == "openrouter":
|
||||
_cfg_base = model_cfg.get("base_url", "") if isinstance(model_cfg, dict) else ""
|
||||
if _cfg_base and "openrouter.ai" not in _cfg_base:
|
||||
current_provider = "custom"
|
||||
|
||||
current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
|
||||
|
||||
lines = [
|
||||
f"🔌 **Current provider:** {current_label} (`{current_provider}`)",
|
||||
"",
|
||||
"**Available providers:**",
|
||||
]
|
||||
|
||||
providers = list_available_providers()
|
||||
for p in providers:
|
||||
marker = " ← active" if p["id"] == current_provider else ""
|
||||
auth = "✅" if p["authenticated"] else "❌"
|
||||
aliases = f" _(also: {', '.join(p['aliases'])})_" if p["aliases"] else ""
|
||||
lines.append(f"{auth} `{p['id']}` — {p['label']}{aliases}{marker}")
|
||||
|
||||
lines.append("")
|
||||
lines.append("Switch: `/model provider:model-name`")
|
||||
lines.append("Setup: `hermes setup`")
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _handle_personality_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /personality command - list or set a personality."""
|
||||
import yaml
|
||||
|
|
@ -7065,10 +7102,7 @@ class GatewayRunner:
|
|||
tmp_agent._print_fn = lambda *a, **kw: None
|
||||
|
||||
compressor = tmp_agent.context_compressor
|
||||
compress_start = compressor.protect_first_n
|
||||
compress_start = compressor._align_boundary_forward(msgs, compress_start)
|
||||
compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start)
|
||||
if compress_start >= compress_end:
|
||||
if not compressor.has_content_to_compress(msgs):
|
||||
return "Nothing to compress yet (the transcript is still all protected context)."
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
|
@ -10338,9 +10372,9 @@ class GatewayRunner:
|
|||
# Periodic "still working" notifications for long-running tasks.
|
||||
# Fires every N seconds so the user knows the agent hasn't died.
|
||||
# Config: agent.gateway_notify_interval in config.yaml, or
|
||||
# HERMES_AGENT_NOTIFY_INTERVAL env var. Default 600s (10 min).
|
||||
# HERMES_AGENT_NOTIFY_INTERVAL env var. Default 180s (3 min).
|
||||
# 0 = disable notifications.
|
||||
_NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
|
||||
_NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 180))
|
||||
_NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
|
||||
_notify_start = time.time()
|
||||
|
||||
|
|
@ -10919,6 +10953,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
from gateway.status import (
|
||||
acquire_gateway_runtime_lock,
|
||||
get_running_pid,
|
||||
get_process_start_time,
|
||||
release_gateway_runtime_lock,
|
||||
remove_pid_file,
|
||||
terminate_pid,
|
||||
|
|
@ -10926,6 +10961,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
existing_pid = get_running_pid()
|
||||
if existing_pid is not None and existing_pid != os.getpid():
|
||||
if replace:
|
||||
existing_start_time = get_process_start_time(existing_pid)
|
||||
logger.info(
|
||||
"Replacing existing gateway instance (PID %d) with --replace.",
|
||||
existing_pid,
|
||||
|
|
@ -10994,7 +11030,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
# leaving stale lock files that block the new gateway from starting.
|
||||
try:
|
||||
from gateway.status import release_all_scoped_locks
|
||||
_released = release_all_scoped_locks()
|
||||
_released = release_all_scoped_locks(
|
||||
owner_pid=existing_pid,
|
||||
owner_start_time=existing_start_time,
|
||||
)
|
||||
if _released:
|
||||
logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
|
||||
except Exception:
|
||||
|
|
|
|||
|
|
@ -60,6 +60,10 @@ from .config import (
|
|||
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
|
||||
HomeChannel,
|
||||
)
|
||||
from .whatsapp_identity import (
|
||||
canonical_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -281,6 +285,18 @@ def build_session_context_prompt(
|
|||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.BLUEBUBBLES:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are responding via iMessage. "
|
||||
"Keep responses short and conversational — think texts, not essays. "
|
||||
"Structure longer replies as separate short thoughts, each separated "
|
||||
"by a blank line (double newline). Each block between blank lines "
|
||||
"will be delivered as its own iMessage bubble, so write accordingly: "
|
||||
"one idea per bubble, 1–3 sentences each. "
|
||||
"If the user needs a detailed answer, give the short version first "
|
||||
"and offer to elaborate."
|
||||
)
|
||||
|
||||
# Connected platforms
|
||||
platforms_list = ["local (files on this machine)"]
|
||||
|
|
@ -518,15 +534,24 @@ def build_session_key(
|
|||
"""
|
||||
platform = source.platform.value
|
||||
if source.chat_type == "dm":
|
||||
if source.chat_id:
|
||||
dm_chat_id = source.chat_id
|
||||
if source.platform == Platform.WHATSAPP:
|
||||
dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
|
||||
|
||||
if dm_chat_id:
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}"
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm"
|
||||
|
||||
participant_id = source.user_id_alt or source.user_id
|
||||
if participant_id and source.platform == Platform.WHATSAPP:
|
||||
# Same JID/LID-flip bug as the DM case: without canonicalisation, a
|
||||
# single group member gets two isolated per-user sessions when the
|
||||
# bridge reshuffles alias forms.
|
||||
participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
|
||||
key_parts = ["agent:main", platform, source.chat_type]
|
||||
|
||||
if source.chat_id:
|
||||
|
|
|
|||
|
|
@ -113,6 +113,11 @@ def _get_process_start_time(pid: int) -> Optional[int]:
|
|||
return None
|
||||
|
||||
|
||||
def get_process_start_time(pid: int) -> Optional[int]:
|
||||
"""Public wrapper for retrieving a process start time when available."""
|
||||
return _get_process_start_time(pid)
|
||||
|
||||
|
||||
def _read_process_cmdline(pid: int) -> Optional[str]:
|
||||
"""Return the process command line as a space-separated string."""
|
||||
cmdline_path = Path(f"/proc/{pid}/cmdline")
|
||||
|
|
@ -562,17 +567,43 @@ def release_scoped_lock(scope: str, identity: str) -> None:
|
|||
pass
|
||||
|
||||
|
||||
def release_all_scoped_locks() -> int:
|
||||
"""Remove all scoped lock files in the lock directory.
|
||||
def release_all_scoped_locks(
|
||||
*,
|
||||
owner_pid: Optional[int] = None,
|
||||
owner_start_time: Optional[int] = None,
|
||||
) -> int:
|
||||
"""Remove scoped lock files in the lock directory.
|
||||
|
||||
Called during --replace to clean up stale locks left by stopped/killed
|
||||
gateway processes that did not release their locks gracefully.
|
||||
gateway processes that did not release their locks gracefully. When an
|
||||
``owner_pid`` is provided, only lock records belonging to that gateway
|
||||
process are removed. ``owner_start_time`` further narrows the match to
|
||||
protect against PID reuse.
|
||||
|
||||
When no owner is provided, preserves the legacy behavior and removes every
|
||||
scoped lock file in the directory.
|
||||
|
||||
Returns the number of lock files removed.
|
||||
"""
|
||||
lock_dir = _get_lock_dir()
|
||||
removed = 0
|
||||
if lock_dir.exists():
|
||||
for lock_file in lock_dir.glob("*.lock"):
|
||||
if owner_pid is not None:
|
||||
record = _read_json_file(lock_file)
|
||||
if not isinstance(record, dict):
|
||||
continue
|
||||
try:
|
||||
record_pid = int(record.get("pid"))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if record_pid != owner_pid:
|
||||
continue
|
||||
if (
|
||||
owner_start_time is not None
|
||||
and record.get("start_time") != owner_start_time
|
||||
):
|
||||
continue
|
||||
try:
|
||||
lock_file.unlink(missing_ok=True)
|
||||
removed += 1
|
||||
|
|
|
|||
135
gateway/whatsapp_identity.py
Normal file
135
gateway/whatsapp_identity.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
"""Shared helpers for canonicalising WhatsApp sender identity.
|
||||
|
||||
WhatsApp's bridge can surface the same human under two different JID shapes
|
||||
within a single conversation:
|
||||
|
||||
- LID form: ``999999999999999@lid``
|
||||
- Phone form: ``15551234567@s.whatsapp.net``
|
||||
|
||||
Both the authorisation path (:mod:`gateway.run`) and the session-key path
|
||||
(:mod:`gateway.session`) need to collapse these aliases to a single stable
|
||||
identity. This module is the single source of truth for that resolution so
|
||||
the two paths can never drift apart.
|
||||
|
||||
Public helpers:
|
||||
|
||||
- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
|
||||
down to the bare numeric identifier.
|
||||
- :func:`canonical_whatsapp_identifier` — walk the bridge's
|
||||
``lid-mapping-*.json`` files and return a stable canonical identity
|
||||
across phone/LID variants.
|
||||
- :func:`expand_whatsapp_aliases` — return the full alias set for an
|
||||
identifier. Used by authorisation code that needs to match any known
|
||||
form of a sender against an allow-list.
|
||||
|
||||
Plugins that need per-sender behaviour on WhatsApp (role-based routing,
|
||||
per-contact authorisation, policy gating in a gateway hook) should use
|
||||
``canonical_whatsapp_identifier`` so their bookkeeping lines up with
|
||||
Hermes' own session keys.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Set
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
def normalize_whatsapp_identifier(value: str) -> str:
|
||||
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier.
|
||||
|
||||
Accepts any of the identifier shapes the WhatsApp bridge may emit:
|
||||
``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
|
||||
``"60123456789@lid"``, or a bare ``"+601****6789"`` / ``"60123456789"``.
|
||||
Returns just the numeric identifier (``"60123456789"``) suitable for
|
||||
equality comparisons.
|
||||
|
||||
Useful for plugins that want to match sender IDs against
|
||||
user-supplied config (phone numbers in ``config.yaml``) without
|
||||
worrying about which variant the bridge happens to deliver.
|
||||
"""
|
||||
return (
|
||||
str(value or "")
|
||||
.strip()
|
||||
.replace("+", "", 1)
|
||||
.split(":", 1)[0]
|
||||
.split("@", 1)[0]
|
||||
)
|
||||
|
||||
|
||||
def expand_whatsapp_aliases(identifier: str) -> Set[str]:
|
||||
"""Resolve WhatsApp phone/LID aliases via bridge session mapping files.
|
||||
|
||||
Returns the set of all identifiers transitively reachable through the
|
||||
bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files,
|
||||
starting from ``identifier``. The result always includes the
|
||||
normalized input itself, so callers can safely ``in`` check against
|
||||
the return value without a separate fallback branch.
|
||||
|
||||
Returns an empty set if ``identifier`` normalizes to empty.
|
||||
"""
|
||||
normalized = normalize_whatsapp_identifier(identifier)
|
||||
if not normalized:
|
||||
return set()
|
||||
|
||||
session_dir = get_hermes_home() / "whatsapp" / "session"
|
||||
resolved: Set[str] = set()
|
||||
queue = [normalized]
|
||||
|
||||
while queue:
|
||||
current = queue.pop(0)
|
||||
if not current or current in resolved:
|
||||
continue
|
||||
|
||||
resolved.add(current)
|
||||
for suffix in ("", "_reverse"):
|
||||
mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
|
||||
if not mapping_path.exists():
|
||||
continue
|
||||
try:
|
||||
mapped = normalize_whatsapp_identifier(
|
||||
json.loads(mapping_path.read_text(encoding="utf-8"))
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
if mapped and mapped not in resolved:
|
||||
queue.append(mapped)
|
||||
|
||||
return resolved
|
||||
|
||||
|
||||
def canonical_whatsapp_identifier(identifier: str) -> str:
|
||||
"""Return a stable WhatsApp sender identity across phone-JID/LID variants.
|
||||
|
||||
WhatsApp may surface the same person under either a phone-format JID
|
||||
(``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
|
||||
applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
|
||||
member inside a group chat — both represent a user identity, and the
|
||||
bridge may flip between the two for the same human.
|
||||
|
||||
This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
|
||||
files, walks the mapping transitively, and picks the shortest
|
||||
(numeric-preferred) alias as the canonical identity.
|
||||
:func:`gateway.session.build_session_key` uses this for both WhatsApp
|
||||
DM chat_ids and WhatsApp group participant_ids, so callers get the
|
||||
same session-key identity Hermes itself uses.
|
||||
|
||||
Plugins that need per-sender behaviour (role-based routing,
|
||||
authorisation, per-contact policy) should use this so their
|
||||
bookkeeping lines up with Hermes' session bookkeeping even when
|
||||
the bridge reshuffles aliases.
|
||||
|
||||
Returns an empty string if ``identifier`` normalizes to empty. If no
|
||||
mapping files exist yet (fresh bridge install), returns the
|
||||
normalized input unchanged.
|
||||
"""
|
||||
normalized = normalize_whatsapp_identifier(identifier)
|
||||
if not normalized:
|
||||
return ""
|
||||
|
||||
# expand_whatsapp_aliases always includes `normalized` itself in the
|
||||
# returned set, so the min() below degrades gracefully to `normalized`
|
||||
# when no lid-mapping files are present.
|
||||
aliases = expand_whatsapp_aliases(normalized)
|
||||
return min(aliases, key=lambda candidate: (len(candidate), candidate))
|
||||
|
|
@ -11,5 +11,5 @@ Provides subcommands for:
|
|||
- hermes cron - Manage cron jobs
|
||||
"""
|
||||
|
||||
__version__ = "0.10.0"
|
||||
__release_date__ = "2026.4.16"
|
||||
__version__ = "0.11.0"
|
||||
__release_date__ = "2026.4.23"
|
||||
|
|
|
|||
1051
hermes_cli/auth.py
1051
hermes_cli/auth.py
File diff suppressed because it is too large
Load diff
|
|
@ -110,18 +110,40 @@ def _display_source(source: str) -> str:
|
|||
return source.split(":", 1)[1] if source.startswith("manual:") else source
|
||||
|
||||
|
||||
def _classify_exhausted_status(entry) -> tuple[str, bool]:
|
||||
code = getattr(entry, "last_error_code", None)
|
||||
reason = str(getattr(entry, "last_error_reason", "") or "").strip().lower()
|
||||
message = str(getattr(entry, "last_error_message", "") or "").strip().lower()
|
||||
|
||||
if code == 429 or any(token in reason for token in ("rate_limit", "usage_limit", "quota", "exhausted")) or any(
|
||||
token in message for token in ("rate limit", "usage limit", "quota", "too many requests")
|
||||
):
|
||||
return "rate-limited", True
|
||||
|
||||
if code in {401, 403} or any(token in reason for token in ("invalid_token", "invalid_grant", "unauthorized", "forbidden", "auth")) or any(
|
||||
token in message for token in ("unauthorized", "forbidden", "expired", "revoked", "invalid token", "authentication")
|
||||
):
|
||||
return "auth failed", False
|
||||
|
||||
return "exhausted", True
|
||||
|
||||
|
||||
|
||||
def _format_exhausted_status(entry) -> str:
|
||||
if entry.last_status != STATUS_EXHAUSTED:
|
||||
return ""
|
||||
label, show_retry_window = _classify_exhausted_status(entry)
|
||||
reason = getattr(entry, "last_error_reason", None)
|
||||
reason_text = f" {reason}" if isinstance(reason, str) and reason.strip() else ""
|
||||
code = f" ({entry.last_error_code})" if entry.last_error_code else ""
|
||||
if not show_retry_window:
|
||||
return f" {label}{reason_text}{code} (re-auth may be required)"
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is None:
|
||||
return f" exhausted{reason_text}{code}"
|
||||
return f" {label}{reason_text}{code}"
|
||||
remaining = max(0, int(math.ceil(exhausted_until - time.time())))
|
||||
if remaining <= 0:
|
||||
return f" exhausted{reason_text}{code} (ready to retry)"
|
||||
return f" {label}{reason_text}{code} (ready to retry)"
|
||||
minutes, seconds = divmod(remaining, 60)
|
||||
hours, minutes = divmod(minutes, 60)
|
||||
days, hours = divmod(hours, 24)
|
||||
|
|
@ -133,7 +155,7 @@ def _format_exhausted_status(entry) -> str:
|
|||
wait = f"{minutes}m {seconds}s"
|
||||
else:
|
||||
wait = f"{seconds}s"
|
||||
return f" exhausted{reason_text}{code} ({wait} left)"
|
||||
return f" {label}{reason_text}{code} ({wait} left)"
|
||||
|
||||
|
||||
def auth_add_command(args) -> None:
|
||||
|
|
@ -386,6 +408,44 @@ def auth_reset_command(args) -> None:
|
|||
print(f"Reset status on {count} {provider} credentials")
|
||||
|
||||
|
||||
def auth_status_command(args) -> None:
|
||||
provider = _normalize_provider(getattr(args, "provider", "") or "")
|
||||
if not provider:
|
||||
raise SystemExit("Provider is required. Example: `hermes auth status spotify`.")
|
||||
status = auth_mod.get_auth_status(provider)
|
||||
if not status.get("logged_in"):
|
||||
reason = status.get("error")
|
||||
if reason:
|
||||
print(f"{provider}: logged out ({reason})")
|
||||
else:
|
||||
print(f"{provider}: logged out")
|
||||
return
|
||||
|
||||
print(f"{provider}: logged in")
|
||||
for key in ("auth_type", "client_id", "redirect_uri", "scope", "expires_at", "api_base_url"):
|
||||
value = status.get(key)
|
||||
if value:
|
||||
print(f" {key}: {value}")
|
||||
|
||||
|
||||
def auth_logout_command(args) -> None:
|
||||
auth_mod.logout_command(SimpleNamespace(provider=getattr(args, "provider", None)))
|
||||
|
||||
|
||||
def auth_spotify_command(args) -> None:
|
||||
action = str(getattr(args, "spotify_action", "") or "login").strip().lower()
|
||||
if action in {"", "login"}:
|
||||
auth_mod.login_spotify_command(args)
|
||||
return
|
||||
if action == "status":
|
||||
auth_status_command(SimpleNamespace(provider="spotify"))
|
||||
return
|
||||
if action == "logout":
|
||||
auth_logout_command(SimpleNamespace(provider="spotify"))
|
||||
return
|
||||
raise SystemExit(f"Unknown Spotify auth action: {action}")
|
||||
|
||||
|
||||
def _interactive_auth() -> None:
|
||||
"""Interactive credential pool management when `hermes auth` is called bare."""
|
||||
# Show current pool status first
|
||||
|
|
@ -583,5 +643,14 @@ def auth_command(args) -> None:
|
|||
if action == "reset":
|
||||
auth_reset_command(args)
|
||||
return
|
||||
if action == "status":
|
||||
auth_status_command(args)
|
||||
return
|
||||
if action == "logout":
|
||||
auth_logout_command(args)
|
||||
return
|
||||
if action == "spotify":
|
||||
auth_spotify_command(args)
|
||||
return
|
||||
# No subcommand — launch interactive mode
|
||||
_interactive_auth()
|
||||
|
|
|
|||
|
|
@ -238,6 +238,52 @@ def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
|
|||
return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)}
|
||||
|
||||
|
||||
_RELEASE_URL_BASE = "https://github.com/NousResearch/hermes-agent/releases/tag"
_latest_release_cache: Optional[tuple] = None  # (tag, url) once resolved; () = lookup failed


def get_latest_release_tag(repo_dir: Optional[Path] = None) -> Optional[tuple]:
    """Return ``(tag, release_url)`` for the latest git tag, or None.

    Local-only — runs ``git describe --tags --abbrev=0`` against the
    Hermes checkout. Cached per-process. Release URL always points at the
    canonical NousResearch/hermes-agent repo (forks don't get a link).
    """
    global _latest_release_cache
    if _latest_release_cache is not None:
        # Cache holds either a (tag, url) tuple or the falsy () sentinel.
        return _latest_release_cache or None

    target = repo_dir or _resolve_repo_dir()
    if target is None:
        _latest_release_cache = ()  # falsy sentinel — skip future lookups
        return None

    tag = ""
    proc = None
    try:
        proc = subprocess.run(
            ["git", "describe", "--tags", "--abbrev=0"],
            capture_output=True,
            text=True,
            timeout=3,
            cwd=str(target),
        )
    except Exception:
        proc = None  # git missing, timeout, bad cwd, etc. — treat as no tag

    if proc is not None and proc.returncode == 0:
        tag = (proc.stdout or "").strip()

    if not tag:
        _latest_release_cache = ()
        return None

    _latest_release_cache = (tag, f"{_RELEASE_URL_BASE}/{tag}")
    return _latest_release_cache
|
||||
|
||||
|
||||
def format_banner_version_label() -> str:
|
||||
"""Return the version label shown in the startup banner title."""
|
||||
base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})"
|
||||
|
|
@ -519,9 +565,16 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
|||
agent_name = _skin_branding("agent_name", "Hermes Agent")
|
||||
title_color = _skin_color("banner_title", "#FFD700")
|
||||
border_color = _skin_color("banner_border", "#CD7F32")
|
||||
version_label = format_banner_version_label()
|
||||
release_info = get_latest_release_tag()
|
||||
if release_info:
|
||||
_tag, _url = release_info
|
||||
title_markup = f"[bold {title_color}][link={_url}]{version_label}[/link][/]"
|
||||
else:
|
||||
title_markup = f"[bold {title_color}]{version_label}[/]"
|
||||
outer_panel = Panel(
|
||||
layout_table,
|
||||
title=f"[bold {title_color}]{format_banner_version_label()}[/]",
|
||||
title=title_markup,
|
||||
border_style=border_color,
|
||||
padding=(0, 2),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import os
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_CODEX_MODELS: List[str] = [
|
||||
"gpt-5.5",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.4",
|
||||
"gpt-5.3-codex",
|
||||
|
|
@ -21,6 +22,7 @@ DEFAULT_CODEX_MODELS: List[str] = [
|
|||
]
|
||||
|
||||
_FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
|
||||
("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")),
|
||||
("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.3-codex", ("gpt-5.2-codex",)),
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
|
||||
args_hint="[number]"),
|
||||
CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session",
|
||||
aliases=("snap",), args_hint="[create|restore <id>|prune]"),
|
||||
cli_only=True, aliases=("snap",), args_hint="[create|restore <id>|prune]"),
|
||||
CommandDef("stop", "Kill all running background processes", "Session"),
|
||||
CommandDef("approve", "Approve a pending dangerous command", "Session",
|
||||
gateway_only=True, args_hint="[session|always]"),
|
||||
|
|
@ -104,9 +104,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("provider", "Show available providers and current provider",
|
||||
"Configuration"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
|
||||
cli_only=True),
|
||||
|
||||
CommandDef("personality", "Set a predefined personality", "Configuration",
|
||||
args_hint="[name]"),
|
||||
|
|
@ -124,9 +123,12 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
args_hint="[normal|fast|status]",
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
args_hint="[name]"),
|
||||
cli_only=True, args_hint="[name]"),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
|
||||
CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
|
||||
cli_only=True, args_hint="[queue|interrupt|status]",
|
||||
subcommands=("queue", "interrupt", "status")),
|
||||
|
||||
# Tools & Skills
|
||||
CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
|
||||
|
|
@ -139,7 +141,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
cli_only=True, args_hint="[subcommand]",
|
||||
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
aliases=("reload_mcp",)),
|
||||
CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
|
||||
|
|
@ -317,7 +320,7 @@ def should_bypass_active_session(command_name: str | None) -> bool:
|
|||
safety net in gateway.run discards any command text that reaches
|
||||
the pending queue — which meant a mid-run /model (or /reasoning,
|
||||
/voice, /insights, /title, /resume, /retry, /undo, /compress,
|
||||
/usage, /provider, /reload-mcp, /sethome, /reset) would silently
|
||||
/usage, /reload-mcp, /sethome, /reset) would silently
|
||||
interrupt the agent AND get discarded, producing a zero-char
|
||||
response. See issue #5057 / PRs #6252, #10370, #4665.
|
||||
|
||||
|
|
|
|||
|
|
@ -361,6 +361,15 @@ DEFAULT_CONFIG = {
|
|||
# to finish, then interrupts any remaining runs after the timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
"restart_drain_timeout": 60,
|
||||
# Max app-level retry attempts for API errors (connection drops,
|
||||
# provider timeouts, 5xx, etc.) before the agent surfaces the
|
||||
# failure. The OpenAI SDK already does its own low-level retries
|
||||
# (max_retries=2 default) for transient network errors; this is
|
||||
# the Hermes-level retry loop that wraps the whole call. Lower
|
||||
# this to 1 if you use fallback providers and want fast failover
|
||||
# on flaky primaries; raise it if you prefer to tolerate longer
|
||||
# provider hiccups on a single provider.
|
||||
"api_max_retries": 3,
|
||||
"service_tier": "",
|
||||
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||
# model to actually call tools instead of describing intended actions.
|
||||
|
|
@ -375,7 +384,11 @@ DEFAULT_CONFIG = {
|
|||
# Periodic "still working" notification interval (seconds).
|
||||
# Sends a status message every N seconds so the user knows the
|
||||
# agent hasn't died during long tasks. 0 = disable notifications.
|
||||
"gateway_notify_interval": 600,
|
||||
# Lower values mean faster feedback on slow tasks but more chat
|
||||
# noise; 180s is a compromise that catches spinning weak-model runs
|
||||
# (60+ tool iterations with tiny output) before users assume the
|
||||
# bot is dead and /restart.
|
||||
"gateway_notify_interval": 180,
|
||||
},
|
||||
|
||||
"terminal": {
|
||||
|
|
@ -453,6 +466,12 @@ DEFAULT_CONFIG = {
|
|||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
||||
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
||||
# Active only when a CDP-capable backend is attached (Browserbase or
|
||||
# local Chrome via /browser connect). See
|
||||
# website/docs/developer-guide/browser-supervisor.md.
|
||||
"dialog_policy": "must_respond", # must_respond | auto_dismiss | auto_accept
|
||||
"dialog_timeout_s": 300, # Safety auto-dismiss after N seconds under must_respond
|
||||
"camofox": {
|
||||
# When true, Hermes sends a stable profile-scoped userId to Camofox
|
||||
# so the server maps it to a persistent Firefox profile automatically.
|
||||
|
|
@ -473,7 +492,27 @@ DEFAULT_CONFIG = {
|
|||
# exceed this are rejected with guidance to use offset+limit.
|
||||
# 100K chars ≈ 25–35K tokens across typical tokenisers.
|
||||
"file_read_max_chars": 100_000,
|
||||
|
||||
|
||||
# Tool-output truncation thresholds. When terminal output or a
|
||||
# single read_file page exceeds these limits, Hermes truncates the
|
||||
# payload sent to the model (keeping head + tail for terminal,
|
||||
# enforcing pagination for read_file). Tuning these trades context
|
||||
# footprint against how much raw output the model can see in one
|
||||
# shot. Ported from anomalyco/opencode PR #23770.
|
||||
#
|
||||
# - max_bytes: terminal_tool output cap, in chars
|
||||
# (default 50_000 ≈ 12-15K tokens).
|
||||
# - max_lines: read_file pagination cap — the maximum `limit`
|
||||
# a single read_file call can request before
|
||||
# being clamped (default 2000).
|
||||
# - max_line_length: per-line cap applied when read_file emits a
|
||||
# line-numbered view (default 2000 chars).
|
||||
"tool_output": {
|
||||
"max_bytes": 50_000,
|
||||
"max_lines": 2000,
|
||||
"max_line_length": 2000,
|
||||
},
|
||||
|
||||
"compression": {
|
||||
"enabled": True,
|
||||
"threshold": 0.50, # compress when context usage exceeds this ratio
|
||||
|
|
@ -482,6 +521,12 @@ DEFAULT_CONFIG = {
|
|||
|
||||
},
|
||||
|
||||
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
|
||||
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
|
||||
"prompt_caching": {
|
||||
"cache_ttl": "5m",
|
||||
},
|
||||
|
||||
# AWS Bedrock provider configuration.
|
||||
# Only used when model.provider is "bedrock".
|
||||
"bedrock": {
|
||||
|
|
@ -726,6 +771,10 @@ DEFAULT_CONFIG = {
|
|||
"inherit_mcp_toolsets": True,
|
||||
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
|
||||
# independent of the parent's max_iterations)
|
||||
"child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s,
|
||||
# no ceiling). High-reasoning models on large tasks
|
||||
# (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
|
||||
# raise if children time out before producing output.
|
||||
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
|
||||
|
|
@ -760,6 +809,17 @@ DEFAULT_CONFIG = {
|
|||
"inline_shell": False,
|
||||
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
|
||||
"inline_shell_timeout": 10,
|
||||
# Run the keyword/pattern security scanner on skills the agent
|
||||
# writes via skill_manage (create/edit/patch). Off by default
|
||||
# because the agent can already execute the same code paths via
|
||||
# terminal() with no gate, so the scan adds friction (blocks
|
||||
# skills that mention risky keywords in prose) without meaningful
|
||||
# security. Turn on if you want the belt-and-suspenders — a
|
||||
# dangerous verdict will then surface as a tool error to the
|
||||
# agent, which can retry with the flagged content removed.
|
||||
# External hub installs (trusted/community sources) are always
|
||||
# scanned regardless of this setting.
|
||||
"guard_agent_created": False,
|
||||
},
|
||||
|
||||
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
|
||||
|
|
@ -1280,7 +1340,7 @@ OPTIONAL_ENV_VARS = {
|
|||
"advanced": True,
|
||||
},
|
||||
"XIAOMI_API_KEY": {
|
||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||
"prompt": "Xiaomi MiMo API Key",
|
||||
"url": "https://platform.xiaomimimo.com",
|
||||
"password": True,
|
||||
|
|
|
|||
|
|
@ -275,6 +275,99 @@ def copilot_device_code_login(
|
|||
return None
|
||||
|
||||
|
||||
# ─── Copilot Token Exchange ────────────────────────────────────────────────
|
||||
|
||||
# Module-level cache for exchanged Copilot API tokens (process-local,
# never persisted to disk).
# Maps raw_token_fingerprint -> (api_token, expires_at_epoch).
_jwt_cache: dict[str, tuple[str, float]] = {}
_JWT_REFRESH_MARGIN_SECONDS = 120  # refresh 2 min before expiry

# Token exchange endpoint and headers (matching VS Code / Copilot CLI)
_TOKEN_EXCHANGE_URL = "https://api.github.com/copilot_internal/v2/token"
_EDITOR_VERSION = "vscode/1.104.1"
_EXCHANGE_USER_AGENT = "GitHubCopilotChat/0.26.7"
|
||||
|
||||
|
||||
def _token_fingerprint(raw_token: str) -> str:
|
||||
"""Short fingerprint of a raw token for cache keying (avoids storing full token)."""
|
||||
import hashlib
|
||||
return hashlib.sha256(raw_token.encode()).hexdigest()[:16]
|
||||
|
||||
|
||||
def exchange_copilot_token(raw_token: str, *, timeout: float = 10.0) -> tuple[str, float]:
    """Exchange a raw GitHub token for a short-lived Copilot API token.

    Issues ``GET https://api.github.com/copilot_internal/v2/token`` with
    the raw GitHub token and returns ``(api_token, expires_at)``.

    The returned token is a semicolon-separated string (not a standard JWT)
    used as ``Authorization: Bearer <token>`` for Copilot API requests.

    Results are cached in-process and reused until close to expiry.
    Raises ``ValueError`` on failure.
    """
    import urllib.request

    cache_key = _token_fingerprint(raw_token)

    # Serve from cache while comfortably inside the expiry window.
    entry = _jwt_cache.get(cache_key)
    if entry:
        token, valid_until = entry
        if time.time() < valid_until - _JWT_REFRESH_MARGIN_SECONDS:
            return token, valid_until

    request = urllib.request.Request(
        _TOKEN_EXCHANGE_URL,
        method="GET",
        headers={
            "Authorization": f"token {raw_token}",
            "User-Agent": _EXCHANGE_USER_AGENT,
            "Accept": "application/json",
            "Editor-Version": _EDITOR_VERSION,
        },
    )

    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            payload = json.loads(response.read().decode())
    except Exception as exc:
        raise ValueError(f"Copilot token exchange failed: {exc}") from exc

    token = payload.get("token", "")
    if not token:
        raise ValueError("Copilot token exchange returned empty token")

    raw_expiry = payload.get("expires_at", 0)
    # Coerce to epoch seconds; fall back to a 30-minute lifetime when the
    # response omits expiry.
    valid_until = float(raw_expiry) if raw_expiry else time.time() + 1800

    _jwt_cache[cache_key] = (token, valid_until)
    logger.debug(
        "Copilot token exchanged, expires_at=%s",
        valid_until,
    )
    return token, valid_until
|
||||
|
||||
|
||||
def get_copilot_api_token(raw_token: str) -> str:
    """Exchange a raw GitHub token for a Copilot API token, with fallback.

    Convenience wrapper: returns the exchanged token on success, or the
    raw token unchanged if the exchange fails (e.g. network error, unsupported
    account type). This preserves existing behaviour for accounts that don't
    need exchange while enabling access to internal-only models for those that do.
    """
    if not raw_token:
        return raw_token

    try:
        exchanged, _expiry = exchange_copilot_token(raw_token)
    except Exception as exc:
        # Best-effort: any failure falls back to the caller's original token.
        logger.debug("Copilot token exchange failed, using raw token: %s", exc)
        return raw_token
    return exchanged
|
||||
|
||||
|
||||
# ─── Copilot API Headers ───────────────────────────────────────────────────
|
||||
|
||||
def copilot_request_headers(
|
||||
|
|
|
|||
|
|
@ -93,6 +93,9 @@ def cron_list(show_all: bool = False):
|
|||
script = job.get("script")
|
||||
if script:
|
||||
print(f" Script: {script}")
|
||||
workdir = job.get("workdir")
|
||||
if workdir:
|
||||
print(f" Workdir: {workdir}")
|
||||
|
||||
# Execution history
|
||||
last_status = job.get("last_status")
|
||||
|
|
@ -168,6 +171,7 @@ def cron_create(args):
|
|||
skill=getattr(args, "skill", None),
|
||||
skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
|
||||
script=getattr(args, "script", None),
|
||||
workdir=getattr(args, "workdir", None),
|
||||
)
|
||||
if not result.get("success"):
|
||||
print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
|
||||
|
|
@ -180,6 +184,8 @@ def cron_create(args):
|
|||
job_data = result.get("job", {})
|
||||
if job_data.get("script"):
|
||||
print(f" Script: {job_data['script']}")
|
||||
if job_data.get("workdir"):
|
||||
print(f" Workdir: {job_data['workdir']}")
|
||||
print(f" Next run: {result['next_run_at']}")
|
||||
return 0
|
||||
|
||||
|
|
@ -218,6 +224,7 @@ def cron_edit(args):
|
|||
repeat=getattr(args, "repeat", None),
|
||||
skills=final_skills,
|
||||
script=getattr(args, "script", None),
|
||||
workdir=getattr(args, "workdir", None),
|
||||
)
|
||||
if not result.get("success"):
|
||||
print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
|
||||
|
|
@ -233,6 +240,8 @@ def cron_edit(args):
|
|||
print(" Skills: none")
|
||||
if updated.get("script"):
|
||||
print(f" Script: {updated['script']}")
|
||||
if updated.get("workdir"):
|
||||
print(f" Workdir: {updated['workdir']}")
|
||||
return 0
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ if _env_path.exists():
|
|||
load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.models import _HERMES_USER_AGENT
|
||||
from hermes_constants import OPENROUTER_MODELS_URL
|
||||
from utils import base_url_host_matches
|
||||
|
||||
|
|
@ -295,16 +296,33 @@ def run_doctor(args):
|
|||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
||||
from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
|
||||
from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
|
||||
except Exception:
|
||||
_resolve_provider = None
|
||||
_compatible_custom_providers = None
|
||||
_resolve_provider_full = None
|
||||
|
||||
custom_providers = []
|
||||
if _compatible_custom_providers is not None:
|
||||
try:
|
||||
custom_providers = _compatible_custom_providers(cfg)
|
||||
except Exception:
|
||||
custom_providers = []
|
||||
|
||||
user_providers = cfg.get("providers")
|
||||
if isinstance(user_providers, dict):
|
||||
known_providers.update(str(name).strip().lower() for name in user_providers if str(name).strip())
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
name = str(entry.get("name") or "").strip()
|
||||
if name:
|
||||
known_providers.add("custom:" + name.lower().replace(" ", "-"))
|
||||
|
||||
canonical_provider = provider
|
||||
if provider and _resolve_provider is not None and provider != "auto":
|
||||
try:
|
||||
canonical_provider = _resolve_provider(provider)
|
||||
except Exception:
|
||||
canonical_provider = None
|
||||
if provider and _resolve_provider_full is not None and provider != "auto":
|
||||
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
|
||||
canonical_provider = provider_def.id if provider_def is not None else None
|
||||
|
||||
if provider and provider != "auto":
|
||||
if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
|
||||
|
|
@ -957,7 +975,10 @@ def run_doctor(args):
|
|||
if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
|
||||
_base = _base.rstrip("/") + "/v1"
|
||||
_url = (_base.rstrip("/") + "/models") if _base else _default_url
|
||||
_headers = {"Authorization": f"Bearer {_key}"}
|
||||
_headers = {
|
||||
"Authorization": f"Bearer {_key}",
|
||||
"User-Agent": _HERMES_USER_AGENT,
|
||||
}
|
||||
if base_url_host_matches(_base, "api.kimi.com"):
|
||||
_headers["User-Agent"] = "claude-code/0.1.0"
|
||||
_resp = httpx.get(
|
||||
|
|
|
|||
|
|
@ -267,6 +267,8 @@ def run_dump(args):
|
|||
("ANTHROPIC_API_KEY", "anthropic"),
|
||||
("ANTHROPIC_TOKEN", "anthropic_token"),
|
||||
("NOUS_API_KEY", "nous"),
|
||||
("GOOGLE_API_KEY", "google/gemini"),
|
||||
("GEMINI_API_KEY", "gemini"),
|
||||
("GLM_API_KEY", "glm/zai"),
|
||||
("ZAI_API_KEY", "zai"),
|
||||
("KIMI_API_KEY", "kimi"),
|
||||
|
|
|
|||
|
|
@ -175,6 +175,60 @@ def _request_gateway_self_restart(pid: int) -> bool:
|
|||
return True
|
||||
|
||||
|
||||
def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
|
||||
"""Send SIGUSR1 to a gateway PID and wait for it to exit gracefully.
|
||||
|
||||
SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
|
||||
which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
|
||||
seconds), then exits with code 75. Both systemd (``Restart=on-failure``
|
||||
+ ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
|
||||
= false``) relaunch the process after the graceful exit.
|
||||
|
||||
This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
|
||||
which SIGKILL in-flight agents after a short timeout.
|
||||
|
||||
Args:
|
||||
pid: Gateway process PID (systemd MainPID, launchd PID, or bare
|
||||
process PID).
|
||||
drain_timeout: Seconds to wait for the process to exit after sending
|
||||
SIGUSR1. Should be slightly larger than the gateway's
|
||||
``agent.restart_drain_timeout`` to allow the drain loop to
|
||||
finish cleanly.
|
||||
|
||||
Returns:
|
||||
True if the PID was signalled and exited within the timeout.
|
||||
False if SIGUSR1 couldn't be sent or the process didn't exit in
|
||||
time (caller should fall back to a harder restart path).
|
||||
"""
|
||||
if not hasattr(signal, "SIGUSR1"):
|
||||
return False
|
||||
if pid <= 0:
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, signal.SIGUSR1)
|
||||
except ProcessLookupError:
|
||||
# Already gone — nothing to drain.
|
||||
return True
|
||||
except (PermissionError, OSError):
|
||||
return False
|
||||
|
||||
import time as _time
|
||||
|
||||
deadline = _time.monotonic() + max(drain_timeout, 1.0)
|
||||
while _time.monotonic() < deadline:
|
||||
try:
|
||||
os.kill(pid, 0) # signal 0 — probe liveness
|
||||
except ProcessLookupError:
|
||||
return True
|
||||
except PermissionError:
|
||||
# Process still exists but we can't signal it. Treat as alive
|
||||
# so the caller falls back.
|
||||
pass
|
||||
_time.sleep(0.5)
|
||||
# Drain didn't finish in time.
|
||||
return False
|
||||
|
||||
|
||||
def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
|
||||
if pid is None or pid <= 0:
|
||||
return
|
||||
|
|
@ -1469,7 +1523,14 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
|
|||
path_entries.append(resolved_node_dir)
|
||||
|
||||
common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
|
||||
restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))
|
||||
# systemd's TimeoutStopSec must exceed the gateway's drain_timeout so
|
||||
# there's budget left for post-interrupt cleanup (tool subprocess kill,
|
||||
# adapter disconnect, session DB close) before systemd escalates to
|
||||
# SIGKILL on the cgroup — otherwise bash/sleep tool-call children left
|
||||
# by a force-interrupted agent get reaped by systemd instead of us
|
||||
# (#8202). 30s of headroom covers the worst case we've observed.
|
||||
_drain_timeout = int(_get_restart_drain_timeout() or 0)
|
||||
restart_timeout = max(60, _drain_timeout) + 30
|
||||
|
||||
if system:
|
||||
username, group_name, home_dir = _system_service_identity(run_as_user)
|
||||
|
|
|
|||
|
|
@ -166,6 +166,27 @@ from hermes_cli.env_loader import load_hermes_dotenv
|
|||
|
||||
load_hermes_dotenv(project_env=PROJECT_ROOT / ".env")
|
||||
|
||||
# Bridge security.redact_secrets from config.yaml → HERMES_REDACT_SECRETS env
|
||||
# var BEFORE hermes_logging imports agent.redact (which snapshots the flag at
|
||||
# module-import time). Without this, config.yaml's toggle is ignored because
|
||||
# the setup_logging() call below imports agent.redact, which reads the env var
|
||||
# exactly once. Env var in .env still wins — this is config.yaml fallback only.
|
||||
try:
|
||||
if "HERMES_REDACT_SECRETS" not in os.environ:
|
||||
import yaml as _yaml_early
|
||||
_cfg_path = get_hermes_home() / "config.yaml"
|
||||
if _cfg_path.exists():
|
||||
with open(_cfg_path, encoding="utf-8") as _f:
|
||||
_early_sec_cfg = (_yaml_early.safe_load(_f) or {}).get("security", {})
|
||||
if isinstance(_early_sec_cfg, dict):
|
||||
_early_redact = _early_sec_cfg.get("redact_secrets")
|
||||
if _early_redact is not None:
|
||||
os.environ["HERMES_REDACT_SECRETS"] = str(_early_redact).lower()
|
||||
del _early_sec_cfg
|
||||
del _cfg_path
|
||||
except Exception:
|
||||
pass # best-effort — redaction stays at default (enabled) on config errors
|
||||
|
||||
# Initialize centralized file logging early — all `hermes` subcommands
|
||||
# (chat, setup, gateway, config, etc.) write to agent.log + errors.log.
|
||||
try:
|
||||
|
|
@ -1429,6 +1450,7 @@ def select_provider_and_model(args=None):
|
|||
load_config,
|
||||
get_env_value,
|
||||
)
|
||||
from hermes_cli.providers import resolve_provider_full
|
||||
|
||||
config = load_config()
|
||||
current_model = config.get("model")
|
||||
|
|
@ -1446,14 +1468,30 @@ def select_provider_and_model(args=None):
|
|||
effective_provider = (
|
||||
config_provider or os.getenv("HERMES_INFERENCE_PROVIDER") or "auto"
|
||||
)
|
||||
try:
|
||||
active = resolve_provider(effective_provider)
|
||||
except AuthError as exc:
|
||||
warning = format_auth_error(exc)
|
||||
print(f"Warning: {warning} Falling back to auto provider detection.")
|
||||
compatible_custom_providers = get_compatible_custom_providers(config)
|
||||
active = None
|
||||
if effective_provider != "auto":
|
||||
active_def = resolve_provider_full(
|
||||
effective_provider,
|
||||
config.get("providers"),
|
||||
compatible_custom_providers,
|
||||
)
|
||||
if active_def is not None:
|
||||
active = active_def.id
|
||||
else:
|
||||
warning = (
|
||||
f"Unknown provider '{effective_provider}'. Check 'hermes model' for "
|
||||
"available providers, or run 'hermes doctor' to diagnose config "
|
||||
"issues."
|
||||
)
|
||||
print(f"Warning: {warning} Falling back to auto provider detection.")
|
||||
if active is None:
|
||||
try:
|
||||
active = resolve_provider("auto")
|
||||
except AuthError:
|
||||
except AuthError as exc:
|
||||
if effective_provider == "auto":
|
||||
warning = format_auth_error(exc)
|
||||
print(f"Warning: {warning} Falling back to auto provider detection.")
|
||||
active = None # no provider yet; default to first in list
|
||||
|
||||
# Detect custom endpoint
|
||||
|
|
@ -2311,7 +2349,41 @@ def _model_flow_openai_codex(config, current_model=""):
|
|||
from hermes_cli.codex_models import get_codex_model_ids
|
||||
|
||||
status = get_codex_auth_status()
|
||||
if not status.get("logged_in"):
|
||||
if status.get("logged_in"):
|
||||
print(" OpenAI Codex credentials: ✓")
|
||||
print()
|
||||
print(" 1. Use existing credentials")
|
||||
print(" 2. Reauthenticate (new OAuth login)")
|
||||
print(" 3. Cancel")
|
||||
print()
|
||||
try:
|
||||
choice = input(" Choice [1/2/3]: ").strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
choice = "1"
|
||||
|
||||
if choice == "2":
|
||||
print("Starting a fresh OpenAI Codex login...")
|
||||
print()
|
||||
try:
|
||||
mock_args = argparse.Namespace()
|
||||
_login_openai_codex(
|
||||
mock_args,
|
||||
PROVIDER_REGISTRY["openai-codex"],
|
||||
force_new_login=True,
|
||||
)
|
||||
except SystemExit:
|
||||
print("Login cancelled or failed.")
|
||||
return
|
||||
except Exception as exc:
|
||||
print(f"Login failed: {exc}")
|
||||
return
|
||||
status = get_codex_auth_status()
|
||||
if not status.get("logged_in"):
|
||||
print("Login failed.")
|
||||
return
|
||||
elif choice == "3":
|
||||
return
|
||||
else:
|
||||
print("Not logged into OpenAI Codex. Starting login...")
|
||||
print()
|
||||
try:
|
||||
|
|
@ -2828,11 +2900,16 @@ def _model_flow_named_custom(config, provider_info):
|
|||
|
||||
name = provider_info["name"]
|
||||
base_url = provider_info["base_url"]
|
||||
api_mode = provider_info.get("api_mode", "")
|
||||
api_key = provider_info.get("api_key", "")
|
||||
key_env = provider_info.get("key_env", "")
|
||||
saved_model = provider_info.get("model", "")
|
||||
provider_key = (provider_info.get("provider_key") or "").strip()
|
||||
|
||||
# Resolve key from env var if api_key not set directly
|
||||
if not api_key and key_env:
|
||||
api_key = os.environ.get(key_env, "")
|
||||
|
||||
print(f" Provider: {name}")
|
||||
print(f" URL: {base_url}")
|
||||
if saved_model:
|
||||
|
|
@ -2840,7 +2917,10 @@ def _model_flow_named_custom(config, provider_info):
|
|||
print()
|
||||
|
||||
print("Fetching available models...")
|
||||
models = fetch_api_models(api_key, base_url, timeout=8.0)
|
||||
models = fetch_api_models(
|
||||
api_key, base_url, timeout=8.0,
|
||||
api_mode=api_mode or None,
|
||||
)
|
||||
|
||||
if models:
|
||||
default_idx = 0
|
||||
|
|
@ -3930,12 +4010,71 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
|||
print("Cancelled.")
|
||||
return
|
||||
save_env_value(key_env, new_key)
|
||||
existing_key = new_key
|
||||
print("API key saved.")
|
||||
print()
|
||||
else:
|
||||
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
|
||||
print()
|
||||
|
||||
# Gemini free-tier gate: free-tier daily quotas (<= 250 RPD for Flash)
|
||||
# are exhausted in a handful of agent turns, so refuse to wire up the
|
||||
# provider with a free-tier key. Probe is best-effort; network or auth
|
||||
# errors fall through without blocking.
|
||||
if provider_id == "gemini" and existing_key:
|
||||
try:
|
||||
from agent.gemini_native_adapter import probe_gemini_tier
|
||||
except Exception:
|
||||
probe_gemini_tier = None
|
||||
if probe_gemini_tier is not None:
|
||||
print(" Checking Gemini API tier...")
|
||||
probe_base = (
|
||||
(get_env_value(base_url_env) if base_url_env else "")
|
||||
or os.getenv(base_url_env or "", "")
|
||||
or pconfig.inference_base_url
|
||||
)
|
||||
tier = probe_gemini_tier(existing_key, probe_base)
|
||||
if tier == "free":
|
||||
print()
|
||||
print(
|
||||
"❌ This Google API key is on the free tier "
|
||||
"(<= 250 requests/day for gemini-2.5-flash)."
|
||||
)
|
||||
print(
|
||||
" Hermes typically makes 3-10 API calls per user turn "
|
||||
"(tool iterations + auxiliary tasks),"
|
||||
)
|
||||
print(
|
||||
" so the free tier is exhausted after a handful of "
|
||||
"messages and cannot sustain"
|
||||
)
|
||||
print(" an agent session.")
|
||||
print()
|
||||
print(
|
||||
" To use Gemini with Hermes, enable billing on your "
|
||||
"Google Cloud project and regenerate"
|
||||
)
|
||||
print(
|
||||
" the key in a billing-enabled project: "
|
||||
"https://aistudio.google.com/apikey"
|
||||
)
|
||||
print()
|
||||
print(
|
||||
" Alternatives with workable free usage: DeepSeek, "
|
||||
"OpenRouter (free models), Groq, Nous."
|
||||
)
|
||||
print()
|
||||
print("Not saving Gemini as the default provider.")
|
||||
return
|
||||
if tier == "paid":
|
||||
print(" Tier check: paid ✓")
|
||||
else:
|
||||
# "unknown" -- network issue, auth problem, unexpected response.
|
||||
# Don't block; the runtime 429 handler will surface free-tier
|
||||
# guidance if the key turns out to be free tier.
|
||||
print(" Tier check: could not verify (proceeding anyway).")
|
||||
print()
|
||||
|
||||
# Optional base URL override
|
||||
current_base = ""
|
||||
if base_url_env:
|
||||
|
|
@ -3984,7 +4123,18 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
|||
pass
|
||||
|
||||
if mdev_models:
|
||||
model_list = mdev_models
|
||||
# Merge models.dev with curated list so newly added models
|
||||
# (not yet in models.dev) still appear in the picker.
|
||||
if curated:
|
||||
seen = {m.lower() for m in mdev_models}
|
||||
merged = list(mdev_models)
|
||||
for m in curated:
|
||||
if m.lower() not in seen:
|
||||
merged.append(m)
|
||||
seen.add(m.lower())
|
||||
model_list = merged
|
||||
else:
|
||||
model_list = mdev_models
|
||||
print(f" Found {len(model_list)} model(s) from models.dev registry")
|
||||
elif curated and len(curated) >= 8:
|
||||
# Curated list is substantial — use it directly, skip live probe
|
||||
|
|
@ -4166,6 +4316,8 @@ def _model_flow_anthropic(config, current_model=""):
|
|||
from agent.anthropic_adapter import (
|
||||
read_claude_code_credentials,
|
||||
is_claude_code_token_valid,
|
||||
_is_oauth_token,
|
||||
_resolve_claude_code_token_from_credentials,
|
||||
)
|
||||
|
||||
cc_creds = read_claude_code_credentials()
|
||||
|
|
@ -4174,7 +4326,14 @@ def _model_flow_anthropic(config, current_model=""):
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
has_creds = bool(existing_key) or cc_available
|
||||
# Stale-OAuth guard: if the only existing cred is an expired OAuth token
|
||||
# (no valid cc_creds to fall back on), treat it as missing so the re-auth
|
||||
# path is offered instead of silently accepting a broken token.
|
||||
existing_is_stale_oauth = False
|
||||
if existing_key and _is_oauth_token(existing_key) and not cc_available:
|
||||
existing_is_stale_oauth = True
|
||||
|
||||
has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available
|
||||
needs_auth = not has_creds
|
||||
|
||||
if has_creds:
|
||||
|
|
@ -5853,12 +6012,15 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
|||
# Write exit code *before* the gateway restart attempt.
|
||||
# When running as ``hermes update --gateway`` (spawned by the gateway's
|
||||
# /update command), this process lives inside the gateway's systemd
|
||||
# cgroup. ``systemctl restart hermes-gateway`` kills everything in the
|
||||
# cgroup (KillMode=mixed → SIGKILL to remaining processes), including
|
||||
# us and the wrapping bash shell. The shell never reaches its
|
||||
# ``printf $status > .update_exit_code`` epilogue, so the exit-code
|
||||
# marker file is never created. The new gateway's update watcher then
|
||||
# polls for 30 minutes and sends a spurious timeout message.
|
||||
# cgroup. A graceful SIGUSR1 restart keeps the drain loop alive long
|
||||
# enough for the exit-code marker to be written below, but the
|
||||
# fallback ``systemctl restart`` path (see below) kills everything in
|
||||
# the cgroup (KillMode=mixed → SIGKILL to remaining processes),
|
||||
# including us and the wrapping bash shell. The shell never reaches
|
||||
# its ``printf $status > .update_exit_code`` epilogue, so the
|
||||
# exit-code marker file would never be created. The new gateway's
|
||||
# update watcher would then poll for 30 minutes and send a spurious
|
||||
# timeout message.
|
||||
#
|
||||
# Writing the marker here — after git pull + pip install succeed but
|
||||
# before we attempt the restart — ensures the new gateway sees it
|
||||
|
|
@ -5880,9 +6042,37 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
|||
_ensure_user_systemd_env,
|
||||
find_gateway_pids,
|
||||
_get_service_pids,
|
||||
_graceful_restart_via_sigusr1,
|
||||
)
|
||||
import signal as _signal
|
||||
|
||||
# Drain budget for graceful SIGUSR1 restarts. The gateway drains
|
||||
# for up to ``agent.restart_drain_timeout`` (default 60s) before
|
||||
# exiting with code 75; we wait slightly longer so the drain
|
||||
# completes before we fall back to a hard restart. On older
|
||||
# systemd units without SIGUSR1 wiring this wait just times out
|
||||
# and we fall back to ``systemctl restart`` (the old behaviour).
|
||||
try:
|
||||
from hermes_constants import (
|
||||
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT as _DEFAULT_DRAIN,
|
||||
)
|
||||
except Exception:
|
||||
_DEFAULT_DRAIN = 60.0
|
||||
_cfg_drain = None
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
_cfg_agent = (load_config().get("agent") or {})
|
||||
_cfg_drain = _cfg_agent.get("restart_drain_timeout")
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
_drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
|
||||
except (TypeError, ValueError):
|
||||
_drain_budget = float(_DEFAULT_DRAIN)
|
||||
# Add a 15s margin so the drain loop + final exit finish before
|
||||
# we escalate to ``systemctl restart`` / SIGTERM.
|
||||
_drain_budget = max(_drain_budget, 30.0) + 15.0
|
||||
|
||||
restarted_services = []
|
||||
killed_pids = set()
|
||||
|
||||
|
|
@ -5929,59 +6119,114 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
|||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if check.stdout.strip() == "active":
|
||||
restart = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
if check.stdout.strip() != "active":
|
||||
continue
|
||||
|
||||
# Prefer a graceful SIGUSR1 restart so in-flight
|
||||
# agent runs drain instead of being SIGKILLed.
|
||||
# The gateway's SIGUSR1 handler calls
|
||||
# request_restart(via_service=True) → drain →
|
||||
# exit(75); systemd's Restart=on-failure (and
|
||||
# RestartForceExitStatus=75) respawns the unit.
|
||||
_main_pid = 0
|
||||
try:
|
||||
_show = subprocess.run(
|
||||
scope_cmd + [
|
||||
"show", svc_name,
|
||||
"--property=MainPID", "--value",
|
||||
],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
_main_pid = int((_show.stdout or "").strip() or 0)
|
||||
except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
|
||||
_main_pid = 0
|
||||
|
||||
_graceful_ok = False
|
||||
if _main_pid > 0:
|
||||
print(
|
||||
f" → {svc_name}: draining (up to {int(_drain_budget)}s)..."
|
||||
)
|
||||
_graceful_ok = _graceful_restart_via_sigusr1(
|
||||
_main_pid, drain_timeout=_drain_budget,
|
||||
)
|
||||
|
||||
if _graceful_ok:
|
||||
# Gateway exited 75; systemd should relaunch
|
||||
# via Restart=on-failure. Verify the new
|
||||
# process came up.
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if verify.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
continue
|
||||
# Process exited but wasn't respawned (older
|
||||
# unit without Restart=on-failure or
|
||||
# RestartForceExitStatus=75). Fall through
|
||||
# to systemctl start/restart.
|
||||
print(
|
||||
f" ⚠ {svc_name} drained but didn't relaunch — forcing restart"
|
||||
)
|
||||
|
||||
# Fallback: blunt systemctl restart. This is
|
||||
# what the old code always did; we get here only
|
||||
# when the graceful path failed (unit missing
|
||||
# SIGUSR1 wiring, drain exceeded the budget,
|
||||
# restart-policy mismatch).
|
||||
restart = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
if restart.returncode == 0:
|
||||
# Verify the service actually survived the
|
||||
# restart. systemctl restart returns 0 even
|
||||
# if the new process crashes immediately.
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
timeout=5,
|
||||
)
|
||||
if restart.returncode == 0:
|
||||
# Verify the service actually survived the
|
||||
# restart. systemctl restart returns 0 even
|
||||
# if the new process crashes immediately.
|
||||
if verify.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
else:
|
||||
# Retry once — transient startup failures
|
||||
# (stale module cache, import race) often
|
||||
# resolve on the second attempt.
|
||||
print(
|
||||
f" ⚠ {svc_name} died after restart, retrying..."
|
||||
)
|
||||
retry = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
verify2 = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify.stdout.strip() == "active":
|
||||
if verify2.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
print(f" ✓ {svc_name} recovered on retry")
|
||||
else:
|
||||
# Retry once — transient startup failures
|
||||
# (stale module cache, import race) often
|
||||
# resolve on the second attempt.
|
||||
print(
|
||||
f" ⚠ {svc_name} died after restart, retrying..."
|
||||
f" ✗ {svc_name} failed to stay running after restart.\n"
|
||||
f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
|
||||
f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
|
||||
)
|
||||
retry = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
_time.sleep(3)
|
||||
verify2 = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify2.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
print(f" ✓ {svc_name} recovered on retry")
|
||||
else:
|
||||
print(
|
||||
f" ✗ {svc_name} failed to stay running after restart.\n"
|
||||
f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
|
||||
f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
|
||||
|
|
@ -6470,9 +6715,15 @@ def cmd_dashboard(args):
|
|||
try:
|
||||
import fastapi # noqa: F401
|
||||
import uvicorn # noqa: F401
|
||||
except ImportError:
|
||||
print("Web UI dependencies not installed.")
|
||||
print(f"Install them with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
|
||||
except ImportError as e:
|
||||
print("Web UI dependencies not installed (need fastapi + uvicorn).")
|
||||
print(
|
||||
f"Re-install the package into this interpreter so metadata updates apply:\n"
|
||||
f" cd {PROJECT_ROOT}\n"
|
||||
f" {sys.executable} -m pip install -e .\n"
|
||||
"If `pip` is missing in this venv, use: uv pip install -e ."
|
||||
)
|
||||
print(f"Import error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
if "HERMES_WEB_DIST" not in os.environ:
|
||||
|
|
@ -6481,11 +6732,13 @@ def cmd_dashboard(args):
|
|||
|
||||
from hermes_cli.web_server import start_server
|
||||
|
||||
embedded_chat = args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
|
||||
start_server(
|
||||
host=args.host,
|
||||
port=args.port,
|
||||
open_browser=not args.no_open,
|
||||
allow_public=getattr(args, "insecure", False),
|
||||
embedded_chat=embedded_chat,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -7088,7 +7341,7 @@ For more help on a command:
|
|||
)
|
||||
logout_parser.add_argument(
|
||||
"--provider",
|
||||
choices=["nous", "openai-codex"],
|
||||
choices=["nous", "openai-codex", "spotify"],
|
||||
default=None,
|
||||
help="Provider to log out from (default: active provider)",
|
||||
)
|
||||
|
|
@ -7145,6 +7398,17 @@ For more help on a command:
|
|||
"reset", help="Clear exhaustion status for all credentials for a provider"
|
||||
)
|
||||
auth_reset.add_argument("provider", help="Provider id")
|
||||
auth_status = auth_subparsers.add_parser("status", help="Show auth status for a provider")
|
||||
auth_status.add_argument("provider", help="Provider id")
|
||||
auth_logout = auth_subparsers.add_parser("logout", help="Log out a provider and clear stored auth state")
|
||||
auth_logout.add_argument("provider", help="Provider id")
|
||||
auth_spotify = auth_subparsers.add_parser("spotify", help="Authenticate Hermes with Spotify via PKCE")
|
||||
auth_spotify.add_argument("spotify_action", nargs="?", choices=["login", "status", "logout"], default="login")
|
||||
auth_spotify.add_argument("--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)")
|
||||
auth_spotify.add_argument("--redirect-uri", help="Allow-listed localhost redirect URI for your Spotify app")
|
||||
auth_spotify.add_argument("--scope", help="Override requested Spotify scopes")
|
||||
auth_spotify.add_argument("--no-browser", action="store_true", help="Do not attempt to open the browser automatically")
|
||||
auth_spotify.add_argument("--timeout", type=float, help="Callback/token exchange timeout in seconds")
|
||||
auth_parser.set_defaults(func=cmd_auth)
|
||||
|
||||
# =========================================================================
|
||||
|
|
@ -7201,6 +7465,10 @@ For more help on a command:
|
|||
"--script",
|
||||
help="Path to a Python script whose stdout is injected into the prompt each run",
|
||||
)
|
||||
cron_create.add_argument(
|
||||
"--workdir",
|
||||
help="Absolute path for the job to run from. Injects AGENTS.md / CLAUDE.md / .cursorrules from that directory and uses it as the cwd for terminal/file/code_exec tools. Omit to preserve old behaviour (no project context files).",
|
||||
)
|
||||
|
||||
# cron edit
|
||||
cron_edit = cron_subparsers.add_parser(
|
||||
|
|
@ -7239,6 +7507,10 @@ For more help on a command:
|
|||
"--script",
|
||||
help="Path to a Python script whose stdout is injected into the prompt each run. Pass empty string to clear.",
|
||||
)
|
||||
cron_edit.add_argument(
|
||||
"--workdir",
|
||||
help="Absolute path for the job to run from (injects AGENTS.md etc. and sets terminal cwd). Pass empty string to clear.",
|
||||
)
|
||||
|
||||
# lifecycle actions
|
||||
cron_pause = cron_subparsers.add_parser("pause", help="Pause a scheduled job")
|
||||
|
|
@ -8652,6 +8924,14 @@ Examples:
|
|||
action="store_true",
|
||||
help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)",
|
||||
)
|
||||
dashboard_parser.add_argument(
|
||||
"--tui",
|
||||
action="store_true",
|
||||
help=(
|
||||
"Expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket). "
|
||||
"Alternatively set HERMES_DASHBOARD_TUI=1."
|
||||
),
|
||||
)
|
||||
dashboard_parser.set_defaults(func=cmd_dashboard)
|
||||
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -12,8 +12,12 @@ Different LLM providers expect model identifiers in different formats:
|
|||
model IDs, but Claude still uses hyphenated native names like
|
||||
``claude-sonnet-4-6``.
|
||||
- **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
|
||||
- **DeepSeek** only accepts two model identifiers:
|
||||
``deepseek-chat`` and ``deepseek-reasoner``.
|
||||
- **DeepSeek** accepts ``deepseek-chat`` (V3), ``deepseek-reasoner``
|
||||
(R1-family), and the first-class V-series IDs (``deepseek-v4-pro``,
|
||||
``deepseek-v4-flash``, and any future ``deepseek-v<N>-*``). Older
|
||||
Hermes revisions folded every non-reasoner input into
|
||||
``deepseek-chat``, which on aggregators routes to V3 — so a user
|
||||
picking V4 Pro was silently downgraded.
|
||||
- **Custom** and remaining providers pass the name through as-is.
|
||||
|
||||
This module centralises that translation so callers can simply write::
|
||||
|
|
@ -25,6 +29,7 @@ Inspired by Clawdbot's ``normalizeAnthropicModelId`` pattern.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -100,6 +105,15 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
|
|||
"custom",
|
||||
})
|
||||
|
||||
# Providers whose APIs require lowercase model IDs. Xiaomi's
|
||||
# ``api.xiaomimimo.com`` rejects mixed-case names like ``MiMo-V2.5-Pro``
|
||||
# that users might copy from marketing docs — it only accepts
|
||||
# ``mimo-v2.5-pro``. After stripping a matching provider prefix, these
|
||||
# providers also get ``.lower()`` applied.
|
||||
_LOWERCASE_MODEL_PROVIDERS: frozenset[str] = frozenset({
|
||||
"xiaomi",
|
||||
})
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DeepSeek special handling
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -115,17 +129,30 @@ _DEEPSEEK_REASONER_KEYWORDS: frozenset[str] = frozenset({
|
|||
})
|
||||
|
||||
_DEEPSEEK_CANONICAL_MODELS: frozenset[str] = frozenset({
|
||||
"deepseek-chat",
|
||||
"deepseek-reasoner",
|
||||
"deepseek-chat", # V3 on DeepSeek direct and most aggregators
|
||||
"deepseek-reasoner", # R1-family reasoning model
|
||||
"deepseek-v4-pro", # V4 Pro — first-class model ID
|
||||
"deepseek-v4-flash", # V4 Flash — first-class model ID
|
||||
})
|
||||
|
||||
# First-class V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``,
|
||||
# future ``deepseek-v5-*``, dated variants like ``deepseek-v4-flash-20260423``).
|
||||
# Verified empirically 2026-04-24: DeepSeek's Chat Completions API returns
|
||||
# ``provider: DeepSeek`` / ``model: deepseek-v4-flash-20260423`` when called
|
||||
# with ``model=deepseek/deepseek-v4-flash``, so these names are not aliases
|
||||
# of ``deepseek-chat`` and must not be folded into it.
|
||||
_DEEPSEEK_V_SERIES_RE = re.compile(r"^deepseek-v\d+([-.].+)?$")
|
||||
|
||||
|
||||
def _normalize_for_deepseek(model_name: str) -> str:
|
||||
"""Map any model input to one of DeepSeek's two accepted identifiers.
|
||||
"""Map a model input to a DeepSeek-accepted identifier.
|
||||
|
||||
Rules:
|
||||
- Already ``deepseek-chat`` or ``deepseek-reasoner`` -> pass through.
|
||||
- Contains any reasoner keyword (r1, think, reasoning, cot, reasoner)
|
||||
- Already a known canonical (``deepseek-chat``/``deepseek-reasoner``/
|
||||
``deepseek-v4-pro``/``deepseek-v4-flash``) -> pass through.
|
||||
- Matches the V-series pattern ``deepseek-v<digit>...`` -> pass through
|
||||
(covers future ``deepseek-v5-*`` and dated variants without a release).
|
||||
- Contains a reasoner keyword (r1, think, reasoning, cot, reasoner)
|
||||
-> ``deepseek-reasoner``.
|
||||
- Everything else -> ``deepseek-chat``.
|
||||
|
||||
|
|
@ -133,13 +160,17 @@ def _normalize_for_deepseek(model_name: str) -> str:
|
|||
model_name: The bare model name (vendor prefix already stripped).
|
||||
|
||||
Returns:
|
||||
One of ``"deepseek-chat"`` or ``"deepseek-reasoner"``.
|
||||
A DeepSeek-accepted model identifier.
|
||||
"""
|
||||
bare = _strip_vendor_prefix(model_name).lower()
|
||||
|
||||
if bare in _DEEPSEEK_CANONICAL_MODELS:
|
||||
return bare
|
||||
|
||||
# V-series first-class IDs (v4-pro, v4-flash, future v5-*, dated variants)
|
||||
if _DEEPSEEK_V_SERIES_RE.match(bare):
|
||||
return bare
|
||||
|
||||
# Check for reasoner-like keywords anywhere in the name
|
||||
for keyword in _DEEPSEEK_REASONER_KEYWORDS:
|
||||
if keyword in bare:
|
||||
|
|
@ -347,6 +378,9 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
|||
|
||||
>>> normalize_model_for_provider("claude-sonnet-4.6", "zai")
|
||||
'claude-sonnet-4.6'
|
||||
|
||||
>>> normalize_model_for_provider("MiMo-V2.5-Pro", "xiaomi")
|
||||
'mimo-v2.5-pro'
|
||||
"""
|
||||
name = (model_input or "").strip()
|
||||
if not name:
|
||||
|
|
@ -410,7 +444,12 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
|||
|
||||
# --- Direct providers: repair matching provider prefixes only ---
|
||||
if provider in _MATCHING_PREFIX_STRIP_PROVIDERS:
|
||||
return _strip_matching_provider_prefix(name, provider)
|
||||
result = _strip_matching_provider_prefix(name, provider)
|
||||
# Some providers require lowercase model IDs (e.g. Xiaomi's API
|
||||
# rejects "MiMo-V2.5-Pro" but accepts "mimo-v2.5-pro").
|
||||
if provider in _LOWERCASE_MODEL_PROVIDERS:
|
||||
result = result.lower()
|
||||
return result
|
||||
|
||||
# --- Authoritative native providers: preserve user-facing slugs as-is ---
|
||||
if provider in _AUTHORITATIVE_NATIVE_PROVIDERS:
|
||||
|
|
|
|||
|
|
@ -304,6 +304,113 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
|
|||
# Alias resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _model_sort_key(model_id: str, prefix: str) -> tuple:
|
||||
"""Sort key for model version preference.
|
||||
|
||||
Extracts version numbers after the family prefix and returns a sort key
|
||||
that prefers higher versions. Suffix tokens (``pro``, ``omni``, etc.)
|
||||
are used as tiebreakers, with common quality indicators ranked.
|
||||
|
||||
Examples (with prefix ``"mimo"``)::
|
||||
|
||||
mimo-v2.5-pro → (-2.5, 0, 'pro') # highest version wins
|
||||
mimo-v2.5 → (-2.5, 1, '') # no suffix = lower than pro
|
||||
mimo-v2-pro → (-2.0, 0, 'pro')
|
||||
mimo-v2-omni → (-2.0, 1, 'omni')
|
||||
mimo-v2-flash → (-2.0, 1, 'flash')
|
||||
"""
|
||||
# Strip the prefix (and optional "/" separator for aggregator slugs)
|
||||
rest = model_id[len(prefix):]
|
||||
if rest.startswith("/"):
|
||||
rest = rest[1:]
|
||||
rest = rest.lstrip("-").strip()
|
||||
|
||||
# Parse version and suffix from the remainder.
|
||||
# "v2.5-pro" → version [2.5], suffix "pro"
|
||||
# "-omni" → version [], suffix "omni"
|
||||
# State machine: start → in_version → between → in_suffix
|
||||
nums: list[float] = []
|
||||
suffix_buf = ""
|
||||
state = "start"
|
||||
num_buf = ""
|
||||
|
||||
for ch in rest:
|
||||
if state == "start":
|
||||
if ch in "vV":
|
||||
state = "in_version"
|
||||
elif ch.isdigit():
|
||||
state = "in_version"
|
||||
num_buf += ch
|
||||
elif ch in "-_.":
|
||||
pass # skip separators before any content
|
||||
else:
|
||||
state = "in_suffix"
|
||||
suffix_buf += ch
|
||||
elif state == "in_version":
|
||||
if ch.isdigit():
|
||||
num_buf += ch
|
||||
elif ch == ".":
|
||||
if "." in num_buf:
|
||||
# Second dot — flush current number, start new component
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
num_buf = ""
|
||||
else:
|
||||
num_buf += ch
|
||||
elif ch in "-_.":
|
||||
if num_buf:
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
num_buf = ""
|
||||
state = "between"
|
||||
else:
|
||||
if num_buf:
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
num_buf = ""
|
||||
state = "in_suffix"
|
||||
suffix_buf += ch
|
||||
elif state == "between":
|
||||
if ch.isdigit():
|
||||
state = "in_version"
|
||||
num_buf = ch
|
||||
elif ch in "vV":
|
||||
state = "in_version"
|
||||
elif ch in "-_.":
|
||||
pass
|
||||
else:
|
||||
state = "in_suffix"
|
||||
suffix_buf += ch
|
||||
elif state == "in_suffix":
|
||||
suffix_buf += ch
|
||||
|
||||
# Flush remaining buffer (strip trailing dots — "5.4." → "5.4")
|
||||
if num_buf and state == "in_version":
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
suffix = suffix_buf.lower().strip("-_.")
|
||||
suffix = suffix.strip()
|
||||
|
||||
# Negate versions so higher → sorts first
|
||||
version_key = tuple(-n for n in nums)
|
||||
|
||||
# Suffix quality ranking: pro/max > (no suffix) > omni/flash/mini/lite
|
||||
# Lower number = preferred
|
||||
_SUFFIX_RANK = {"pro": 0, "max": 0, "plus": 0, "turbo": 0}
|
||||
suffix_rank = _SUFFIX_RANK.get(suffix, 1)
|
||||
|
||||
return version_key + (suffix_rank, suffix)
|
||||
|
||||
|
||||
def resolve_alias(
|
||||
raw_input: str,
|
||||
current_provider: str,
|
||||
|
|
@ -311,9 +418,9 @@ def resolve_alias(
|
|||
"""Resolve a short alias against the current provider's catalog.
|
||||
|
||||
Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the
|
||||
current provider's models.dev catalog for the first model whose ID
|
||||
starts with ``vendor/family`` (or just ``family`` for non-aggregator
|
||||
providers).
|
||||
current provider's models.dev catalog for the model whose ID starts
|
||||
with ``vendor/family`` (or just ``family`` for non-aggregator
|
||||
providers) and has the **highest version**.
|
||||
|
||||
Returns:
|
||||
``(provider, resolved_model_id, alias_name)`` if a match is
|
||||
|
|
@ -341,28 +448,44 @@ def resolve_alias(
|
|||
|
||||
vendor, family = identity
|
||||
|
||||
# Search the provider's catalog from models.dev
|
||||
# Build catalog from models.dev, then merge in static _PROVIDER_MODELS
|
||||
# entries that models.dev may be missing (e.g. newly added models not
|
||||
# yet synced to the registry).
|
||||
catalog = list_provider_models(current_provider)
|
||||
if not catalog:
|
||||
return None
|
||||
try:
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
static = _PROVIDER_MODELS.get(current_provider, [])
|
||||
if static:
|
||||
seen = {m.lower() for m in catalog}
|
||||
for m in static:
|
||||
if m.lower() not in seen:
|
||||
catalog.append(m)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# For aggregators, models are vendor/model-name format
|
||||
aggregator = is_aggregator(current_provider)
|
||||
|
||||
for model_id in catalog:
|
||||
mid_lower = model_id.lower()
|
||||
if aggregator:
|
||||
# Match vendor/family prefix -- e.g. "anthropic/claude-sonnet"
|
||||
prefix = f"{vendor}/{family}".lower()
|
||||
if mid_lower.startswith(prefix):
|
||||
return (current_provider, model_id, key)
|
||||
else:
|
||||
# Non-aggregator: bare names -- e.g. "claude-sonnet-4-6"
|
||||
family_lower = family.lower()
|
||||
if mid_lower.startswith(family_lower):
|
||||
return (current_provider, model_id, key)
|
||||
if aggregator:
|
||||
prefix = f"{vendor}/{family}".lower()
|
||||
matches = [
|
||||
mid for mid in catalog
|
||||
if mid.lower().startswith(prefix)
|
||||
]
|
||||
else:
|
||||
family_lower = family.lower()
|
||||
matches = [
|
||||
mid for mid in catalog
|
||||
if mid.lower().startswith(family_lower)
|
||||
]
|
||||
|
||||
return None
|
||||
if not matches:
|
||||
return None
|
||||
|
||||
# Sort by version descending — prefer the latest/highest version
|
||||
prefix_for_sort = f"{vendor}/{family}" if aggregator else family
|
||||
matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort))
|
||||
return (current_provider, matches[0], key)
|
||||
|
||||
|
||||
def get_authenticated_provider_slugs(
|
||||
|
|
@ -648,7 +771,10 @@ def switch_model(
|
|||
|
||||
if provider_changed or explicit_provider:
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=target_provider)
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=target_provider,
|
||||
target_model=new_model,
|
||||
)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
|
|
@ -665,7 +791,10 @@ def switch_model(
|
|||
)
|
||||
else:
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=current_provider)
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=current_provider,
|
||||
target_model=new_model,
|
||||
)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
|
|
@ -692,6 +821,7 @@ def switch_model(
|
|||
target_provider,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
api_mode=api_mode or None,
|
||||
)
|
||||
except Exception as e:
|
||||
validation = {
|
||||
|
|
@ -813,7 +943,7 @@ def list_authenticated_providers(
|
|||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
from hermes_cli.models import (
|
||||
OPENROUTER_MODELS, _PROVIDER_MODELS,
|
||||
_MODELS_DEV_PREFERRED, _merge_with_models_dev,
|
||||
_MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids,
|
||||
)
|
||||
|
||||
results: List[dict] = []
|
||||
|
|
@ -861,6 +991,14 @@ def list_authenticated_providers(
|
|||
|
||||
# Check if any env var is set
|
||||
has_creds = any(os.environ.get(ev) for ev in env_vars)
|
||||
if not has_creds:
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
if store and hermes_id in store.get("credential_pool", {}):
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
if not has_creds:
|
||||
continue
|
||||
|
||||
|
|
@ -972,11 +1110,14 @@ def list_authenticated_providers(
|
|||
if not has_creds:
|
||||
continue
|
||||
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
# Merge with models.dev for preferred providers (same rationale as above).
|
||||
if hermes_slug in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
|
||||
if hermes_slug in {"copilot", "copilot-acp"}:
|
||||
model_ids = provider_model_ids(hermes_slug)
|
||||
else:
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
# Merge with models.dev for preferred providers (same rationale as above).
|
||||
if hermes_slug in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
|
|
@ -1099,6 +1240,15 @@ def list_authenticated_providers(
|
|||
if m and m not in models_list:
|
||||
models_list.append(m)
|
||||
|
||||
# Official OpenAI API rows in providers: often have base_url but no
|
||||
# explicit models: dict — avoid a misleading zero count in /model.
|
||||
if not models_list:
|
||||
url_lower = str(api_url).strip().lower()
|
||||
if "api.openai.com" in url_lower:
|
||||
fb = curated.get("openai") or []
|
||||
if fb:
|
||||
models_list = list(fb)
|
||||
|
||||
# Try to probe /v1/models if URL is set (but don't block on it)
|
||||
# For now just show what we know from config
|
||||
results.append({
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
|
|||
# (model_id, display description shown in menus)
|
||||
OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("moonshotai/kimi-k2.6", "recommended"),
|
||||
("deepseek/deepseek-v4-pro", ""),
|
||||
("deepseek/deepseek-v4-flash", ""),
|
||||
("anthropic/claude-opus-4.7", ""),
|
||||
("anthropic/claude-opus-4.6", ""),
|
||||
("anthropic/claude-sonnet-4.6", ""),
|
||||
|
|
@ -40,7 +42,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
|||
("anthropic/claude-sonnet-4.5", ""),
|
||||
("anthropic/claude-haiku-4.5", ""),
|
||||
("openrouter/elephant-alpha", "free"),
|
||||
("openai/gpt-5.4", ""),
|
||||
("openai/gpt-5.5", ""),
|
||||
("openai/gpt-5.4-mini", ""),
|
||||
("xiaomi/mimo-v2.5-pro", ""),
|
||||
("xiaomi/mimo-v2.5", ""),
|
||||
|
|
@ -63,7 +65,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
|||
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
|
||||
("arcee-ai/trinity-large-preview:free", "free"),
|
||||
("arcee-ai/trinity-large-thinking", ""),
|
||||
("openai/gpt-5.4-pro", ""),
|
||||
("openai/gpt-5.5-pro", ""),
|
||||
("openai/gpt-5.4-nano", ""),
|
||||
]
|
||||
|
||||
|
|
@ -109,6 +111,8 @@ def _codex_curated_models() -> list[str]:
|
|||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"moonshotai/kimi-k2.6",
|
||||
"deepseek/deepseek-v4-pro",
|
||||
"deepseek/deepseek-v4-flash",
|
||||
"xiaomi/mimo-v2.5-pro",
|
||||
"xiaomi/mimo-v2.5",
|
||||
"anthropic/claude-opus-4.7",
|
||||
|
|
@ -116,7 +120,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"anthropic/claude-sonnet-4.6",
|
||||
"anthropic/claude-sonnet-4.5",
|
||||
"anthropic/claude-haiku-4.5",
|
||||
"openai/gpt-5.4",
|
||||
"openai/gpt-5.5",
|
||||
"openai/gpt-5.4-mini",
|
||||
"openai/gpt-5.3-codex",
|
||||
"google/gemini-3-pro-preview",
|
||||
|
|
@ -135,9 +139,21 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"x-ai/grok-4.20-beta",
|
||||
"nvidia/nemotron-3-super-120b-a12b",
|
||||
"arcee-ai/trinity-large-thinking",
|
||||
"openai/gpt-5.4-pro",
|
||||
"openai/gpt-5.5-pro",
|
||||
"openai/gpt-5.4-nano",
|
||||
],
|
||||
# Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
|
||||
# provider_model_ids fallback when /v1/models is unavailable.
|
||||
"openai": [
|
||||
"gpt-5.4",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5-mini",
|
||||
"gpt-5.3-codex",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-4.1",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
],
|
||||
"openai-codex": _codex_curated_models(),
|
||||
"copilot-acp": [
|
||||
"copilot-acp",
|
||||
|
|
@ -151,10 +167,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"gpt-4.1",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
"claude-opus-4.6",
|
||||
"claude-sonnet-4.6",
|
||||
"claude-sonnet-4",
|
||||
"claude-sonnet-4.5",
|
||||
"claude-haiku-4.5",
|
||||
"gemini-3.1-pro-preview",
|
||||
"gemini-3-pro-preview",
|
||||
"gemini-3-flash-preview",
|
||||
"gemini-2.5-pro",
|
||||
"grok-code-fast-1",
|
||||
],
|
||||
|
|
@ -246,10 +265,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"claude-haiku-4-5-20251001",
|
||||
],
|
||||
"deepseek": [
|
||||
"deepseek-v4-pro",
|
||||
"deepseek-v4-flash",
|
||||
"deepseek-chat",
|
||||
"deepseek-reasoner",
|
||||
],
|
||||
"xiaomi": [
|
||||
"mimo-v2.5-pro",
|
||||
"mimo-v2.5",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"mimo-v2-flash",
|
||||
|
|
@ -301,6 +324,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"kimi-k2.5",
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
"mimo-v2.5-pro",
|
||||
"mimo-v2.5",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"minimax-m2.7",
|
||||
|
|
@ -672,7 +697,7 @@ def get_nous_recommended_aux_model(
|
|||
# ---------------------------------------------------------------------------
|
||||
# Canonical provider list — single source of truth for provider identity.
|
||||
# Every code path that lists, displays, or iterates providers derives from
|
||||
# this list: hermes model, /model, /provider, list_authenticated_providers.
|
||||
# this list: hermes model, /model, list_authenticated_providers.
|
||||
#
|
||||
# Fields:
|
||||
# slug — internal provider ID (used in config.yaml, --provider flag)
|
||||
|
|
@ -692,7 +717,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
|||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
|
||||
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
|
||||
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
|
||||
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
|
||||
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
|
||||
|
|
@ -1100,7 +1125,10 @@ def fetch_models_with_pricing(
|
|||
return _pricing_cache[cache_key]
|
||||
|
||||
url = cache_key.rstrip("/") + "/v1/models"
|
||||
headers: dict[str, str] = {"Accept": "application/json"}
|
||||
headers: dict[str, str] = {
|
||||
"Accept": "application/json",
|
||||
"User-Agent": _HERMES_USER_AGENT,
|
||||
}
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
|
||||
|
|
@ -1674,7 +1702,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
|||
if normalized == "openai-codex":
|
||||
from hermes_cli.codex_models import get_codex_model_ids
|
||||
|
||||
return get_codex_model_ids()
|
||||
# Pass the live OAuth access token so the picker matches whatever
|
||||
# ChatGPT lists for this account right now (new models appear without
|
||||
# a Hermes release). Falls back to the hardcoded catalog if no token
|
||||
# or the endpoint is unreachable.
|
||||
access_token = None
|
||||
try:
|
||||
from hermes_cli.auth import resolve_codex_runtime_credentials
|
||||
|
||||
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
|
||||
access_token = creds.get("api_key")
|
||||
except Exception:
|
||||
access_token = None
|
||||
return get_codex_model_ids(access_token=access_token)
|
||||
if normalized in {"copilot", "copilot-acp"}:
|
||||
try:
|
||||
live = _fetch_github_models(_resolve_copilot_catalog_api_key())
|
||||
|
|
@ -1720,6 +1760,17 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
|||
live = fetch_ollama_cloud_models(force_refresh=force_refresh)
|
||||
if live:
|
||||
return live
|
||||
if normalized == "openai":
|
||||
api_key = os.getenv("OPENAI_API_KEY", "").strip()
|
||||
if api_key:
|
||||
base_raw = os.getenv("OPENAI_BASE_URL", "").strip().rstrip("/")
|
||||
base = base_raw or "https://api.openai.com/v1"
|
||||
try:
|
||||
live = fetch_api_models(api_key, base)
|
||||
if live:
|
||||
return live
|
||||
except Exception:
|
||||
pass
|
||||
if normalized == "custom":
|
||||
base_url = _get_custom_base_url()
|
||||
if base_url:
|
||||
|
|
@ -1874,6 +1925,51 @@ def fetch_github_model_catalog(
|
|||
return None
|
||||
|
||||
|
||||
# ─── Copilot catalog context-window helpers ─────────────────────────────────
|
||||
|
||||
# Module-level cache: {model_id: max_prompt_tokens}
|
||||
_copilot_context_cache: dict[str, int] = {}
|
||||
_copilot_context_cache_time: float = 0.0
|
||||
_COPILOT_CONTEXT_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def get_copilot_model_context(model_id: str, api_key: Optional[str] = None) -> Optional[int]:
|
||||
"""Look up max_prompt_tokens for a Copilot model from the live /models API.
|
||||
|
||||
Results are cached in-process for 1 hour to avoid repeated API calls.
|
||||
Returns the token limit or None if not found.
|
||||
"""
|
||||
global _copilot_context_cache, _copilot_context_cache_time
|
||||
|
||||
# Serve from cache if fresh
|
||||
if _copilot_context_cache and (time.time() - _copilot_context_cache_time < _COPILOT_CONTEXT_CACHE_TTL):
|
||||
if model_id in _copilot_context_cache:
|
||||
return _copilot_context_cache[model_id]
|
||||
# Cache is fresh but model not in it — don't re-fetch
|
||||
return None
|
||||
|
||||
# Fetch and populate cache
|
||||
catalog = fetch_github_model_catalog(api_key=api_key)
|
||||
if not catalog:
|
||||
return None
|
||||
|
||||
cache: dict[str, int] = {}
|
||||
for item in catalog:
|
||||
mid = str(item.get("id") or "").strip()
|
||||
if not mid:
|
||||
continue
|
||||
caps = item.get("capabilities") or {}
|
||||
limits = caps.get("limits") or {}
|
||||
max_prompt = limits.get("max_prompt_tokens")
|
||||
if isinstance(max_prompt, int) and max_prompt > 0:
|
||||
cache[mid] = max_prompt
|
||||
|
||||
_copilot_context_cache = cache
|
||||
_copilot_context_cache_time = time.time()
|
||||
|
||||
return cache.get(model_id)
|
||||
|
||||
|
||||
def _is_github_models_base_url(base_url: Optional[str]) -> bool:
|
||||
normalized = (base_url or "").strip().rstrip("/").lower()
|
||||
return (
|
||||
|
|
@ -1907,6 +2003,7 @@ _COPILOT_MODEL_ALIASES = {
|
|||
"openai/o4-mini": "gpt-5-mini",
|
||||
"anthropic/claude-opus-4.6": "claude-opus-4.6",
|
||||
"anthropic/claude-sonnet-4.6": "claude-sonnet-4.6",
|
||||
"anthropic/claude-sonnet-4": "claude-sonnet-4",
|
||||
"anthropic/claude-sonnet-4.5": "claude-sonnet-4.5",
|
||||
"anthropic/claude-haiku-4.5": "claude-haiku-4.5",
|
||||
# Dash-notation fallbacks: Hermes' default Claude IDs elsewhere use
|
||||
|
|
@ -1916,10 +2013,12 @@ _COPILOT_MODEL_ALIASES = {
|
|||
# "model_not_supported". See issue #6879.
|
||||
"claude-opus-4-6": "claude-opus-4.6",
|
||||
"claude-sonnet-4-6": "claude-sonnet-4.6",
|
||||
"claude-sonnet-4-0": "claude-sonnet-4",
|
||||
"claude-sonnet-4-5": "claude-sonnet-4.5",
|
||||
"claude-haiku-4-5": "claude-haiku-4.5",
|
||||
"anthropic/claude-opus-4-6": "claude-opus-4.6",
|
||||
"anthropic/claude-sonnet-4-6": "claude-sonnet-4.6",
|
||||
"anthropic/claude-sonnet-4-0": "claude-sonnet-4",
|
||||
"anthropic/claude-sonnet-4-5": "claude-sonnet-4.5",
|
||||
"anthropic/claude-haiku-4-5": "claude-haiku-4.5",
|
||||
}
|
||||
|
|
@ -2144,8 +2243,15 @@ def probe_api_models(
|
|||
api_key: Optional[str],
|
||||
base_url: Optional[str],
|
||||
timeout: float = 5.0,
|
||||
api_mode: Optional[str] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics."""
|
||||
"""Probe a ``/models`` endpoint with light URL heuristics.
|
||||
|
||||
For ``anthropic_messages`` mode, uses ``x-api-key`` and
|
||||
``anthropic-version`` headers (Anthropic's native auth) instead of
|
||||
``Authorization: Bearer``. The response shape (``data[].id``) is
|
||||
identical, so the same parser works for both.
|
||||
"""
|
||||
normalized = (base_url or "").strip().rstrip("/")
|
||||
if not normalized:
|
||||
return {
|
||||
|
|
@ -2177,7 +2283,10 @@ def probe_api_models(
|
|||
|
||||
tried: list[str] = []
|
||||
headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT}
|
||||
if api_key:
|
||||
if api_key and api_mode == "anthropic_messages":
|
||||
headers["x-api-key"] = api_key
|
||||
headers["anthropic-version"] = "2023-06-01"
|
||||
elif api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
if normalized.startswith(COPILOT_BASE_URL):
|
||||
headers.update(copilot_default_headers())
|
||||
|
|
@ -2219,7 +2328,10 @@ def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
|
|||
base_url = AI_GATEWAY_BASE_URL
|
||||
|
||||
url = base_url.rstrip("/") + "/models"
|
||||
headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"}
|
||||
headers: dict[str, str] = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"User-Agent": _HERMES_USER_AGENT,
|
||||
}
|
||||
req = urllib.request.Request(url, headers=headers)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
|
|
@ -2239,13 +2351,14 @@ def fetch_api_models(
|
|||
api_key: Optional[str],
|
||||
base_url: Optional[str],
|
||||
timeout: float = 5.0,
|
||||
api_mode: Optional[str] = None,
|
||||
) -> Optional[list[str]]:
|
||||
"""Fetch the list of available model IDs from the provider's ``/models`` endpoint.
|
||||
|
||||
Returns a list of model ID strings, or ``None`` if the endpoint could not
|
||||
be reached (network error, timeout, auth failure, etc.).
|
||||
"""
|
||||
return probe_api_models(api_key, base_url, timeout=timeout).get("models")
|
||||
return probe_api_models(api_key, base_url, timeout=timeout, api_mode=api_mode).get("models")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -2373,6 +2486,7 @@ def validate_requested_model(
|
|||
*,
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Validate a ``/model`` value for the active provider.
|
||||
|
|
@ -2414,7 +2528,11 @@ def validate_requested_model(
|
|||
}
|
||||
|
||||
if normalized == "custom":
|
||||
probe = probe_api_models(api_key, base_url)
|
||||
# Try probing with correct auth for the api_mode.
|
||||
if api_mode == "anthropic_messages":
|
||||
probe = probe_api_models(api_key, base_url, api_mode=api_mode)
|
||||
else:
|
||||
probe = probe_api_models(api_key, base_url)
|
||||
api_models = probe.get("models")
|
||||
if api_models is not None:
|
||||
if requested_for_lookup in set(api_models):
|
||||
|
|
@ -2463,12 +2581,17 @@ def validate_requested_model(
|
|||
f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. "
|
||||
f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification."
|
||||
)
|
||||
if api_mode == "anthropic_messages":
|
||||
message += (
|
||||
"\n Many Anthropic-compatible proxies do not implement the Models API "
|
||||
"(GET /v1/models). The model name has been accepted without verification."
|
||||
)
|
||||
if probe.get("suggested_base_url"):
|
||||
message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"
|
||||
|
||||
return {
|
||||
"accepted": False,
|
||||
"persist": False,
|
||||
"accepted": api_mode == "anthropic_messages",
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": message,
|
||||
}
|
||||
|
|
@ -2556,10 +2679,100 @@ def validate_requested_model(
|
|||
),
|
||||
}
|
||||
|
||||
# Native Anthropic provider: /v1/models requires x-api-key (or Bearer for
|
||||
# OAuth) plus anthropic-version headers. The generic OpenAI-style probe
|
||||
# below uses plain Bearer auth and 401s against Anthropic, so dispatch to
|
||||
# the native fetcher which handles both API keys and Claude-Code OAuth
|
||||
# tokens. (The api_mode=="anthropic_messages" branch below handles the
|
||||
# Messages-API transport case separately.)
|
||||
if normalized == "anthropic":
|
||||
anthropic_models = _fetch_anthropic_models()
|
||||
if anthropic_models is not None:
|
||||
if requested_for_lookup in set(anthropic_models):
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"message": None,
|
||||
}
|
||||
auto = get_close_matches(requested_for_lookup, anthropic_models, n=1, cutoff=0.9)
|
||||
if auto:
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"corrected_model": auto[0],
|
||||
"message": f"Auto-corrected `{requested}` → `{auto[0]}`",
|
||||
}
|
||||
suggestions = get_close_matches(requested, anthropic_models, n=3, cutoff=0.5)
|
||||
suggestion_text = ""
|
||||
if suggestions:
|
||||
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
|
||||
# Accept anyway — Anthropic sometimes gates newer/preview models
|
||||
# (e.g. snapshot IDs, early-access releases) behind accounts
|
||||
# even though they aren't listed on /v1/models.
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": (
|
||||
f"Note: `{requested}` was not found in Anthropic's /v1/models listing. "
|
||||
f"It may still work if you have early-access or snapshot IDs."
|
||||
f"{suggestion_text}"
|
||||
),
|
||||
}
|
||||
# _fetch_anthropic_models returned None — no token resolvable or
|
||||
# network failure. Fall through to the generic warning below.
|
||||
|
||||
# Anthropic Messages API: many proxies don't implement /v1/models.
|
||||
# Try probing with correct auth; if it fails, accept with a warning.
|
||||
if api_mode == "anthropic_messages":
|
||||
api_models = fetch_api_models(api_key, base_url, api_mode=api_mode)
|
||||
if api_models is not None:
|
||||
if requested_for_lookup in set(api_models):
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"message": None,
|
||||
}
|
||||
auto = get_close_matches(requested_for_lookup, api_models, n=1, cutoff=0.9)
|
||||
if auto:
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"corrected_model": auto[0],
|
||||
"message": f"Auto-corrected `{requested}` → `{auto[0]}`",
|
||||
}
|
||||
# Probe failed or model not found — accept anyway (proxy likely
|
||||
# doesn't implement the Anthropic Models API).
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": (
|
||||
f"Note: could not verify `{requested}` against this endpoint's "
|
||||
f"model listing. Many Anthropic-compatible proxies do not "
|
||||
f"implement GET /v1/models. The model name has been accepted "
|
||||
f"without verification."
|
||||
),
|
||||
}
|
||||
|
||||
# Probe the live API to check if the model actually exists
|
||||
api_models = fetch_api_models(api_key, base_url)
|
||||
|
||||
if api_models is not None:
|
||||
# Gemini's OpenAI-compat /v1beta/openai/models endpoint returns IDs
|
||||
# prefixed with "models/" (e.g. "models/gemini-2.5-flash") — native
|
||||
# Gemini-API convention. Our curated list and user input both use
|
||||
# the bare ID, so a direct set-membership check drops every known
|
||||
# Gemini model. Strip the prefix before comparison. See #12532.
|
||||
if normalized == "gemini":
|
||||
api_models = [
|
||||
m[len("models/"):] if isinstance(m, str) and m.startswith("models/") else m
|
||||
for m in api_models
|
||||
]
|
||||
if requested_for_lookup in set(api_models):
|
||||
# API confirmed the model exists
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
|||
("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")),
|
||||
("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")),
|
||||
("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")),
|
||||
("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")),
|
||||
])
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -71,6 +71,14 @@ VALID_HOOKS: Set[str] = {
|
|||
"on_session_finalize",
|
||||
"on_session_reset",
|
||||
"subagent_stop",
|
||||
# Gateway pre-dispatch hook. Fired once per incoming MessageEvent
|
||||
# after the internal-event guard but BEFORE auth/pairing and agent
|
||||
# dispatch. Plugins may return a dict to influence flow:
|
||||
# {"action": "skip", "reason": "..."} -> drop message (no reply)
|
||||
# {"action": "rewrite", "text": "..."} -> replace event.text, continue
|
||||
# {"action": "allow"} / None -> normal dispatch
|
||||
# Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store.
|
||||
"pre_gateway_dispatch",
|
||||
}
|
||||
|
||||
ENTRY_POINTS_GROUP = "hermes_agent.plugins"
|
||||
|
|
|
|||
|
|
@ -116,6 +116,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
|||
transport="openai_chat",
|
||||
base_url_env_var="DASHSCOPE_BASE_URL",
|
||||
),
|
||||
"alibaba-coding-plan": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
base_url_env_var="ALIBABA_CODING_PLAN_BASE_URL",
|
||||
),
|
||||
"vercel": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
is_aggregator=True,
|
||||
|
|
@ -259,6 +263,9 @@ ALIASES: Dict[str, str] = {
|
|||
"aliyun": "alibaba",
|
||||
"qwen": "alibaba",
|
||||
"alibaba-cloud": "alibaba",
|
||||
"alibaba_coding": "alibaba-coding-plan",
|
||||
"alibaba-coding": "alibaba-coding-plan",
|
||||
"alibaba_coding_plan": "alibaba-coding-plan",
|
||||
|
||||
# google-gemini-cli (OAuth + Code Assist)
|
||||
"gemini-cli": "google-gemini-cli",
|
||||
|
|
|
|||
229
hermes_cli/pty_bridge.py
Normal file
229
hermes_cli/pty_bridge.py
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
"""PTY bridge for `hermes dashboard` chat tab.
|
||||
|
||||
Wraps a child process behind a pseudo-terminal so its ANSI output can be
|
||||
streamed to a browser-side terminal emulator (xterm.js) and typed
|
||||
keystrokes can be fed back in. The only caller today is the
|
||||
``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``.
|
||||
|
||||
Design constraints:
|
||||
|
||||
* **POSIX-only.** Hermes Agent supports Windows exclusively via WSL, which
|
||||
exposes a native POSIX PTY via ``openpty(3)``. Native Windows Python
|
||||
has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
|
||||
install/platform message so the dashboard can render a banner instead of
|
||||
crashing.
|
||||
* **Zero Node dependency on the server side.** We use :mod:`ptyprocess`,
|
||||
which is a pure-Python wrapper around the OS calls. The browser talks
|
||||
to the same ``hermes --tui`` binary it would launch from the CLI, so
|
||||
every TUI feature (slash popover, model picker, tool rows, markdown,
|
||||
skin engine, clarify/sudo/approval prompts) ships automatically.
|
||||
* **Byte-safe I/O.** Reads and writes go through the PTY master fd
|
||||
directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because
|
||||
streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land
|
||||
mid-read.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import fcntl
|
||||
import os
|
||||
import select
|
||||
import signal
|
||||
import struct
|
||||
import sys
|
||||
import termios
|
||||
import time
|
||||
from typing import Optional, Sequence
|
||||
|
||||
try:
|
||||
import ptyprocess # type: ignore
|
||||
_PTY_AVAILABLE = not sys.platform.startswith("win")
|
||||
except ImportError: # pragma: no cover - dev env without ptyprocess
|
||||
ptyprocess = None # type: ignore
|
||||
_PTY_AVAILABLE = False
|
||||
|
||||
|
||||
__all__ = ["PtyBridge", "PtyUnavailableError"]
|
||||
|
||||
|
||||
class PtyUnavailableError(RuntimeError):
|
||||
"""Raised when a PTY cannot be created on this platform.
|
||||
|
||||
Today this means native Windows (no ConPTY bindings) or a dev
|
||||
environment missing the ``ptyprocess`` dependency. The dashboard
|
||||
surfaces the message to the user as a chat-tab banner.
|
||||
"""
|
||||
|
||||
|
||||
class PtyBridge:
|
||||
"""Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming.
|
||||
|
||||
Not thread-safe. A single bridge is owned by the WebSocket handler
|
||||
that spawned it; the reader runs in an executor thread while writes
|
||||
happen on the event-loop thread. Both sides are OK because the
|
||||
kernel PTY is the actual synchronization point — we never call
|
||||
:mod:`ptyprocess` methods concurrently, we only call ``os.read`` and
|
||||
``os.write`` on the master fd, which is safe.
|
||||
"""
|
||||
|
||||
def __init__(self, proc: "ptyprocess.PtyProcess"): # type: ignore[name-defined]
|
||||
self._proc = proc
|
||||
self._fd: int = proc.fd
|
||||
self._closed = False
|
||||
|
||||
# -- lifecycle --------------------------------------------------------
|
||||
|
||||
@classmethod
|
||||
def is_available(cls) -> bool:
|
||||
"""True if a PTY can be spawned on this platform."""
|
||||
return bool(_PTY_AVAILABLE)
|
||||
|
||||
@classmethod
|
||||
def spawn(
|
||||
cls,
|
||||
argv: Sequence[str],
|
||||
*,
|
||||
cwd: Optional[str] = None,
|
||||
env: Optional[dict] = None,
|
||||
cols: int = 80,
|
||||
rows: int = 24,
|
||||
) -> "PtyBridge":
|
||||
"""Spawn ``argv`` behind a new PTY and return a bridge.
|
||||
|
||||
Raises :class:`PtyUnavailableError` if the platform can't host a
|
||||
PTY. Raises :class:`FileNotFoundError` or :class:`OSError` for
|
||||
ordinary exec failures (missing binary, bad cwd, etc.).
|
||||
"""
|
||||
if not _PTY_AVAILABLE:
|
||||
if sys.platform.startswith("win"):
|
||||
raise PtyUnavailableError(
|
||||
"Pseudo-terminals are unavailable on this platform. "
|
||||
"Hermes Agent supports Windows only via WSL."
|
||||
)
|
||||
if ptyprocess is None:
|
||||
raise PtyUnavailableError(
|
||||
"The `ptyprocess` package is missing. "
|
||||
"Install with: pip install ptyprocess "
|
||||
"(or pip install -e '.[pty]')."
|
||||
)
|
||||
raise PtyUnavailableError("Pseudo-terminals are unavailable.")
|
||||
# Let caller-supplied env fully override inheritance; if they pass
|
||||
# None we inherit the server's env (same semantics as subprocess).
|
||||
spawn_env = os.environ.copy() if env is None else env
|
||||
proc = ptyprocess.PtyProcess.spawn( # type: ignore[union-attr]
|
||||
list(argv),
|
||||
cwd=cwd,
|
||||
env=spawn_env,
|
||||
dimensions=(rows, cols),
|
||||
)
|
||||
return cls(proc)
|
||||
|
||||
@property
|
||||
def pid(self) -> int:
|
||||
return int(self._proc.pid)
|
||||
|
||||
def is_alive(self) -> bool:
|
||||
if self._closed:
|
||||
return False
|
||||
try:
|
||||
return bool(self._proc.isalive())
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
# -- I/O --------------------------------------------------------------
|
||||
|
||||
def read(self, timeout: float = 0.2) -> Optional[bytes]:
|
||||
"""Read up to 64 KiB of raw bytes from the PTY master.
|
||||
|
||||
Returns:
|
||||
* bytes — zero or more bytes of child output
|
||||
* empty bytes (``b""``) — no data available within ``timeout``
|
||||
* None — child has exited and the master fd is at EOF
|
||||
|
||||
Never blocks longer than ``timeout`` seconds. Safe to call after
|
||||
:meth:`close`; returns ``None`` in that case.
|
||||
"""
|
||||
if self._closed:
|
||||
return None
|
||||
try:
|
||||
readable, _, _ = select.select([self._fd], [], [], timeout)
|
||||
except (OSError, ValueError):
|
||||
return None
|
||||
if not readable:
|
||||
return b""
|
||||
try:
|
||||
data = os.read(self._fd, 65536)
|
||||
except OSError as exc:
|
||||
# EIO on Linux = slave side closed. EBADF = already closed.
|
||||
if exc.errno in (errno.EIO, errno.EBADF):
|
||||
return None
|
||||
raise
|
||||
if not data:
|
||||
return None
|
||||
return data
|
||||
|
||||
def write(self, data: bytes) -> None:
|
||||
"""Write raw bytes to the PTY master (i.e. the child's stdin)."""
|
||||
if self._closed or not data:
|
||||
return
|
||||
# os.write can return a short write under load; loop until drained.
|
||||
view = memoryview(data)
|
||||
while view:
|
||||
try:
|
||||
n = os.write(self._fd, view)
|
||||
except OSError as exc:
|
||||
if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
|
||||
return
|
||||
raise
|
||||
if n <= 0:
|
||||
return
|
||||
view = view[n:]
|
||||
|
||||
def resize(self, cols: int, rows: int) -> None:
|
||||
"""Forward a terminal resize to the child via ``TIOCSWINSZ``."""
|
||||
if self._closed:
|
||||
return
|
||||
# struct winsize: rows, cols, xpixel, ypixel (all unsigned short)
|
||||
winsize = struct.pack("HHHH", max(1, rows), max(1, cols), 0, 0)
|
||||
try:
|
||||
fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# -- teardown ---------------------------------------------------------
|
||||
|
||||
def close(self) -> None:
|
||||
"""Terminate the child (SIGTERM → 0.5s grace → SIGKILL) and close fds.
|
||||
|
||||
Idempotent. Reaping the child is important so we don't leak
|
||||
zombies across the lifetime of the dashboard process.
|
||||
"""
|
||||
if self._closed:
|
||||
return
|
||||
self._closed = True
|
||||
|
||||
# SIGHUP is the conventional "your terminal went away" signal.
|
||||
# We escalate if the child ignores it.
|
||||
for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
|
||||
if not self._proc.isalive():
|
||||
break
|
||||
try:
|
||||
self._proc.kill(sig)
|
||||
except Exception:
|
||||
pass
|
||||
deadline = time.monotonic() + 0.5
|
||||
while self._proc.isalive() and time.monotonic() < deadline:
|
||||
time.sleep(0.02)
|
||||
|
||||
try:
|
||||
self._proc.close(force=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Context-manager sugar — handy in tests and ad-hoc scripts.
|
||||
def __enter__(self) -> "PtyBridge":
|
||||
return self
|
||||
|
||||
def __exit__(self, *_exc) -> None:
|
||||
self.close()
|
||||
|
|
@ -36,6 +36,29 @@ def _normalize_custom_provider_name(value: str) -> str:
|
|||
return value.strip().lower().replace(" ", "-")
|
||||
|
||||
|
||||
def _loopback_hostname(host: str) -> bool:
|
||||
h = (host or "").lower().rstrip(".")
|
||||
return h in {"localhost", "127.0.0.1", "::1", "0.0.0.0"}
|
||||
|
||||
|
||||
def _config_base_url_trustworthy_for_bare_custom(cfg_base_url: str, cfg_provider: str) -> bool:
|
||||
"""Decide whether ``model.base_url`` may back bare ``custom`` runtime resolution.
|
||||
|
||||
GitHub #14676: the model picker can select Custom while ``model.provider`` still reflects a
|
||||
previous provider. Reject non-loopback URLs unless the YAML provider is already ``custom``,
|
||||
so a stale OpenRouter/Z.ai base_url cannot hijack local ``custom`` sessions.
|
||||
"""
|
||||
cfg_provider_norm = (cfg_provider or "").strip().lower()
|
||||
bu = (cfg_base_url or "").strip()
|
||||
if not bu:
|
||||
return False
|
||||
if cfg_provider_norm == "custom":
|
||||
return True
|
||||
if base_url_host_matches(bu, "openrouter.ai"):
|
||||
return False
|
||||
return _loopback_hostname(base_url_hostname(bu))
|
||||
|
||||
|
||||
def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
|
||||
"""Auto-detect api_mode from the resolved base URL.
|
||||
|
||||
|
|
@ -160,8 +183,16 @@ def _resolve_runtime_from_pool_entry(
|
|||
requested_provider: str,
|
||||
model_cfg: Optional[Dict[str, Any]] = None,
|
||||
pool: Optional[CredentialPool] = None,
|
||||
target_model: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
model_cfg = model_cfg or _get_model_config()
|
||||
# When the caller is resolving for a specific target model (e.g. a /model
|
||||
# mid-session switch), prefer that over the persisted model.default. This
|
||||
# prevents api_mode being computed from a stale config default that no
|
||||
# longer matches the model actually being used — the bug that caused
|
||||
# opencode-zen /v1 to be stripped for chat_completions requests when
|
||||
# config.default was still a Claude model.
|
||||
effective_model = (target_model or model_cfg.get("default") or "")
|
||||
base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/")
|
||||
api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
|
||||
api_mode = "chat_completions"
|
||||
|
|
@ -207,7 +238,7 @@ def _resolve_runtime_from_pool_entry(
|
|||
api_mode = configured_mode
|
||||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
|
||||
api_mode = opencode_model_api_mode(provider, effective_model)
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
|
||||
# Kimi /coding, api.openai.com → codex_responses, api.x.ai →
|
||||
|
|
@ -323,12 +354,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
|||
# Found match by provider key
|
||||
base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
|
||||
if base_url:
|
||||
return {
|
||||
result = {
|
||||
"name": entry.get("name", ep_name),
|
||||
"base_url": base_url.strip(),
|
||||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
api_mode = _parse_api_mode(entry.get("api_mode"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
return result
|
||||
# Also check the 'name' field if present
|
||||
display_name = entry.get("name", "")
|
||||
if display_name:
|
||||
|
|
@ -337,12 +372,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
|||
# Found match by display name
|
||||
base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
|
||||
if base_url:
|
||||
return {
|
||||
result = {
|
||||
"name": display_name,
|
||||
"base_url": base_url.strip(),
|
||||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
api_mode = _parse_api_mode(entry.get("api_mode"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
return result
|
||||
|
||||
# Fall back to custom_providers: list (legacy format)
|
||||
custom_providers = config.get("custom_providers")
|
||||
|
|
@ -464,6 +503,7 @@ def _resolve_openrouter_runtime(
|
|||
cfg_provider = cfg_provider.strip().lower()
|
||||
|
||||
env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
|
||||
env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip()
|
||||
|
||||
# Use config base_url when available and the provider context matches.
|
||||
# OPENAI_BASE_URL env var is no longer consulted — config.yaml is
|
||||
|
|
@ -473,11 +513,14 @@ def _resolve_openrouter_runtime(
|
|||
if requested_norm == "auto":
|
||||
if not cfg_provider or cfg_provider == "auto":
|
||||
use_config_base_url = True
|
||||
elif requested_norm == "custom" and cfg_provider == "custom":
|
||||
elif requested_norm == "custom" and _config_base_url_trustworthy_for_bare_custom(
|
||||
cfg_base_url, cfg_provider
|
||||
):
|
||||
use_config_base_url = True
|
||||
|
||||
base_url = (
|
||||
(explicit_base_url or "").strip()
|
||||
or env_custom_base_url
|
||||
or (cfg_base_url.strip() if use_config_base_url else "")
|
||||
or env_openrouter_base_url
|
||||
or OPENROUTER_BASE_URL
|
||||
|
|
@ -689,8 +732,18 @@ def resolve_runtime_provider(
|
|||
requested: Optional[str] = None,
|
||||
explicit_api_key: Optional[str] = None,
|
||||
explicit_base_url: Optional[str] = None,
|
||||
target_model: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Resolve runtime provider credentials for agent execution."""
|
||||
"""Resolve runtime provider credentials for agent execution.
|
||||
|
||||
target_model: Optional override for model_cfg.get("default") when
|
||||
computing provider-specific api_mode (e.g. OpenCode Zen/Go where different
|
||||
models route through different API surfaces). Callers performing an
|
||||
explicit mid-session model switch should pass the new model here so
|
||||
api_mode is derived from the model they are switching TO, not the stale
|
||||
persisted default. Other callers can leave it None to preserve existing
|
||||
behavior (api_mode derived from config).
|
||||
"""
|
||||
requested_provider = resolve_requested_provider(requested)
|
||||
|
||||
custom_runtime = _resolve_named_custom_runtime(
|
||||
|
|
@ -772,6 +825,7 @@ def resolve_runtime_provider(
|
|||
requested_provider=requested_provider,
|
||||
model_cfg=model_cfg,
|
||||
pool=pool,
|
||||
target_model=target_model,
|
||||
)
|
||||
|
||||
if provider == "nous":
|
||||
|
|
@ -990,7 +1044,11 @@ def resolve_runtime_provider(
|
|||
api_mode = configured_mode
|
||||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
|
||||
# Prefer the target_model from the caller (explicit mid-session
|
||||
# switch) over the stale model.default; see _resolve_runtime_from_pool_entry
|
||||
# for the same rationale.
|
||||
_effective = target_model or model_cfg.get("default", "")
|
||||
api_mode = opencode_model_api_mode(provider, _effective)
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints by URL convention
|
||||
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ _DEFAULT_PROVIDER_MODELS = {
|
|||
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
|
||||
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
|
||||
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
|
||||
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
|
||||
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"],
|
||||
"huggingface": [
|
||||
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
|
||||
|
|
@ -500,6 +500,15 @@ def _print_setup_summary(config: dict, hermes_home):
|
|||
if get_env_value("HASS_TOKEN"):
|
||||
tool_status.append(("Smart Home (Home Assistant)", True, None))
|
||||
|
||||
# Spotify (OAuth via hermes auth spotify — check auth.json, not env vars)
|
||||
try:
|
||||
from hermes_cli.auth import get_provider_auth_state
|
||||
_spotify_state = get_provider_auth_state("spotify") or {}
|
||||
if _spotify_state.get("access_token") or _spotify_state.get("refresh_token"):
|
||||
tool_status.append(("Spotify (PKCE OAuth)", True, None))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Skills Hub
|
||||
if get_env_value("GITHUB_TOKEN"):
|
||||
tool_status.append(("Skills Hub (GitHub)", True, None))
|
||||
|
|
|
|||
|
|
@ -164,19 +164,26 @@ def show_status(args):
|
|||
qwen_status = {}
|
||||
|
||||
nous_logged_in = bool(nous_status.get("logged_in"))
|
||||
nous_error = nous_status.get("error")
|
||||
nous_label = "logged in" if nous_logged_in else "not logged in (run: hermes auth add nous --type oauth)"
|
||||
print(
|
||||
f" {'Nous Portal':<12} {check_mark(nous_logged_in)} "
|
||||
f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}"
|
||||
f"{nous_label}"
|
||||
)
|
||||
if nous_logged_in:
|
||||
portal_url = nous_status.get("portal_base_url") or "(unknown)"
|
||||
access_exp = _format_iso_timestamp(nous_status.get("access_expires_at"))
|
||||
key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at"))
|
||||
refresh_label = "yes" if nous_status.get("has_refresh_token") else "no"
|
||||
portal_url = nous_status.get("portal_base_url") or "(unknown)"
|
||||
access_exp = _format_iso_timestamp(nous_status.get("access_expires_at"))
|
||||
key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at"))
|
||||
refresh_label = "yes" if nous_status.get("has_refresh_token") else "no"
|
||||
if nous_logged_in or portal_url != "(unknown)" or nous_error:
|
||||
print(f" Portal URL: {portal_url}")
|
||||
if nous_logged_in or nous_status.get("access_expires_at"):
|
||||
print(f" Access exp: {access_exp}")
|
||||
if nous_logged_in or nous_status.get("agent_key_expires_at"):
|
||||
print(f" Key exp: {key_exp}")
|
||||
if nous_logged_in or nous_status.get("has_refresh_token"):
|
||||
print(f" Refresh: {refresh_label}")
|
||||
if nous_error and not nous_logged_in:
|
||||
print(f" Error: {nous_error}")
|
||||
|
||||
codex_logged_in = bool(codex_status.get("logged_in"))
|
||||
print(
|
||||
|
|
|
|||
|
|
@ -127,7 +127,7 @@ TIPS = [
|
|||
|
||||
# --- Tools & Capabilities ---
|
||||
"execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
|
||||
"delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
|
||||
"delegate_task spawns up to 3 concurrent sub-agents by default (delegation.max_concurrent_children) with isolated contexts for parallel work.",
|
||||
"web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
|
||||
"search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
|
||||
"patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
|
||||
|
|
@ -289,6 +289,7 @@ TIPS = [
|
|||
"When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.",
|
||||
"agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.",
|
||||
"agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.",
|
||||
"agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.",
|
||||
"The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.",
|
||||
"Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.",
|
||||
"The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.",
|
||||
|
|
|
|||
|
|
@ -67,12 +67,13 @@ CONFIGURABLE_TOOLSETS = [
|
|||
("messaging", "📨 Cross-Platform Messaging", "send_message"),
|
||||
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
|
||||
("homeassistant", "🏠 Home Assistant", "smart home device control"),
|
||||
("spotify", "🎵 Spotify", "playback, search, playlists, library"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
|
||||
# but the setup checklist won't pre-select them for first-time users.
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl"}
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}
|
||||
|
||||
|
||||
def _get_effective_configurable_toolsets():
|
||||
|
|
@ -361,6 +362,18 @@ TOOL_CATEGORIES = {
|
|||
},
|
||||
],
|
||||
},
|
||||
"spotify": {
|
||||
"name": "Spotify",
|
||||
"icon": "🎵",
|
||||
"providers": [
|
||||
{
|
||||
"name": "Spotify Web API",
|
||||
"tag": "PKCE OAuth — opens the setup wizard",
|
||||
"env_vars": [],
|
||||
"post_setup": "spotify",
|
||||
},
|
||||
],
|
||||
},
|
||||
"rl": {
|
||||
"name": "RL Training",
|
||||
"icon": "🧪",
|
||||
|
|
@ -461,6 +474,35 @@ def _run_post_setup(post_setup_key: str):
|
|||
_print_warning(" kittentts install timed out (>5min)")
|
||||
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
|
||||
|
||||
elif post_setup_key == "spotify":
|
||||
# Run the full `hermes auth spotify` flow — if the user has no
|
||||
# client_id yet, this drops them into the interactive wizard
|
||||
# (opens the Spotify dashboard, prompts for client_id, persists
|
||||
# to ~/.hermes/.env), then continues straight into PKCE. If they
|
||||
# already have an app, it skips the wizard and just does OAuth.
|
||||
from types import SimpleNamespace
|
||||
try:
|
||||
from hermes_cli.auth import login_spotify_command
|
||||
except Exception as exc:
|
||||
_print_warning(f" Could not load Spotify auth: {exc}")
|
||||
_print_info(" Run manually: hermes auth spotify")
|
||||
return
|
||||
_print_info(" Starting Spotify login...")
|
||||
try:
|
||||
login_spotify_command(SimpleNamespace(
|
||||
client_id=None, redirect_uri=None, scope=None,
|
||||
no_browser=False, timeout=None,
|
||||
))
|
||||
_print_success(" Spotify authenticated")
|
||||
except SystemExit as exc:
|
||||
# User aborted the wizard, or OAuth failed — don't fail the
|
||||
# toolset enable; they can retry with `hermes auth spotify`.
|
||||
_print_warning(f" Spotify login did not complete: {exc}")
|
||||
_print_info(" Run later: hermes auth spotify")
|
||||
except Exception as exc:
|
||||
_print_warning(f" Spotify login failed: {exc}")
|
||||
_print_info(" Run manually: hermes auth spotify")
|
||||
|
||||
elif post_setup_key == "rl_training":
|
||||
try:
|
||||
__import__("tinker_atropos")
|
||||
|
|
@ -590,7 +632,10 @@ def _get_platform_tools(
|
|||
default_off.remove(platform)
|
||||
enabled_toolsets -= default_off
|
||||
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled.
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled, or
|
||||
# unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
|
||||
# shipped as a bundled plugin but user must opt in via `hermes tools`
|
||||
# so we don't ship 7 Spotify tool schemas to users who don't use it).
|
||||
# A plugin toolset is "known" for a platform once `hermes tools`
|
||||
# has been saved for that platform (tracked via known_plugin_toolsets).
|
||||
# Unknown plugins default to enabled; known-but-absent = disabled.
|
||||
|
|
@ -602,6 +647,9 @@ def _get_platform_tools(
|
|||
if pts in toolset_names:
|
||||
# Explicitly listed in config — enabled
|
||||
enabled_toolsets.add(pts)
|
||||
elif pts in _DEFAULT_OFF_TOOLSETS:
|
||||
# Opt-in plugin toolset — stay off until user picks it
|
||||
continue
|
||||
elif pts not in known_for_platform:
|
||||
# New plugin not yet seen by hermes tools — default enabled
|
||||
enabled_toolsets.add(pts)
|
||||
|
|
|
|||
548
hermes_cli/voice.py
Normal file
548
hermes_cli/voice.py
Normal file
|
|
@ -0,0 +1,548 @@
|
|||
"""Process-wide voice recording + TTS API for the TUI gateway.
|
||||
|
||||
Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
|
||||
(text-to-speech) behind idempotent, stateful entry points that the gateway's
|
||||
``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
|
||||
call from a dedicated thread. The gateway imports this module lazily so that
|
||||
missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
|
||||
an ``ImportError`` at call time, not at startup.
|
||||
|
||||
Two usage modes are exposed:
|
||||
|
||||
* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
|
||||
manually-bounded capture used when the caller drives the start/stop pair
|
||||
explicitly.
|
||||
* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
|
||||
the classic CLI voice mode: recording auto-stops on silence, transcribes,
|
||||
hands the result to a callback, and then auto-restarts for the next turn.
|
||||
Three consecutive no-speech cycles stop the loop and fire
|
||||
``on_silent_limit`` so the UI can turn the mode off.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from tools.voice_mode import (
|
||||
create_audio_recorder,
|
||||
is_whisper_hallucination,
|
||||
play_audio_file,
|
||||
transcribe_recording,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _debug(msg: str) -> None:
|
||||
"""Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1.
|
||||
|
||||
Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
|
||||
which createGatewayEventHandler shows as an Activity line — exactly
|
||||
what we need to diagnose "why didn't the loop auto-restart?" in the
|
||||
user's real terminal without shipping a separate debug RPC.
|
||||
|
||||
Any OSError / BrokenPipeError is swallowed because this fires from
|
||||
background threads (silence callback, TTS daemon, beep) where a
|
||||
broken stderr pipe must not kill the whole gateway — the main
|
||||
command pipe (stdin+stdout) is what actually matters.
|
||||
"""
|
||||
if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
|
||||
return
|
||||
try:
|
||||
print(f"[voice] {msg}", file=sys.stderr, flush=True)
|
||||
except (BrokenPipeError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
def _beeps_enabled() -> bool:
|
||||
"""CLI parity: voice.beep_enabled in config.yaml (default True)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
voice_cfg = load_config().get("voice", {})
|
||||
if isinstance(voice_cfg, dict):
|
||||
return bool(voice_cfg.get("beep_enabled", True))
|
||||
except Exception:
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def _play_beep(frequency: int, count: int = 1) -> None:
    """Best-effort audible cue, matching cli.py's record/stop beeps.

    CLI parity: 880 Hz single beep on record start, 660 Hz double beep on
    stop. Every failure path — beeps disabled in config, sounddevice missing,
    playback error — is swallowed (with a debug breadcrumb) so the voice loop
    never dies just because a speaker was unavailable.
    """
    if not _beeps_enabled():
        return
    try:
        from tools.voice_mode import play_beep

        play_beep(frequency=frequency, count=count)
    except Exception as exc:
        _debug(f"beep {frequency}Hz failed: {exc}")
|
||||
|
||||
# ── Push-to-talk state ───────────────────────────────────────────────
# Single manually-bounded recorder; guarded by its own lock so concurrent
# start/stop RPCs can't double-start or leak a recorder.
_recorder = None
_recorder_lock = threading.Lock()

# ── Continuous (VAD) state ───────────────────────────────────────────
# One long-lived recorder reused across turns; callbacks and counters are
# only read/written under _continuous_lock.
_continuous_lock = threading.Lock()
_continuous_active = False
_continuous_recorder: Any = None

# ── TTS-vs-STT feedback guard ────────────────────────────────────────
# While TTS plays the agent reply over the speakers, a live microphone would
# transcribe the agent's own voice as user input — an infinite feedback loop.
# Mirrors cli.py's _voice_tts_done: the Event is cleared during playback and
# set while silent. The silence callback waits on it before re-arming the
# mic, and speak_text cancels any live capture before starting playback.
_tts_playing = threading.Event()
_tts_playing.set()  # initially "not playing"
_continuous_on_transcript: Optional[Callable[[str], None]] = None
_continuous_on_status: Optional[Callable[[str], None]] = None
_continuous_on_silent_limit: Optional[Callable[[], None]] = None
_continuous_no_speech_count = 0
_CONTINUOUS_NO_SPEECH_LIMIT = 3
|
||||
|
||||
|
||||
# ── Push-to-talk API ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_recording() -> None:
    """Begin push-to-talk capture from the default input device.

    Idempotent — if a recording is already in progress this is a no-op, so a
    double-tapped hotkey can't spawn two recorders.
    """
    global _recorder

    with _recorder_lock:
        already_running = _recorder is not None and getattr(_recorder, "is_recording", False)
        if already_running:
            return
        recorder = create_audio_recorder()
        recorder.start()
        _recorder = recorder
|
||||
|
||||
|
||||
def stop_and_transcribe() -> Optional[str]:
    """Stop the active push-to-talk recording, transcribe it, return the text.

    Returns None when no recording was active, the microphone captured no
    speech, transcription failed, or Whisper returned a known hallucination.
    The temporary WAV file is deleted in all cases.
    """
    global _recorder

    with _recorder_lock:
        rec, _recorder = _recorder, None

    if rec is None:
        return None

    wav_path = rec.stop()
    if not wav_path:
        return None

    try:
        result = transcribe_recording(wav_path)
    except Exception as exc:
        logger.warning("voice transcription failed: %s", exc)
        return None
    finally:
        # Best-effort cleanup of the capture file regardless of outcome.
        try:
            if os.path.isfile(wav_path):
                os.unlink(wav_path)
        except Exception:
            pass

    # transcribe_recording returns {"success": bool, "transcript": str, ...}
    # — same contract cli.py's _voice_stop_and_transcribe relies on.
    if not result.get("success"):
        return None
    text = (result.get("transcript") or "").strip()
    if text and not is_whisper_hallucination(text):
        return text
    return None
|
||||
|
||||
|
||||
# ── Continuous (VAD) API ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_continuous(
    on_transcript: Callable[[str], None],
    on_status: Optional[Callable[[str], None]] = None,
    on_silent_limit: Optional[Callable[[], None]] = None,
    silence_threshold: int = 200,
    silence_duration: float = 3.0,
) -> None:
    """Start a VAD-driven continuous recording loop.

    Each time speech is detected and transcribed, ``on_transcript(text)``
    fires and the loop auto-restarts. After _CONTINUOUS_NO_SPEECH_LIMIT
    consecutive silent cycles the loop stops itself and calls
    ``on_silent_limit`` so the UI can reflect "voice off". ``on_status``
    receives "listening" / "transcribing" / "idle" for a live indicator.
    Idempotent — calling while already active is a no-op.
    """
    global _continuous_active, _continuous_recorder
    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
    global _continuous_no_speech_count

    with _continuous_lock:
        if _continuous_active:
            _debug("start_continuous: already active — no-op")
            return
        _continuous_active = True
        _continuous_on_transcript = on_transcript
        _continuous_on_status = on_status
        _continuous_on_silent_limit = on_silent_limit
        _continuous_no_speech_count = 0

        if _continuous_recorder is None:
            _continuous_recorder = create_audio_recorder()

        # Tuning knobs are private recorder attributes — set directly.
        _continuous_recorder._silence_threshold = silence_threshold
        _continuous_recorder._silence_duration = silence_duration
        rec = _continuous_recorder

    _debug(
        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
    )

    # CLI parity: the single 880 Hz cue must play *before* the stream opens —
    # beeping after stream.start() triggers a CoreAudio conflict on macOS.
    _play_beep(frequency=880, count=1)

    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as exc:
        logger.error("failed to start continuous recording: %s", exc)
        _debug(f"start_continuous: rec.start raised {type(exc).__name__}: {exc}")
        with _continuous_lock:
            _continuous_active = False
        raise

    if on_status:
        try:
            on_status("listening")
        except Exception:
            pass
|
||||
|
||||
|
||||
def stop_continuous() -> None:
    """Stop the active continuous loop and release the microphone.

    Idempotent — a no-op when the loop is not running. An in-flight
    transcription is allowed to finish but its result is discarded, because
    the silence callback re-checks ``_continuous_active`` before delivering.
    """
    global _continuous_active, _continuous_on_transcript
    global _continuous_on_status, _continuous_on_silent_limit
    global _continuous_recorder, _continuous_no_speech_count

    with _continuous_lock:
        if not _continuous_active:
            return
        _continuous_active = False
        rec = _continuous_recorder
        on_status = _continuous_on_status
        _continuous_on_transcript = None
        _continuous_on_status = None
        _continuous_on_silent_limit = None
        _continuous_no_speech_count = 0

    if rec is not None:
        # cancel() rather than stop(): the loop is over, so buffered frames
        # from a half-captured turn must be discarded, not transcribed.
        try:
            rec.cancel()
        except Exception as exc:
            logger.warning("failed to cancel recorder: %s", exc)

    # Audible "recording stopped" cue — same 660 Hz × 2 as the
    # silence-auto-stop path.
    _play_beep(frequency=660, count=2)

    if on_status:
        try:
            on_status("idle")
        except Exception:
            pass
|
||||
|
||||
|
||||
def is_continuous_active() -> bool:
    """Report whether a continuous voice loop is currently running."""
    with _continuous_lock:
        active = _continuous_active
    return active
|
||||
|
||||
|
||||
def _continuous_on_silence() -> None:
    """AudioRecorder silence callback — runs in a daemon thread.

    Stops the current capture, transcribes it, delivers the text via
    ``on_transcript``, and — if the loop is still active — re-arms the next
    capture. Three consecutive silent cycles halt the loop and fire
    ``on_silent_limit``. Before re-arming, waits for any in-flight TTS so
    the agent's spoken reply doesn't feedback-loop through the microphone.
    """
    global _continuous_active, _continuous_no_speech_count

    _debug("_continuous_on_silence: fired")

    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: loop inactive — abort")
            return
        rec = _continuous_recorder
        on_transcript = _continuous_on_transcript
        on_status = _continuous_on_status
        on_silent_limit = _continuous_on_silent_limit

    if rec is None:
        _debug("_continuous_on_silence: no recorder — abort")
        return

    if on_status:
        try:
            on_status("transcribing")
        except Exception:
            pass

    wav_path = rec.stop()
    # Peak RMS is the key diagnostic when stop() returns None despite the VAD
    # firing — shows whether the mic was below the silence threshold or the
    # VAD and peak checks disagree.
    peak_rms = getattr(rec, "_peak_rms", -1)
    _debug(
        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
    )

    # CLI parity: double 660 Hz beep after the stream stops (safe from the
    # CoreAudio conflict that blocks pre-start beeps).
    _play_beep(frequency=660, count=2)

    transcript: Optional[str] = None

    if wav_path:
        try:
            result = transcribe_recording(wav_path)
            # Contract is {"success": bool, "transcript": str, "error": str?}
            # — NOT {"text": str}. Reading the wrong key silently produced
            # empty transcripts even when STT succeeded.
            success = bool(result.get("success"))
            text = (result.get("transcript") or "").strip()
            err = result.get("error")
            _debug(
                f"_continuous_on_silence: transcribe -> success={success} "
                f"text={text!r} err={err!r}"
            )
            if success and text and not is_whisper_hallucination(text):
                transcript = text
        except Exception as e:
            logger.warning("continuous transcription failed: %s", e)
            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
        finally:
            try:
                if os.path.isfile(wav_path):
                    os.unlink(wav_path)
            except Exception:
                pass

    with _continuous_lock:
        if not _continuous_active:
            # User stopped the loop while we were transcribing — discard.
            _debug("_continuous_on_silence: stopped during transcribe — no restart")
            return
        if transcript:
            _continuous_no_speech_count = 0
        else:
            _continuous_no_speech_count += 1
        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
        no_speech = _continuous_no_speech_count

    if transcript and on_transcript:
        try:
            on_transcript(transcript)
        except Exception as e:
            logger.warning("on_transcript callback raised: %s", e)

    if should_halt:
        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
        with _continuous_lock:
            _continuous_active = False
            _continuous_no_speech_count = 0
        if on_silent_limit:
            try:
                on_silent_limit()
            except Exception:
                pass
        try:
            rec.cancel()
        except Exception:
            pass
        if on_status:
            try:
                on_status("idle")
            except Exception:
                pass
        return

    # Wait for any in-flight TTS before re-arming the mic, then leave a small
    # gap so the tail of the speaker output isn't captured — otherwise the
    # agent's spoken reply lands back in the mic and gets re-submitted.
    # NOTE(review): the mangled source leaves the 0.3s gap's placement
    # ambiguous; it is kept inside the TTS-wait branch here — confirm against
    # the original cli.py behavior.
    if not _tts_playing.is_set():
        _debug("_continuous_on_silence: waiting for TTS to finish")
        _tts_playing.wait(timeout=60)
        import time as _time
        _time.sleep(0.3)

    # User may have stopped the loop during the wait.
    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: stopped while waiting for TTS")
            return

    # Re-arm for the next turn.
    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
    _play_beep(frequency=880, count=1)
    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as e:
        logger.error("failed to restart continuous recording: %s", e)
        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
        with _continuous_lock:
            _continuous_active = False
        return

    if on_status:
        try:
            on_status("listening")
        except Exception:
            pass
|
||||
|
||||
|
||||
# ── TTS API ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def speak_text(text: str) -> None:
|
||||
"""Synthesize ``text`` with the configured TTS provider and play it.
|
||||
|
||||
Mirrors cli.py:_voice_speak_response exactly — same markdown strip
|
||||
pipeline, same 4000-char cap, same explicit mp3 output path, same
|
||||
MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup
|
||||
of both extensions. Keeping these in sync means a voice-mode TTS
|
||||
session in the TUI sounds identical to one in the classic CLI.
|
||||
|
||||
While playback is in flight the module-level _tts_playing Event is
|
||||
cleared so the continuous-recording loop knows to wait before
|
||||
re-arming the mic (otherwise the agent's spoken reply feedback-loops
|
||||
through the microphone and the agent ends up replying to itself).
|
||||
"""
|
||||
if not text or not text.strip():
|
||||
return
|
||||
|
||||
import re
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
# Cancel any live capture before we open the speakers — otherwise the
|
||||
# last ~200ms of the user's turn tail + the first syllables of our TTS
|
||||
# both end up in the next recording window. The continuous loop will
|
||||
# re-arm itself after _tts_playing flips back (see _continuous_on_silence).
|
||||
paused_recording = False
|
||||
with _continuous_lock:
|
||||
if (
|
||||
_continuous_active
|
||||
and _continuous_recorder is not None
|
||||
and getattr(_continuous_recorder, "is_recording", False)
|
||||
):
|
||||
try:
|
||||
_continuous_recorder.cancel()
|
||||
paused_recording = True
|
||||
except Exception as e:
|
||||
logger.warning("failed to pause recorder for TTS: %s", e)
|
||||
|
||||
_tts_playing.clear()
|
||||
_debug(f"speak_text: TTS begin (paused_recording={paused_recording})")
|
||||
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool
|
||||
|
||||
tts_text = text[:4000] if len(text) > 4000 else text
|
||||
tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text) # fenced code blocks
|
||||
tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text) # [text](url) → text
|
||||
tts_text = re.sub(r'https?://\S+', '', tts_text) # bare URLs
|
||||
tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text) # bold
|
||||
tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text) # italic
|
||||
tts_text = re.sub(r'`(.+?)`', r'\1', tts_text) # inline code
|
||||
tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE) # headers
|
||||
tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE) # list bullets
|
||||
tts_text = re.sub(r'---+', '', tts_text) # horizontal rules
|
||||
tts_text = re.sub(r'\n{3,}', '\n\n', tts_text) # excess newlines
|
||||
tts_text = tts_text.strip()
|
||||
if not tts_text:
|
||||
return
|
||||
|
||||
# MP3 output path, pre-chosen so we can play the MP3 directly even
|
||||
# when text_to_speech_tool auto-converts to OGG for messaging
|
||||
# platforms. afplay's OGG support is flaky, MP3 always works.
|
||||
os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True)
|
||||
mp3_path = os.path.join(
|
||||
tempfile.gettempdir(),
|
||||
"hermes_voice",
|
||||
f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
|
||||
)
|
||||
|
||||
_debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
|
||||
text_to_speech_tool(text=tts_text, output_path=mp3_path)
|
||||
|
||||
if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
|
||||
_debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
|
||||
play_audio_file(mp3_path)
|
||||
try:
|
||||
os.unlink(mp3_path)
|
||||
ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
|
||||
if os.path.isfile(ogg_path):
|
||||
os.unlink(ogg_path)
|
||||
except OSError:
|
||||
pass
|
||||
else:
|
||||
_debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
|
||||
except Exception as e:
|
||||
logger.warning("Voice TTS playback failed: %s", e)
|
||||
_debug(f"speak_text raised {type(e).__name__}: {e}")
|
||||
finally:
|
||||
_tts_playing.set()
|
||||
_debug("speak_text: TTS done")
|
||||
|
||||
# Re-arm the mic so the user can answer without pressing Ctrl+B.
|
||||
# Small delay lets the OS flush speaker output and afplay fully
|
||||
# release the audio device before sounddevice re-opens the input.
|
||||
if paused_recording:
|
||||
time.sleep(0.3)
|
||||
with _continuous_lock:
|
||||
if _continuous_active and _continuous_recorder is not None:
|
||||
try:
|
||||
_continuous_recorder.start(
|
||||
on_silence_stop=_continuous_on_silence
|
||||
)
|
||||
_debug("speak_text: recording resumed after TTS")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"failed to resume recorder after TTS: %s", e
|
||||
)
|
||||
|
|
@ -49,7 +49,7 @@ from hermes_cli.config import (
|
|||
from gateway.status import get_running_pid, read_runtime_status
|
||||
|
||||
try:
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
|
@ -71,6 +71,11 @@ app = FastAPI(title="Hermes Agent", version=__version__)
|
|||
# Injected into the SPA HTML so only the legitimate web UI can use it.
|
||||
# ---------------------------------------------------------------------------
|
||||
_SESSION_TOKEN = secrets.token_urlsafe(32)
|
||||
_SESSION_HEADER_NAME = "X-Hermes-Session-Token"
|
||||
|
||||
# In-browser Chat tab (/chat, /api/pty, …). Off unless ``hermes dashboard --tui``
|
||||
# or HERMES_DASHBOARD_TUI=1. Set from :func:`start_server`.
|
||||
_DASHBOARD_EMBEDDED_CHAT_ENABLED = False
|
||||
|
||||
# Simple rate limiter for the reveal endpoint
|
||||
_reveal_timestamps: List[float] = []
|
||||
|
|
@ -104,14 +109,29 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
|
|||
})
|
||||
|
||||
|
||||
def _require_token(request: Request) -> None:
|
||||
"""Validate the ephemeral session token. Raises 401 on mismatch.
|
||||
def _has_valid_session_token(request: Request) -> bool:
|
||||
"""True if the request carries a valid dashboard session token.
|
||||
|
||||
Uses ``hmac.compare_digest`` to prevent timing side-channels.
|
||||
The dedicated session header avoids collisions with reverse proxies that
|
||||
already use ``Authorization`` (for example Caddy ``basic_auth``). We still
|
||||
accept the legacy Bearer path for backward compatibility with older
|
||||
dashboard bundles.
|
||||
"""
|
||||
session_header = request.headers.get(_SESSION_HEADER_NAME, "")
|
||||
if session_header and hmac.compare_digest(
|
||||
session_header.encode(),
|
||||
_SESSION_TOKEN.encode(),
|
||||
):
|
||||
return True
|
||||
|
||||
auth = request.headers.get("authorization", "")
|
||||
expected = f"Bearer {_SESSION_TOKEN}"
|
||||
if not hmac.compare_digest(auth.encode(), expected.encode()):
|
||||
return hmac.compare_digest(auth.encode(), expected.encode())
|
||||
|
||||
|
||||
def _require_token(request: Request) -> None:
|
||||
"""Validate the ephemeral session token. Raises 401 on mismatch."""
|
||||
if not _has_valid_session_token(request):
|
||||
raise HTTPException(status_code=401, detail="Unauthorized")
|
||||
|
||||
|
||||
|
|
@ -205,9 +225,7 @@ async def auth_middleware(request: Request, call_next):
|
|||
"""Require the session token on all /api/ routes except the public list."""
|
||||
path = request.url.path
|
||||
if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
|
||||
auth = request.headers.get("authorization", "")
|
||||
expected = f"Bearer {_SESSION_TOKEN}"
|
||||
if not hmac.compare_digest(auth.encode(), expected.encode()):
|
||||
if not _has_valid_session_token(request):
|
||||
return JSONResponse(
|
||||
status_code=401,
|
||||
content={"detail": "Unauthorized"},
|
||||
|
|
@ -269,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
|
|||
"display.busy_input_mode": {
|
||||
"type": "select",
|
||||
"description": "Input behavior while agent is running",
|
||||
"options": ["queue", "interrupt", "block"],
|
||||
"options": ["interrupt", "queue"],
|
||||
},
|
||||
"memory.provider": {
|
||||
"type": "select",
|
||||
|
|
@ -417,7 +435,14 @@ class EnvVarReveal(BaseModel):
|
|||
|
||||
|
||||
_GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
|
||||
_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
|
||||
try:
|
||||
_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
|
||||
except (ValueError, TypeError):
|
||||
_log.warning(
|
||||
"Invalid GATEWAY_HEALTH_TIMEOUT value %r — using default 3.0s",
|
||||
os.getenv("GATEWAY_HEALTH_TIMEOUT"),
|
||||
)
|
||||
_GATEWAY_HEALTH_TIMEOUT = 3.0
|
||||
|
||||
|
||||
def _probe_gateway_health() -> tuple[bool, dict | None]:
|
||||
|
|
@ -1508,26 +1533,30 @@ def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]:
|
|||
with urllib.request.urlopen(req, timeout=20) as resp:
|
||||
result = json.loads(resp.read().decode())
|
||||
except Exception as e:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Token exchange failed: {e}"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Token exchange failed: {e}"
|
||||
return {"ok": False, "status": "error", "message": sess["error_message"]}
|
||||
|
||||
access_token = result.get("access_token", "")
|
||||
refresh_token = result.get("refresh_token", "")
|
||||
expires_in = int(result.get("expires_in") or 3600)
|
||||
if not access_token:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = "No access token returned"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = "No access token returned"
|
||||
return {"ok": False, "status": "error", "message": sess["error_message"]}
|
||||
|
||||
expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
|
||||
try:
|
||||
_save_anthropic_oauth_creds(access_token, refresh_token, expires_at_ms)
|
||||
except Exception as e:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Save failed: {e}"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Save failed: {e}"
|
||||
return {"ok": False, "status": "error", "message": sess["error_message"]}
|
||||
sess["status"] = "approved"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "approved"
|
||||
_log.info("oauth/pkce: anthropic login completed (session=%s)", session_id)
|
||||
return {"ok": True, "status": "approved"}
|
||||
|
||||
|
|
@ -2242,6 +2271,329 @@ async def get_usage_analytics(days: int = 30):
|
|||
db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/pty — PTY-over-WebSocket bridge for the dashboard "Chat" tab.
|
||||
#
|
||||
# The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind
|
||||
# a POSIX pseudo-terminal, and forwards bytes + resize escapes across a
|
||||
# WebSocket. The browser renders the ANSI through xterm.js (see
|
||||
# web/src/pages/ChatPage.tsx).
|
||||
#
|
||||
# Auth: ``?token=<session_token>`` query param (browsers can't set
|
||||
# Authorization on the WS upgrade). Same ephemeral ``_SESSION_TOKEN`` as
|
||||
# REST. Localhost-only — we defensively reject non-loopback clients even
|
||||
# though uvicorn binds to 127.0.0.1.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import asyncio
|
||||
|
||||
from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
|
||||
|
||||
_RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]")
|
||||
_PTY_READ_CHUNK_TIMEOUT = 0.2
|
||||
_VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
|
||||
# Starlette's TestClient reports the peer as "testclient"; treat it as
|
||||
# loopback so tests don't need to rewrite request scope.
|
||||
_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
|
||||
|
||||
# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
|
||||
# and /api/events (dashboard → browser sidebar). Keyed by an opaque channel id
|
||||
# the chat tab generates on mount; entries auto-evict when the last subscriber
|
||||
# drops AND the publisher has disconnected.
|
||||
_event_channels: dict[str, set] = {}
|
||||
_event_lock = asyncio.Lock()
|
||||
|
||||
|
||||
def _resolve_chat_argv(
|
||||
resume: Optional[str] = None,
|
||||
sidecar_url: Optional[str] = None,
|
||||
) -> tuple[list[str], Optional[str], Optional[dict]]:
|
||||
"""Resolve the argv + cwd + env for the chat PTY.
|
||||
|
||||
Default: whatever ``hermes --tui`` would run. Tests monkeypatch this
|
||||
function to inject a tiny fake command (``cat``, ``sh -c 'printf …'``)
|
||||
so nothing has to build Node or the TUI bundle.
|
||||
|
||||
Session resume is propagated via the ``HERMES_TUI_RESUME`` env var —
|
||||
matching what ``hermes_cli.main._launch_tui`` does for the CLI path.
|
||||
Appending ``--resume <id>`` to argv doesn't work because ``ui-tui`` does
|
||||
not parse its argv.
|
||||
|
||||
`sidecar_url` (when set) is forwarded as ``HERMES_TUI_SIDECAR_URL`` so
|
||||
the spawned ``tui_gateway.entry`` can mirror dispatcher emits to the
|
||||
dashboard's ``/api/pub`` endpoint (see :func:`pub_ws`).
|
||||
"""
|
||||
from hermes_cli.main import PROJECT_ROOT, _make_tui_argv
|
||||
|
||||
argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)
|
||||
env: Optional[dict] = None
|
||||
|
||||
if resume or sidecar_url:
|
||||
env = os.environ.copy()
|
||||
|
||||
if resume:
|
||||
env["HERMES_TUI_RESUME"] = resume
|
||||
|
||||
if sidecar_url:
|
||||
env["HERMES_TUI_SIDECAR_URL"] = sidecar_url
|
||||
|
||||
return list(argv), str(cwd) if cwd else None, env
|
||||
|
||||
|
||||
def _build_sidecar_url(channel: str) -> Optional[str]:
|
||||
"""ws:// URL the PTY child should publish events to, or None when unbound."""
|
||||
host = getattr(app.state, "bound_host", None)
|
||||
port = getattr(app.state, "bound_port", None)
|
||||
|
||||
if not host or not port:
|
||||
return None
|
||||
|
||||
netloc = f"[{host}]:{port}" if ":" in host and not host.startswith("[") else f"{host}:{port}"
|
||||
qs = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel})
|
||||
|
||||
return f"ws://{netloc}/api/pub?{qs}"
|
||||
|
||||
|
||||
async def _broadcast_event(channel: str, payload: str) -> None:
|
||||
"""Fan out one publisher frame to every subscriber on `channel`."""
|
||||
async with _event_lock:
|
||||
subs = list(_event_channels.get(channel, ()))
|
||||
|
||||
for sub in subs:
|
||||
try:
|
||||
await sub.send_text(payload)
|
||||
except Exception:
|
||||
# Subscriber went away mid-send; the /api/events finally clause
|
||||
# will remove it from the registry on its next iteration.
|
||||
pass
|
||||
|
||||
|
||||
def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
|
||||
"""Return the channel id from the query string or None if invalid."""
|
||||
channel = ws.query_params.get("channel", "")
|
||||
|
||||
return channel if _VALID_CHANNEL_RE.match(channel) else None
|
||||
|
||||
|
||||
@app.websocket("/api/pty")
|
||||
async def pty_ws(ws: WebSocket) -> None:
|
||||
if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
|
||||
await ws.close(code=4403)
|
||||
return
|
||||
|
||||
# --- auth + loopback check (before accept so we can close cleanly) ---
|
||||
token = ws.query_params.get("token", "")
|
||||
expected = _SESSION_TOKEN
|
||||
if not hmac.compare_digest(token.encode(), expected.encode()):
|
||||
await ws.close(code=4401)
|
||||
return
|
||||
|
||||
client_host = ws.client.host if ws.client else ""
|
||||
if client_host and client_host not in _LOOPBACK_HOSTS:
|
||||
await ws.close(code=4403)
|
||||
return
|
||||
|
||||
await ws.accept()
|
||||
|
||||
# --- spawn PTY ------------------------------------------------------
|
||||
resume = ws.query_params.get("resume") or None
|
||||
channel = _channel_or_close_code(ws)
|
||||
sidecar_url = _build_sidecar_url(channel) if channel else None
|
||||
|
||||
try:
|
||||
argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url)
|
||||
except SystemExit as exc:
|
||||
# _make_tui_argv calls sys.exit(1) when node/npm is missing.
|
||||
await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
|
||||
await ws.close(code=1011)
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
bridge = PtyBridge.spawn(argv, cwd=cwd, env=env)
|
||||
except PtyUnavailableError as exc:
|
||||
await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
|
||||
await ws.close(code=1011)
|
||||
return
|
||||
except (FileNotFoundError, OSError) as exc:
|
||||
await ws.send_text(f"\r\n\x1b[31mChat failed to start: {exc}\x1b[0m\r\n")
|
||||
await ws.close(code=1011)
|
||||
return
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
# --- reader task: PTY master → WebSocket ----------------------------
|
||||
async def pump_pty_to_ws() -> None:
|
||||
while True:
|
||||
chunk = await loop.run_in_executor(
|
||||
None, bridge.read, _PTY_READ_CHUNK_TIMEOUT
|
||||
)
|
||||
if chunk is None: # EOF
|
||||
return
|
||||
if not chunk: # no data this tick; yield control and retry
|
||||
await asyncio.sleep(0)
|
||||
continue
|
||||
try:
|
||||
await ws.send_bytes(chunk)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
reader_task = asyncio.create_task(pump_pty_to_ws())
|
||||
|
||||
# --- writer loop: WebSocket → PTY master ----------------------------
|
||||
try:
|
||||
while True:
|
||||
msg = await ws.receive()
|
||||
msg_type = msg.get("type")
|
||||
if msg_type == "websocket.disconnect":
|
||||
break
|
||||
raw = msg.get("bytes")
|
||||
if raw is None:
|
||||
text = msg.get("text")
|
||||
raw = text.encode("utf-8") if isinstance(text, str) else b""
|
||||
if not raw:
|
||||
continue
|
||||
|
||||
# Resize escape is consumed locally, never written to the PTY.
|
||||
match = _RESIZE_RE.match(raw)
|
||||
if match and match.end() == len(raw):
|
||||
cols = int(match.group(1))
|
||||
rows = int(match.group(2))
|
||||
bridge.resize(cols=cols, rows=rows)
|
||||
continue
|
||||
|
||||
bridge.write(raw)
|
||||
except WebSocketDisconnect:
|
||||
pass
|
||||
finally:
|
||||
reader_task.cancel()
|
||||
try:
|
||||
await reader_task
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
bridge.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/ws — JSON-RPC WebSocket sidecar for the dashboard "Chat" tab.
|
||||
#
|
||||
# Drives the same `tui_gateway.dispatch` surface Ink uses over stdio, so the
|
||||
# dashboard can render structured metadata (model badge, tool-call sidebar,
|
||||
# slash launcher, session info) alongside the xterm.js terminal that PTY
|
||||
# already paints. Both transports bind to the same session id when one is
|
||||
# active, so a tool.start emitted by the agent fans out to both sinks.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.websocket("/api/ws")
|
||||
async def gateway_ws(ws: WebSocket) -> None:
|
||||
if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
|
||||
await ws.close(code=4403)
|
||||
return
|
||||
|
||||
token = ws.query_params.get("token", "")
|
||||
if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
|
||||
await ws.close(code=4401)
|
||||
return
|
||||
|
||||
client_host = ws.client.host if ws.client else ""
|
||||
if client_host and client_host not in _LOOPBACK_HOSTS:
|
||||
await ws.close(code=4403)
|
||||
return
|
||||
|
||||
from tui_gateway.ws import handle_ws
|
||||
|
||||
await handle_ws(ws)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/pub + /api/events — chat-tab event broadcast.
|
||||
#
|
||||
# The PTY-side ``tui_gateway.entry`` opens /api/pub at startup (driven by
|
||||
# HERMES_TUI_SIDECAR_URL set in /api/pty's PTY env) and writes every
|
||||
# dispatcher emit through it. The dashboard fans those frames out to any
|
||||
# subscriber that opened /api/events on the same channel id. This is what
|
||||
# gives the React sidebar its tool-call feed without breaking the PTY
|
||||
# child's stdio handshake with Ink.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.websocket("/api/pub")
|
||||
async def pub_ws(ws: WebSocket) -> None:
|
||||
if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
|
||||
await ws.close(code=4403)
|
||||
return
|
||||
|
||||
token = ws.query_params.get("token", "")
|
||||
if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
|
||||
await ws.close(code=4401)
|
||||
return
|
||||
|
||||
client_host = ws.client.host if ws.client else ""
|
||||
if client_host and client_host not in _LOOPBACK_HOSTS:
|
||||
await ws.close(code=4403)
|
||||
return
|
||||
|
||||
channel = _channel_or_close_code(ws)
|
||||
if not channel:
|
||||
await ws.close(code=4400)
|
||||
return
|
||||
|
||||
await ws.accept()
|
||||
|
||||
try:
|
||||
while True:
|
||||
await _broadcast_event(channel, await ws.receive_text())
|
||||
except WebSocketDisconnect:
|
||||
pass
|
||||
|
||||
|
||||
@app.websocket("/api/events")
|
||||
async def events_ws(ws: WebSocket) -> None:
|
||||
if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
|
||||
await ws.close(code=4403)
|
||||
return
|
||||
|
||||
token = ws.query_params.get("token", "")
|
||||
if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
|
||||
await ws.close(code=4401)
|
||||
return
|
||||
|
||||
client_host = ws.client.host if ws.client else ""
|
||||
if client_host and client_host not in _LOOPBACK_HOSTS:
|
||||
await ws.close(code=4403)
|
||||
return
|
||||
|
||||
channel = _channel_or_close_code(ws)
|
||||
if not channel:
|
||||
await ws.close(code=4400)
|
||||
return
|
||||
|
||||
await ws.accept()
|
||||
|
||||
async with _event_lock:
|
||||
_event_channels.setdefault(channel, set()).add(ws)
|
||||
|
||||
try:
|
||||
while True:
|
||||
# Subscribers don't speak — the receive() just blocks until
|
||||
# disconnect so the connection stays open as long as the
|
||||
# browser holds it.
|
||||
await ws.receive_text()
|
||||
except WebSocketDisconnect:
|
||||
pass
|
||||
finally:
|
||||
async with _event_lock:
|
||||
subs = _event_channels.get(channel)
|
||||
|
||||
if subs is not None:
|
||||
subs.discard(ws)
|
||||
|
||||
if not subs:
|
||||
_event_channels.pop(channel, None)
|
||||
|
||||
|
||||
def mount_spa(application: FastAPI):
|
||||
"""Mount the built SPA. Falls back to index.html for client-side routing.
|
||||
|
||||
|
|
@ -2263,8 +2615,10 @@ def mount_spa(application: FastAPI):
|
|||
def _serve_index():
|
||||
"""Return index.html with the session token injected."""
|
||||
html = _index_path.read_text()
|
||||
chat_js = "true" if _DASHBOARD_EMBEDDED_CHAT_ENABLED else "false"
|
||||
token_script = (
|
||||
f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";</script>'
|
||||
f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";'
|
||||
f"window.__HERMES_DASHBOARD_EMBEDDED_CHAT__={chat_js};</script>"
|
||||
)
|
||||
html = html.replace("</head>", f"{token_script}</head>", 1)
|
||||
return HTMLResponse(
|
||||
|
|
@ -2304,8 +2658,227 @@ _BUILTIN_DASHBOARD_THEMES = [
|
|||
]
|
||||
|
||||
|
||||
def _parse_theme_layer(value: Any, default_hex: str, default_alpha: float = 1.0) -> Optional[Dict[str, Any]]:
|
||||
"""Normalise a theme layer spec from YAML into `{hex, alpha}` form.
|
||||
|
||||
Accepts shorthand (a bare hex string) or full dict form. Returns
|
||||
``None`` on garbage input so the caller can fall back to a built-in
|
||||
default rather than blowing up.
|
||||
"""
|
||||
if value is None:
|
||||
return {"hex": default_hex, "alpha": default_alpha}
|
||||
if isinstance(value, str):
|
||||
return {"hex": value, "alpha": default_alpha}
|
||||
if isinstance(value, dict):
|
||||
hex_val = value.get("hex", default_hex)
|
||||
alpha_val = value.get("alpha", default_alpha)
|
||||
if not isinstance(hex_val, str):
|
||||
return None
|
||||
try:
|
||||
alpha_f = float(alpha_val)
|
||||
except (TypeError, ValueError):
|
||||
alpha_f = default_alpha
|
||||
return {"hex": hex_val, "alpha": max(0.0, min(1.0, alpha_f))}
|
||||
return None
|
||||
|
||||
|
||||
_THEME_DEFAULT_TYPOGRAPHY: Dict[str, str] = {
|
||||
"fontSans": 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
|
||||
"fontMono": 'ui-monospace, "SF Mono", "Cascadia Mono", Menlo, Consolas, monospace',
|
||||
"baseSize": "15px",
|
||||
"lineHeight": "1.55",
|
||||
"letterSpacing": "0",
|
||||
}
|
||||
|
||||
_THEME_DEFAULT_LAYOUT: Dict[str, str] = {
|
||||
"radius": "0.5rem",
|
||||
"density": "comfortable",
|
||||
}
|
||||
|
||||
_THEME_OVERRIDE_KEYS = {
|
||||
"card", "cardForeground", "popover", "popoverForeground",
|
||||
"primary", "primaryForeground", "secondary", "secondaryForeground",
|
||||
"muted", "mutedForeground", "accent", "accentForeground",
|
||||
"destructive", "destructiveForeground", "success", "warning",
|
||||
"border", "input", "ring",
|
||||
}
|
||||
|
||||
# Well-known named asset slots themes can populate. Any other keys under
|
||||
# ``assets.custom`` are exposed as ``--theme-asset-custom-<key>`` CSS vars
|
||||
# for plugin/shell use.
|
||||
_THEME_NAMED_ASSET_KEYS = {"bg", "hero", "logo", "crest", "sidebar", "header"}
|
||||
|
||||
# Component-style buckets themes can override. The value under each bucket
|
||||
# is a mapping from camelCase property name to CSS string; each pair emits
|
||||
# ``--component-<bucket>-<kebab-property>`` on :root. The frontend's shell
|
||||
# components (Card, App header, Backdrop, etc.) consume these vars so themes
|
||||
# can restyle chrome (clip-path, border-image, segmented progress, etc.)
|
||||
# without shipping their own CSS.
|
||||
_THEME_COMPONENT_BUCKETS = {
|
||||
"card", "header", "footer", "sidebar", "tab",
|
||||
"progress", "badge", "backdrop", "page",
|
||||
}
|
||||
|
||||
_THEME_LAYOUT_VARIANTS = {"standard", "cockpit", "tiled"}
|
||||
|
||||
# Cap on customCSS length so a malformed/oversized theme YAML can't blow up
|
||||
# the response payload or the <style> tag. 32 KiB is plenty for every
|
||||
# practical reskin (the Strike Freedom demo is ~2 KiB).
|
||||
_THEME_CUSTOM_CSS_MAX = 32 * 1024
|
||||
|
||||
|
||||
def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Normalise a user theme YAML into the wire format `ThemeProvider`
|
||||
expects. Returns ``None`` if the theme is unusable.
|
||||
|
||||
Accepts both the full schema (palette/typography/layout) and a loose
|
||||
form with bare hex strings, so hand-written YAMLs stay friendly.
|
||||
"""
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
name = data.get("name")
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
return None
|
||||
|
||||
# Palette
|
||||
palette_src = data.get("palette", {}) if isinstance(data.get("palette"), dict) else {}
|
||||
# Allow top-level `colors.background` as a shorthand too.
|
||||
colors_src = data.get("colors", {}) if isinstance(data.get("colors"), dict) else {}
|
||||
|
||||
def _layer(key: str, default_hex: str, default_alpha: float = 1.0) -> Dict[str, Any]:
|
||||
spec = palette_src.get(key, colors_src.get(key))
|
||||
parsed = _parse_theme_layer(spec, default_hex, default_alpha)
|
||||
return parsed if parsed is not None else {"hex": default_hex, "alpha": default_alpha}
|
||||
|
||||
palette = {
|
||||
"background": _layer("background", "#041c1c", 1.0),
|
||||
"midground": _layer("midground", "#ffe6cb", 1.0),
|
||||
"foreground": _layer("foreground", "#ffffff", 0.0),
|
||||
"warmGlow": palette_src.get("warmGlow") or data.get("warmGlow") or "rgba(255, 189, 56, 0.35)",
|
||||
"noiseOpacity": 1.0,
|
||||
}
|
||||
raw_noise = palette_src.get("noiseOpacity", data.get("noiseOpacity"))
|
||||
try:
|
||||
palette["noiseOpacity"] = float(raw_noise) if raw_noise is not None else 1.0
|
||||
except (TypeError, ValueError):
|
||||
palette["noiseOpacity"] = 1.0
|
||||
|
||||
# Typography
|
||||
typo_src = data.get("typography", {}) if isinstance(data.get("typography"), dict) else {}
|
||||
typography = dict(_THEME_DEFAULT_TYPOGRAPHY)
|
||||
for key in ("fontSans", "fontMono", "fontDisplay", "fontUrl", "baseSize", "lineHeight", "letterSpacing"):
|
||||
val = typo_src.get(key)
|
||||
if isinstance(val, str) and val.strip():
|
||||
typography[key] = val
|
||||
|
||||
# Layout
|
||||
layout_src = data.get("layout", {}) if isinstance(data.get("layout"), dict) else {}
|
||||
layout = dict(_THEME_DEFAULT_LAYOUT)
|
||||
radius = layout_src.get("radius")
|
||||
if isinstance(radius, str) and radius.strip():
|
||||
layout["radius"] = radius
|
||||
density = layout_src.get("density")
|
||||
if isinstance(density, str) and density in ("compact", "comfortable", "spacious"):
|
||||
layout["density"] = density
|
||||
|
||||
# Color overrides — keep only valid keys with string values.
|
||||
overrides_src = data.get("colorOverrides", {})
|
||||
color_overrides: Dict[str, str] = {}
|
||||
if isinstance(overrides_src, dict):
|
||||
for key, val in overrides_src.items():
|
||||
if key in _THEME_OVERRIDE_KEYS and isinstance(val, str) and val.strip():
|
||||
color_overrides[key] = val
|
||||
|
||||
# Assets — named slots + arbitrary user-defined keys. Values must be
|
||||
# strings (URLs or CSS ``url(...)``/``linear-gradient(...)`` expressions).
|
||||
# We don't fetch remote assets here; the frontend just injects them as
|
||||
# CSS vars. Empty values are dropped so a theme can explicitly clear a
|
||||
# slot by setting ``hero: ""``.
|
||||
assets_out: Dict[str, Any] = {}
|
||||
assets_src = data.get("assets", {}) if isinstance(data.get("assets"), dict) else {}
|
||||
for key in _THEME_NAMED_ASSET_KEYS:
|
||||
val = assets_src.get(key)
|
||||
if isinstance(val, str) and val.strip():
|
||||
assets_out[key] = val
|
||||
custom_assets_src = assets_src.get("custom")
|
||||
if isinstance(custom_assets_src, dict):
|
||||
custom_assets: Dict[str, str] = {}
|
||||
for key, val in custom_assets_src.items():
|
||||
if (
|
||||
isinstance(key, str)
|
||||
and key.replace("-", "").replace("_", "").isalnum()
|
||||
and isinstance(val, str)
|
||||
and val.strip()
|
||||
):
|
||||
custom_assets[key] = val
|
||||
if custom_assets:
|
||||
assets_out["custom"] = custom_assets
|
||||
|
||||
# Custom CSS — raw CSS text the frontend injects as a scoped <style>
|
||||
# tag on theme apply. Clipped to _THEME_CUSTOM_CSS_MAX to keep the
|
||||
# payload bounded. We intentionally do NOT parse/sanitise the CSS
|
||||
# here — the dashboard is localhost-only and themes are user-authored
|
||||
# YAML in ~/.hermes/, same trust level as the config file itself.
|
||||
custom_css_val = data.get("customCSS")
|
||||
custom_css: Optional[str] = None
|
||||
if isinstance(custom_css_val, str) and custom_css_val.strip():
|
||||
custom_css = custom_css_val[:_THEME_CUSTOM_CSS_MAX]
|
||||
|
||||
# Component style overrides — per-bucket dicts of camelCase CSS
|
||||
# property -> CSS string. The frontend converts these into CSS vars
|
||||
# that shell components (Card, App header, Backdrop) consume.
|
||||
component_styles_src = data.get("componentStyles", {})
|
||||
component_styles: Dict[str, Dict[str, str]] = {}
|
||||
if isinstance(component_styles_src, dict):
|
||||
for bucket, props in component_styles_src.items():
|
||||
if bucket not in _THEME_COMPONENT_BUCKETS or not isinstance(props, dict):
|
||||
continue
|
||||
clean: Dict[str, str] = {}
|
||||
for prop, value in props.items():
|
||||
if (
|
||||
isinstance(prop, str)
|
||||
and prop.replace("-", "").replace("_", "").isalnum()
|
||||
and isinstance(value, (str, int, float))
|
||||
and str(value).strip()
|
||||
):
|
||||
clean[prop] = str(value)
|
||||
if clean:
|
||||
component_styles[bucket] = clean
|
||||
|
||||
layout_variant_src = data.get("layoutVariant")
|
||||
layout_variant = (
|
||||
layout_variant_src
|
||||
if isinstance(layout_variant_src, str) and layout_variant_src in _THEME_LAYOUT_VARIANTS
|
||||
else "standard"
|
||||
)
|
||||
|
||||
result: Dict[str, Any] = {
|
||||
"name": name,
|
||||
"label": data.get("label") or name,
|
||||
"description": data.get("description", ""),
|
||||
"palette": palette,
|
||||
"typography": typography,
|
||||
"layout": layout,
|
||||
"layoutVariant": layout_variant,
|
||||
}
|
||||
if color_overrides:
|
||||
result["colorOverrides"] = color_overrides
|
||||
if assets_out:
|
||||
result["assets"] = assets_out
|
||||
if custom_css is not None:
|
||||
result["customCSS"] = custom_css
|
||||
if component_styles:
|
||||
result["componentStyles"] = component_styles
|
||||
return result
|
||||
|
||||
|
||||
def _discover_user_themes() -> list:
|
||||
"""Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes."""
|
||||
"""Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes.
|
||||
|
||||
Returns a list of fully-normalised theme definitions ready to ship
|
||||
to the frontend, so the client can apply them without a secondary
|
||||
round-trip or a built-in stub.
|
||||
"""
|
||||
themes_dir = get_hermes_home() / "dashboard-themes"
|
||||
if not themes_dir.is_dir():
|
||||
return []
|
||||
|
|
@ -2313,33 +2886,42 @@ def _discover_user_themes() -> list:
|
|||
for f in sorted(themes_dir.glob("*.yaml")):
|
||||
try:
|
||||
data = yaml.safe_load(f.read_text(encoding="utf-8"))
|
||||
if isinstance(data, dict) and data.get("name"):
|
||||
result.append({
|
||||
"name": data["name"],
|
||||
"label": data.get("label", data["name"]),
|
||||
"description": data.get("description", ""),
|
||||
})
|
||||
except Exception:
|
||||
continue
|
||||
normalised = _normalise_theme_definition(data)
|
||||
if normalised is not None:
|
||||
result.append(normalised)
|
||||
return result
|
||||
|
||||
|
||||
@app.get("/api/dashboard/themes")
|
||||
async def get_dashboard_themes():
|
||||
"""Return available themes and the currently active one."""
|
||||
"""Return available themes and the currently active one.
|
||||
|
||||
Built-in entries ship name/label/description only (the frontend owns
|
||||
their full definitions in `web/src/themes/presets.ts`). User themes
|
||||
from `~/.hermes/dashboard-themes/*.yaml` ship with their full
|
||||
normalised definition under `definition`, so the client can apply
|
||||
them without a stub.
|
||||
"""
|
||||
config = load_config()
|
||||
active = config.get("dashboard", {}).get("theme", "default")
|
||||
user_themes = _discover_user_themes()
|
||||
# Merge built-in + user, user themes override built-in by name.
|
||||
seen = set()
|
||||
themes = []
|
||||
for t in _BUILTIN_DASHBOARD_THEMES:
|
||||
seen.add(t["name"])
|
||||
themes.append(t)
|
||||
for t in user_themes:
|
||||
if t["name"] not in seen:
|
||||
themes.append(t)
|
||||
seen.add(t["name"])
|
||||
if t["name"] in seen:
|
||||
continue
|
||||
themes.append({
|
||||
"name": t["name"],
|
||||
"label": t["label"],
|
||||
"description": t["description"],
|
||||
"definition": t,
|
||||
})
|
||||
seen.add(t["name"])
|
||||
return {"themes": themes, "active": active}
|
||||
|
||||
|
||||
|
|
@ -2396,13 +2978,35 @@ def _discover_dashboard_plugins() -> list:
|
|||
if name in seen_names:
|
||||
continue
|
||||
seen_names.add(name)
|
||||
# Tab options: ``path`` + ``position`` for a new tab, optional
|
||||
# ``override`` to replace a built-in route, and ``hidden`` to
|
||||
# register the plugin component/slots without adding a tab
|
||||
# (useful for slot-only plugins like a header-crest injector).
|
||||
raw_tab = data.get("tab", {}) if isinstance(data.get("tab"), dict) else {}
|
||||
tab_info = {
|
||||
"path": raw_tab.get("path", f"/{name}"),
|
||||
"position": raw_tab.get("position", "end"),
|
||||
}
|
||||
override_path = raw_tab.get("override")
|
||||
if isinstance(override_path, str) and override_path.startswith("/"):
|
||||
tab_info["override"] = override_path
|
||||
if bool(raw_tab.get("hidden")):
|
||||
tab_info["hidden"] = True
|
||||
# Slots: list of named slot locations this plugin populates.
|
||||
# The frontend exposes ``registerSlot(pluginName, slotName, Component)``
|
||||
# on window; plugins with non-empty slots call it from their JS bundle.
|
||||
slots_src = data.get("slots")
|
||||
slots: List[str] = []
|
||||
if isinstance(slots_src, list):
|
||||
slots = [s for s in slots_src if isinstance(s, str) and s]
|
||||
plugins.append({
|
||||
"name": name,
|
||||
"label": data.get("label", name),
|
||||
"description": data.get("description", ""),
|
||||
"icon": data.get("icon", "Puzzle"),
|
||||
"version": data.get("version", "0.0.0"),
|
||||
"tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
|
||||
"tab": tab_info,
|
||||
"slots": slots,
|
||||
"entry": data.get("entry", "dist/index.js"),
|
||||
"css": data.get("css"),
|
||||
"has_api": bool(data.get("api")),
|
||||
|
|
@ -2527,10 +3131,15 @@ def start_server(
|
|||
port: int = 9119,
|
||||
open_browser: bool = True,
|
||||
allow_public: bool = False,
|
||||
*,
|
||||
embedded_chat: bool = False,
|
||||
):
|
||||
"""Start the web UI server."""
|
||||
import uvicorn
|
||||
|
||||
global _DASHBOARD_EMBEDDED_CHAT_ENABLED
|
||||
_DASHBOARD_EMBEDDED_CHAT_ENABLED = embedded_chat
|
||||
|
||||
_LOCALHOST = ("127.0.0.1", "localhost", "::1")
|
||||
if host not in _LOCALHOST and not allow_public:
|
||||
raise SystemExit(
|
||||
|
|
@ -2546,7 +3155,10 @@ def start_server(
|
|||
|
||||
# Record the bound host so host_header_middleware can validate incoming
|
||||
# Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
|
||||
# bound_port is also stashed so /api/pty can build the back-WS URL the
|
||||
# PTY child uses to publish events to the dashboard sidebar.
|
||||
app.state.bound_host = host
|
||||
app.state.bound_port = port
|
||||
|
||||
if open_browser:
|
||||
import webbrowser
|
||||
|
|
|
|||
|
|
@ -1039,6 +1039,71 @@ class SessionDB:
|
|||
result.append(msg)
|
||||
return result
|
||||
|
||||
def resolve_resume_session_id(self, session_id: str) -> str:
|
||||
"""Redirect a resume target to the descendant session that holds the messages.
|
||||
|
||||
Context compression ends the current session and forks a new child session
|
||||
(linked via ``parent_session_id``). The flush cursor is reset, so the
|
||||
child is where new messages actually land — the parent ends up with
|
||||
``message_count = 0`` rows unless messages had already been flushed to
|
||||
it before compression. See #15000.
|
||||
|
||||
This helper walks ``parent_session_id`` forward from ``session_id`` and
|
||||
returns the first descendant in the chain that has at least one message
|
||||
row. If the original session already has messages, or no descendant
|
||||
has any, the original ``session_id`` is returned unchanged.
|
||||
|
||||
The chain is always walked via the child whose ``started_at`` is
|
||||
latest; that matches the single-chain shape that compression creates.
|
||||
A depth cap (32) guards against accidental loops in malformed data.
|
||||
"""
|
||||
if not session_id:
|
||||
return session_id
|
||||
|
||||
with self._lock:
|
||||
# If this session already has messages, nothing to redirect.
|
||||
try:
|
||||
row = self._conn.execute(
|
||||
"SELECT 1 FROM messages WHERE session_id = ? LIMIT 1",
|
||||
(session_id,),
|
||||
).fetchone()
|
||||
except Exception:
|
||||
return session_id
|
||||
if row is not None:
|
||||
return session_id
|
||||
|
||||
# Walk descendants: at each step, pick the most-recently-started
|
||||
# child session; stop once we find one with messages.
|
||||
current = session_id
|
||||
seen = {current}
|
||||
for _ in range(32):
|
||||
try:
|
||||
child_row = self._conn.execute(
|
||||
"SELECT id FROM sessions "
|
||||
"WHERE parent_session_id = ? "
|
||||
"ORDER BY started_at DESC, id DESC LIMIT 1",
|
||||
(current,),
|
||||
).fetchone()
|
||||
except Exception:
|
||||
return session_id
|
||||
if child_row is None:
|
||||
return session_id
|
||||
child_id = child_row["id"] if hasattr(child_row, "keys") else child_row[0]
|
||||
if not child_id or child_id in seen:
|
||||
return session_id
|
||||
seen.add(child_id)
|
||||
try:
|
||||
msg_row = self._conn.execute(
|
||||
"SELECT 1 FROM messages WHERE session_id = ? LIMIT 1",
|
||||
(child_id,),
|
||||
).fetchone()
|
||||
except Exception:
|
||||
return session_id
|
||||
if msg_row is not None:
|
||||
return child_id
|
||||
current = child_id
|
||||
return session_id
|
||||
|
||||
def get_messages_as_conversation(self, session_id: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Load messages in the OpenAI conversation format (role + content dicts).
|
||||
|
|
|
|||
|
|
@ -343,6 +343,18 @@ def get_tool_definitions(
|
|||
global _last_resolved_tool_names
|
||||
_last_resolved_tool_names = [t["function"]["name"] for t in filtered_tools]
|
||||
|
||||
# Sanitize schemas for broad backend compatibility. llama.cpp's
|
||||
# json-schema-to-grammar converter (used by its OAI server to build
|
||||
# GBNF tool-call parsers) rejects some shapes that cloud providers
|
||||
# silently accept — bare "type": "object" with no properties,
|
||||
# string-valued schema nodes from malformed MCP servers, etc. This
|
||||
# is a no-op for schemas that are already well-formed.
|
||||
try:
|
||||
from tools.schema_sanitizer import sanitize_tool_schemas
|
||||
filtered_tools = sanitize_tool_schemas(filtered_tools)
|
||||
except Exception as e: # pragma: no cover — defensive
|
||||
logger.warning("Schema sanitization skipped: %s", e)
|
||||
|
||||
return filtered_tools
|
||||
|
||||
|
||||
|
|
@ -418,6 +430,31 @@ def _coerce_value(value: str, expected_type):
|
|||
return _coerce_number(value, integer_only=(expected_type == "integer"))
|
||||
if expected_type == "boolean":
|
||||
return _coerce_boolean(value)
|
||||
if expected_type == "array":
|
||||
return _coerce_json(value, list)
|
||||
if expected_type == "object":
|
||||
return _coerce_json(value, dict)
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_json(value: str, expected_python_type: type):
|
||||
"""Parse *value* as JSON when the schema expects an array or object.
|
||||
|
||||
Handles model output drift where a complex oneOf/discriminated-union schema
|
||||
causes the LLM to emit the array/object as a JSON string instead of a native
|
||||
structure. Returns the original string if parsing fails or yields the wrong
|
||||
Python type.
|
||||
"""
|
||||
try:
|
||||
parsed = json.loads(value)
|
||||
except (ValueError, TypeError):
|
||||
return value
|
||||
if isinstance(parsed, expected_python_type):
|
||||
logger.debug(
|
||||
"coerce_tool_args: coerced string to %s via json.loads",
|
||||
expected_python_type.__name__,
|
||||
)
|
||||
return parsed
|
||||
return value
|
||||
|
||||
|
||||
|
|
@ -427,9 +464,9 @@ def _coerce_number(value: str, integer_only: bool = False):
|
|||
f = float(value)
|
||||
except (ValueError, OverflowError):
|
||||
return value
|
||||
# Guard against inf/nan before int() conversion
|
||||
# Guard against inf/nan — not JSON-serializable, keep original string
|
||||
if f != f or f == float("inf") or f == float("-inf"):
|
||||
return f
|
||||
return value
|
||||
# If it looks like an integer (no fractional part), return int
|
||||
if f == int(f):
|
||||
return int(f)
|
||||
|
|
|
|||
313
plugins/image_gen/xai/__init__.py
Normal file
313
plugins/image_gen/xai/__init__.py
Normal file
|
|
@ -0,0 +1,313 @@
|
|||
"""xAI image generation backend.
|
||||
|
||||
Exposes xAI's ``grok-imagine-image`` model as an
|
||||
:class:`ImageGenProvider` implementation.
|
||||
|
||||
Features:
|
||||
- Text-to-image generation
|
||||
- Multiple aspect ratios (1:1, 16:9, 9:16, etc.)
|
||||
- Multiple resolutions (1K, 2K)
|
||||
- Base64 output saved to cache
|
||||
|
||||
Selection precedence (first hit wins):
|
||||
1. ``XAI_IMAGE_MODEL`` env var
|
||||
2. ``image_gen.xai.model`` in ``config.yaml``
|
||||
3. :data:`DEFAULT_MODEL`
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from agent.image_gen_provider import (
|
||||
DEFAULT_ASPECT_RATIO,
|
||||
ImageGenProvider,
|
||||
error_response,
|
||||
resolve_aspect_ratio,
|
||||
save_b64_image,
|
||||
success_response,
|
||||
)
|
||||
from tools.xai_http import hermes_xai_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model catalog
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
API_MODEL = "grok-imagine-image"
|
||||
|
||||
_MODELS: Dict[str, Dict[str, Any]] = {
|
||||
"grok-imagine-image": {
|
||||
"display": "Grok Imagine Image",
|
||||
"speed": "~5-10s",
|
||||
"strengths": "Fast, high-quality",
|
||||
},
|
||||
}
|
||||
|
||||
DEFAULT_MODEL = "grok-imagine-image"
|
||||
|
||||
# xAI aspect ratios (more options than FAL/OpenAI)
|
||||
_XAI_ASPECT_RATIOS = {
|
||||
"landscape": "16:9",
|
||||
"square": "1:1",
|
||||
"portrait": "9:16",
|
||||
"4:3": "4:3",
|
||||
"3:4": "3:4",
|
||||
"3:2": "3:2",
|
||||
"2:3": "2:3",
|
||||
}
|
||||
|
||||
# xAI resolutions
|
||||
_XAI_RESOLUTIONS = {
|
||||
"1k": "1024",
|
||||
"2k": "2048",
|
||||
}
|
||||
|
||||
DEFAULT_RESOLUTION = "1k"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_xai_config() -> Dict[str, Any]:
|
||||
"""Read ``image_gen.xai`` from config.yaml."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
section = cfg.get("image_gen") if isinstance(cfg, dict) else None
|
||||
xai_section = section.get("xai") if isinstance(section, dict) else None
|
||||
return xai_section if isinstance(xai_section, dict) else {}
|
||||
except Exception as exc:
|
||||
logger.debug("Could not load image_gen.xai config: %s", exc)
|
||||
return {}
|
||||
|
||||
|
||||
def _resolve_model() -> Tuple[str, Dict[str, Any]]:
|
||||
"""Decide which model to use and return ``(model_id, meta)``."""
|
||||
env_override = os.environ.get("XAI_IMAGE_MODEL")
|
||||
if env_override and env_override in _MODELS:
|
||||
return env_override, _MODELS[env_override]
|
||||
|
||||
cfg = _load_xai_config()
|
||||
candidate = cfg.get("model") if isinstance(cfg.get("model"), str) else None
|
||||
if candidate and candidate in _MODELS:
|
||||
return candidate, _MODELS[candidate]
|
||||
|
||||
return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
|
||||
|
||||
|
||||
def _resolve_resolution() -> str:
|
||||
"""Get configured resolution."""
|
||||
cfg = _load_xai_config()
|
||||
res = cfg.get("resolution") if isinstance(cfg.get("resolution"), str) else None
|
||||
if res and res in _XAI_RESOLUTIONS:
|
||||
return res
|
||||
return DEFAULT_RESOLUTION
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class XAIImageGenProvider(ImageGenProvider):
|
||||
"""xAI ``grok-imagine-image`` backend."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "xai"
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
return "xAI (Grok)"
|
||||
|
||||
def is_available(self) -> bool:
|
||||
return bool(os.getenv("XAI_API_KEY"))
|
||||
|
||||
def list_models(self) -> List[Dict[str, Any]]:
|
||||
return [
|
||||
{
|
||||
"id": model_id,
|
||||
"display": meta.get("display", model_id),
|
||||
"speed": meta.get("speed", ""),
|
||||
"strengths": meta.get("strengths", ""),
|
||||
}
|
||||
for model_id, meta in _MODELS.items()
|
||||
]
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"name": "xAI (Grok)",
|
||||
"badge": "paid",
|
||||
"tag": "Native xAI image generation via grok-imagine-image",
|
||||
"env_vars": [
|
||||
{
|
||||
"key": "XAI_API_KEY",
|
||||
"prompt": "xAI API key",
|
||||
"url": "https://console.x.ai/",
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate an image using xAI's grok-imagine-image."""
|
||||
api_key = os.getenv("XAI_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
return error_response(
|
||||
error="XAI_API_KEY not set. Get one at https://console.x.ai/",
|
||||
error_type="missing_api_key",
|
||||
provider="xai",
|
||||
aspect_ratio=aspect_ratio,
|
||||
)
|
||||
|
||||
model_id, meta = _resolve_model()
|
||||
aspect = resolve_aspect_ratio(aspect_ratio)
|
||||
xai_ar = _XAI_ASPECT_RATIOS.get(aspect, "1:1")
|
||||
resolution = _resolve_resolution()
|
||||
xai_res = _XAI_RESOLUTIONS.get(resolution, "1024")
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"model": API_MODEL,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": xai_ar,
|
||||
"resolution": xai_res,
|
||||
}
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": hermes_xai_user_agent(),
|
||||
}
|
||||
|
||||
base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{base_url}/images/generations",
|
||||
headers=headers,
|
||||
json=payload,
|
||||
timeout=120,
|
||||
)
|
||||
response.raise_for_status()
|
||||
except requests.HTTPError as exc:
|
||||
status = exc.response.status_code if exc.response else 0
|
||||
try:
|
||||
err_msg = exc.response.json().get("error", {}).get("message", exc.response.text[:300])
|
||||
except Exception:
|
||||
err_msg = exc.response.text[:300] if exc.response else str(exc)
|
||||
logger.error("xAI image gen failed (%d): %s", status, err_msg)
|
||||
return error_response(
|
||||
error=f"xAI image generation failed ({status}): {err_msg}",
|
||||
error_type="api_error",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
except requests.Timeout:
|
||||
return error_response(
|
||||
error="xAI image generation timed out (120s)",
|
||||
error_type="timeout",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
except requests.ConnectionError as exc:
|
||||
return error_response(
|
||||
error=f"xAI connection error: {exc}",
|
||||
error_type="connection_error",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
try:
|
||||
result = response.json()
|
||||
except Exception as exc:
|
||||
return error_response(
|
||||
error=f"xAI returned invalid JSON: {exc}",
|
||||
error_type="invalid_response",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
# Parse response — xAI returns data[0].b64_json or data[0].url
|
||||
data = result.get("data", [])
|
||||
if not data:
|
||||
return error_response(
|
||||
error="xAI returned no image data",
|
||||
error_type="empty_response",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
first = data[0]
|
||||
b64 = first.get("b64_json")
|
||||
url = first.get("url")
|
||||
|
||||
if b64:
|
||||
try:
|
||||
saved_path = save_b64_image(b64, prefix=f"xai_{model_id}")
|
||||
except Exception as exc:
|
||||
return error_response(
|
||||
error=f"Could not save image to cache: {exc}",
|
||||
error_type="io_error",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
image_ref = str(saved_path)
|
||||
elif url:
|
||||
image_ref = url
|
||||
else:
|
||||
return error_response(
|
||||
error="xAI response contained neither b64_json nor URL",
|
||||
error_type="empty_response",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
extra: Dict[str, Any] = {
|
||||
"resolution": xai_res,
|
||||
}
|
||||
|
||||
return success_response(
|
||||
image=image_ref,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
provider="xai",
|
||||
extra=extra,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin registration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def register(ctx: Any) -> None:
|
||||
"""Register this provider with the image gen registry."""
|
||||
ctx.register_image_gen_provider(XAIImageGenProvider())
|
||||
7
plugins/image_gen/xai/plugin.yaml
Normal file
7
plugins/image_gen/xai/plugin.yaml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
name: xai
|
||||
version: 1.0.0
|
||||
description: "xAI image generation backend (grok-imagine-image). Text-to-image."
|
||||
author: Julien Talbot
|
||||
kind: backend
|
||||
requires_env:
|
||||
- XAI_API_KEY
|
||||
|
|
@ -59,7 +59,8 @@ Config file: `~/.hermes/hindsight/config.json`
|
|||
|
||||
| Key | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `bank_id` | `hermes` | Memory bank name |
|
||||
| `bank_id` | `hermes` | Memory bank name (static fallback used when `bank_id_template` is unset or resolves empty) |
|
||||
| `bank_id_template` | — | Optional template to derive the bank name dynamically. Placeholders: `{profile}`, `{workspace}`, `{platform}`, `{user}`, `{session}`. Example: `hermes-{profile}` isolates memory per active Hermes profile. Empty placeholders collapse cleanly (e.g. `hermes-{user}` with no user becomes `hermes`). |
|
||||
| `bank_mission` | — | Reflect mission (identity/framing for reflect reasoning). Applied via Banks API. |
|
||||
| `bank_retain_mission` | — | Retain mission (steers what gets extracted). Applied via Banks API. |
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
Long-term memory with knowledge graph, entity resolution, and multi-strategy
|
||||
retrieval. Supports cloud (API key) and local modes.
|
||||
|
||||
Configurable timeout via HINDSIGHT_TIMEOUT env var or config.json.
|
||||
|
||||
Original PR #1811 by benfrank241, adapted to MemoryProvider ABC.
|
||||
|
||||
Config via environment variables:
|
||||
|
|
@ -11,6 +13,7 @@ Config via environment variables:
|
|||
HINDSIGHT_BUDGET — recall budget: low/mid/high (default: mid)
|
||||
HINDSIGHT_API_URL — API endpoint
|
||||
HINDSIGHT_MODE — cloud or local (default: cloud)
|
||||
HINDSIGHT_TIMEOUT — API request timeout in seconds (default: 120)
|
||||
HINDSIGHT_RETAIN_TAGS — comma-separated tags attached to retained memories
|
||||
HINDSIGHT_RETAIN_SOURCE — metadata source value attached to retained memories
|
||||
HINDSIGHT_RETAIN_USER_PREFIX — label used before user turns in retained transcripts
|
||||
|
|
@ -23,6 +26,7 @@ Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -40,6 +44,7 @@ logger = logging.getLogger(__name__)
|
|||
_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
|
||||
_DEFAULT_LOCAL_URL = "http://localhost:8888"
|
||||
_MIN_CLIENT_VERSION = "0.4.22"
|
||||
_DEFAULT_TIMEOUT = 120 # seconds — cloud API can take 30-40s per request
|
||||
_VALID_BUDGETS = {"low", "mid", "high"}
|
||||
_PROVIDER_DEFAULT_MODELS = {
|
||||
"openai": "gpt-4o-mini",
|
||||
|
|
@ -54,6 +59,22 @@ _PROVIDER_DEFAULT_MODELS = {
|
|||
}
|
||||
|
||||
|
||||
def _check_local_runtime() -> tuple[bool, str | None]:
|
||||
"""Return whether local embedded Hindsight imports cleanly.
|
||||
|
||||
On older CPUs, importing the local Hindsight stack can raise a runtime
|
||||
error from NumPy before the daemon starts. Treat that as "unavailable"
|
||||
so Hermes can degrade gracefully instead of repeatedly trying to start
|
||||
a broken local memory backend.
|
||||
"""
|
||||
try:
|
||||
importlib.import_module("hindsight")
|
||||
importlib.import_module("hindsight_embed.daemon_embed_manager")
|
||||
return True, None
|
||||
except Exception as exc:
|
||||
return False, str(exc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dedicated event loop for Hindsight async calls (one per process, reused).
|
||||
# Avoids creating ephemeral loops that leak aiohttp sessions.
|
||||
|
|
@ -81,13 +102,18 @@ def _get_loop() -> asyncio.AbstractEventLoop:
|
|||
return _loop
|
||||
|
||||
|
||||
def _run_sync(coro, timeout: float = 120.0):
|
||||
def _run_sync(coro, timeout: float = _DEFAULT_TIMEOUT):
|
||||
"""Schedule *coro* on the shared loop and block until done."""
|
||||
loop = _get_loop()
|
||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
return future.result(timeout=timeout)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backward-compatible alias — instances use self._run_sync() instead.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -233,6 +259,126 @@ def _utc_timestamp() -> str:
|
|||
return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z")
|
||||
|
||||
|
||||
def _embedded_profile_name(config: dict[str, Any]) -> str:
|
||||
"""Return the Hindsight embedded profile name for this Hermes config."""
|
||||
profile = config.get("profile", "hermes")
|
||||
return str(profile or "hermes")
|
||||
|
||||
|
||||
def _load_simple_env(path) -> dict[str, str]:
|
||||
"""Parse a simple KEY=VALUE env file, ignoring comments and blank lines."""
|
||||
if not path.exists():
|
||||
return {}
|
||||
|
||||
values: dict[str, str] = {}
|
||||
for line in path.read_text(encoding="utf-8").splitlines():
|
||||
if not line or line.startswith("#") or "=" not in line:
|
||||
continue
|
||||
key, value = line.split("=", 1)
|
||||
values[key.strip()] = value.strip()
|
||||
return values
|
||||
|
||||
|
||||
def _build_embedded_profile_env(config: dict[str, Any], *, llm_api_key: str | None = None) -> dict[str, str]:
|
||||
"""Build the profile-scoped env file that standalone hindsight-embed consumes."""
|
||||
current_key = llm_api_key
|
||||
if current_key is None:
|
||||
current_key = (
|
||||
config.get("llmApiKey")
|
||||
or config.get("llm_api_key")
|
||||
or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
|
||||
)
|
||||
|
||||
current_provider = config.get("llm_provider", "")
|
||||
current_model = config.get("llm_model", "")
|
||||
current_base_url = config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "")
|
||||
|
||||
# The embedded daemon expects OpenAI wire format for these providers.
|
||||
daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider
|
||||
|
||||
env_values = {
|
||||
"HINDSIGHT_API_LLM_PROVIDER": str(daemon_provider),
|
||||
"HINDSIGHT_API_LLM_API_KEY": str(current_key or ""),
|
||||
"HINDSIGHT_API_LLM_MODEL": str(current_model),
|
||||
"HINDSIGHT_API_LOG_LEVEL": "info",
|
||||
}
|
||||
if current_base_url:
|
||||
env_values["HINDSIGHT_API_LLM_BASE_URL"] = str(current_base_url)
|
||||
return env_values
|
||||
|
||||
|
||||
def _embedded_profile_env_path(config: dict[str, Any]):
|
||||
from pathlib import Path
|
||||
|
||||
return Path.home() / ".hindsight" / "profiles" / f"{_embedded_profile_name(config)}.env"
|
||||
|
||||
|
||||
def _materialize_embedded_profile_env(config: dict[str, Any], *, llm_api_key: str | None = None):
|
||||
"""Write the profile-scoped env file that standalone hindsight-embed uses."""
|
||||
profile_env = _embedded_profile_env_path(config)
|
||||
profile_env.parent.mkdir(parents=True, exist_ok=True)
|
||||
env_values = _build_embedded_profile_env(config, llm_api_key=llm_api_key)
|
||||
profile_env.write_text(
|
||||
"".join(f"{key}={value}\n" for key, value in env_values.items()),
|
||||
encoding="utf-8",
|
||||
)
|
||||
return profile_env
|
||||
|
||||
def _sanitize_bank_segment(value: str) -> str:
|
||||
"""Sanitize a bank_id_template placeholder value.
|
||||
|
||||
Bank IDs should be safe for URL paths and filesystem use. Replaces any
|
||||
character that isn't alphanumeric, dash, or underscore with a dash, and
|
||||
collapses runs of dashes.
|
||||
"""
|
||||
if not value:
|
||||
return ""
|
||||
out = []
|
||||
prev_dash = False
|
||||
for ch in str(value):
|
||||
if ch.isalnum() or ch == "-" or ch == "_":
|
||||
out.append(ch)
|
||||
prev_dash = False
|
||||
else:
|
||||
if not prev_dash:
|
||||
out.append("-")
|
||||
prev_dash = True
|
||||
return "".join(out).strip("-_")
|
||||
|
||||
|
||||
def _resolve_bank_id_template(template: str, fallback: str, **placeholders: str) -> str:
|
||||
"""Resolve a bank_id template string with the given placeholders.
|
||||
|
||||
Supported placeholders (each is sanitized before substitution):
|
||||
{profile} — active Hermes profile name (from agent_identity)
|
||||
{workspace} — Hermes workspace name (from agent_workspace)
|
||||
{platform} — "cli", "telegram", "discord", etc.
|
||||
{user} — platform user id (gateway sessions)
|
||||
{session} — current session id
|
||||
|
||||
Missing/empty placeholders are rendered as the empty string and then
|
||||
collapsed — e.g. ``hermes-{user}`` with no user becomes ``hermes``.
|
||||
|
||||
If the template is empty, resolution falls back to *fallback*.
|
||||
Returns the sanitized bank id.
|
||||
"""
|
||||
if not template:
|
||||
return fallback
|
||||
sanitized = {k: _sanitize_bank_segment(v) for k, v in placeholders.items()}
|
||||
try:
|
||||
rendered = template.format(**sanitized)
|
||||
except (KeyError, IndexError) as exc:
|
||||
logger.warning("Invalid bank_id_template %r: %s — using fallback %r",
|
||||
template, exc, fallback)
|
||||
return fallback
|
||||
while "--" in rendered:
|
||||
rendered = rendered.replace("--", "-")
|
||||
while "__" in rendered:
|
||||
rendered = rendered.replace("__", "_")
|
||||
rendered = rendered.strip("-_")
|
||||
return rendered or fallback
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MemoryProvider implementation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -262,13 +408,17 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
self._chat_type = ""
|
||||
self._thread_id = ""
|
||||
self._agent_identity = ""
|
||||
self._agent_workspace = ""
|
||||
self._turn_index = 0
|
||||
self._client = None
|
||||
self._timeout = _DEFAULT_TIMEOUT
|
||||
self._prefetch_result = ""
|
||||
self._prefetch_lock = threading.Lock()
|
||||
self._prefetch_thread = None
|
||||
self._sync_thread = None
|
||||
self._session_id = ""
|
||||
self._parent_session_id = ""
|
||||
self._document_id = ""
|
||||
|
||||
# Tags
|
||||
self._tags: list[str] | None = None
|
||||
|
|
@ -293,6 +443,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
# Bank
|
||||
self._bank_mission = ""
|
||||
self._bank_retain_mission: str | None = None
|
||||
self._bank_id_template = ""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
|
|
@ -302,9 +453,16 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
try:
|
||||
cfg = _load_config()
|
||||
mode = cfg.get("mode", "cloud")
|
||||
if mode in ("local", "local_embedded", "local_external"):
|
||||
if mode in ("local", "local_embedded"):
|
||||
available, _ = _check_local_runtime()
|
||||
return available
|
||||
if mode == "local_external":
|
||||
return True
|
||||
has_key = bool(cfg.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", ""))
|
||||
has_key = bool(
|
||||
cfg.get("apiKey")
|
||||
or cfg.get("api_key")
|
||||
or os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
)
|
||||
has_url = bool(cfg.get("api_url") or os.environ.get("HINDSIGHT_API_URL", ""))
|
||||
return has_key or has_url
|
||||
except Exception:
|
||||
|
|
@ -363,7 +521,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
else:
|
||||
deps_to_install = [cloud_dep]
|
||||
|
||||
print(f"\n Checking dependencies...")
|
||||
print("\n Checking dependencies...")
|
||||
uv_path = shutil.which("uv")
|
||||
if not uv_path:
|
||||
print(" ⚠ uv not found — install it: curl -LsSf https://astral.sh/uv/install.sh | sh")
|
||||
|
|
@ -374,14 +532,14 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
[uv_path, "pip", "install", "--python", sys.executable, "--quiet", "--upgrade"] + deps_to_install,
|
||||
check=True, timeout=120, capture_output=True,
|
||||
)
|
||||
print(f" ✓ Dependencies up to date")
|
||||
print(" ✓ Dependencies up to date")
|
||||
except Exception as e:
|
||||
print(f" ⚠ Install failed: {e}")
|
||||
print(f" Run manually: uv pip install --python {sys.executable} {' '.join(deps_to_install)}")
|
||||
|
||||
# Step 3: Mode-specific config
|
||||
if mode == "cloud":
|
||||
print(f"\n Get your API key at https://ui.hindsight.vectorize.io\n")
|
||||
print("\n Get your API key at https://ui.hindsight.vectorize.io\n")
|
||||
existing_key = os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
if existing_key:
|
||||
masked = f"...{existing_key[-4:]}" if len(existing_key) > 4 else "set"
|
||||
|
|
@ -434,13 +592,19 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
sys.stdout.write(" LLM API key: ")
|
||||
sys.stdout.flush()
|
||||
llm_key = getpass.getpass(prompt="") if sys.stdin.isatty() else sys.stdin.readline().strip()
|
||||
if llm_key:
|
||||
env_writes["HINDSIGHT_LLM_API_KEY"] = llm_key
|
||||
# Always write explicitly (including empty) so the provider sees ""
|
||||
# rather than a missing variable. The daemon reads from .env at
|
||||
# startup and fails when HINDSIGHT_LLM_API_KEY is unset.
|
||||
env_writes["HINDSIGHT_LLM_API_KEY"] = llm_key
|
||||
|
||||
# Step 4: Save everything
|
||||
provider_config["bank_id"] = "hermes"
|
||||
provider_config["recall_budget"] = "mid"
|
||||
bank_id = "hermes"
|
||||
# Read existing timeout from config if present, otherwise use default
|
||||
existing_timeout = self._config.get("timeout") if self._config else None
|
||||
timeout_val = existing_timeout if existing_timeout else _DEFAULT_TIMEOUT
|
||||
provider_config["timeout"] = timeout_val
|
||||
env_writes["HINDSIGHT_TIMEOUT"] = str(timeout_val)
|
||||
config["memory"]["provider"] = "hindsight"
|
||||
save_config(config)
|
||||
|
||||
|
|
@ -466,10 +630,32 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
new_lines.append(f"{k}={v}")
|
||||
env_path.write_text("\n".join(new_lines) + "\n")
|
||||
|
||||
if mode == "local_embedded":
|
||||
materialized_config = dict(provider_config)
|
||||
config_path = Path(hermes_home) / "hindsight" / "config.json"
|
||||
try:
|
||||
materialized_config = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
llm_api_key = env_writes.get("HINDSIGHT_LLM_API_KEY", "")
|
||||
if not llm_api_key:
|
||||
llm_api_key = _load_simple_env(Path(hermes_home) / ".env").get("HINDSIGHT_LLM_API_KEY", "")
|
||||
if not llm_api_key:
|
||||
llm_api_key = _load_simple_env(_embedded_profile_env_path(materialized_config)).get(
|
||||
"HINDSIGHT_API_LLM_API_KEY",
|
||||
"",
|
||||
)
|
||||
|
||||
_materialize_embedded_profile_env(
|
||||
materialized_config,
|
||||
llm_api_key=llm_api_key or None,
|
||||
)
|
||||
|
||||
print(f"\n ✓ Hindsight memory configured ({mode} mode)")
|
||||
if env_writes:
|
||||
print(f" API keys saved to .env")
|
||||
print(f"\n Start a new session to activate.\n")
|
||||
print(" API keys saved to .env")
|
||||
print("\n Start a new session to activate.\n")
|
||||
|
||||
def get_config_schema(self):
|
||||
return [
|
||||
|
|
@ -485,7 +671,8 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
{"key": "llm_base_url", "description": "Endpoint URL (e.g. http://192.168.1.10:8080/v1)", "default": "", "when": {"mode": "local_embedded", "llm_provider": "openai_compatible"}},
|
||||
{"key": "llm_api_key", "description": "LLM API key (optional for openai_compatible)", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local_embedded"}},
|
||||
{"key": "llm_model", "description": "LLM model", "default": "gpt-4o-mini", "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local_embedded"}},
|
||||
{"key": "bank_id", "description": "Memory bank name", "default": "hermes"},
|
||||
{"key": "bank_id", "description": "Memory bank name (static fallback when bank_id_template is unset)", "default": "hermes"},
|
||||
{"key": "bank_id_template", "description": "Optional template to derive bank_id dynamically. Placeholders: {profile}, {workspace}, {platform}, {user}, {session}. Example: hermes-{profile}", "default": ""},
|
||||
{"key": "bank_mission", "description": "Mission/purpose description for the memory bank"},
|
||||
{"key": "bank_retain_mission", "description": "Custom extraction prompt for memory retention"},
|
||||
{"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
|
||||
|
|
@ -505,12 +692,19 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
{"key": "recall_max_tokens", "description": "Maximum tokens for recall results", "default": 4096},
|
||||
{"key": "recall_max_input_chars", "description": "Maximum input query length for auto-recall", "default": 800},
|
||||
{"key": "recall_prompt_preamble", "description": "Custom preamble for recalled memories in context"},
|
||||
{"key": "timeout", "description": "API request timeout in seconds", "default": _DEFAULT_TIMEOUT},
|
||||
]
|
||||
|
||||
def _get_client(self):
|
||||
"""Return the cached Hindsight client (created once, reused)."""
|
||||
if self._client is None:
|
||||
if self._mode == "local_embedded":
|
||||
available, reason = _check_local_runtime()
|
||||
if not available:
|
||||
raise RuntimeError(
|
||||
"Hindsight local runtime is unavailable"
|
||||
+ (f": {reason}" if reason else "")
|
||||
)
|
||||
from hindsight import HindsightEmbedded
|
||||
HindsightEmbedded.__del__ = lambda self: None
|
||||
llm_provider = self._config.get("llm_provider", "")
|
||||
|
|
@ -529,16 +723,30 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
self._client = HindsightEmbedded(**kwargs)
|
||||
else:
|
||||
from hindsight_client import Hindsight
|
||||
kwargs = {"base_url": self._api_url, "timeout": 30.0}
|
||||
timeout = self._timeout or _DEFAULT_TIMEOUT
|
||||
kwargs = {"base_url": self._api_url, "timeout": float(timeout)}
|
||||
if self._api_key:
|
||||
kwargs["api_key"] = self._api_key
|
||||
logger.debug("Creating Hindsight cloud client (url=%s, has_key=%s)",
|
||||
self._api_url, bool(self._api_key))
|
||||
logger.debug("Creating Hindsight cloud client (url=%s, has_key=%s, timeout=%s)",
|
||||
self._api_url, bool(self._api_key), kwargs["timeout"])
|
||||
self._client = Hindsight(**kwargs)
|
||||
return self._client
|
||||
|
||||
def _run_sync(self, coro):
|
||||
"""Schedule *coro* on the shared loop using the configured timeout."""
|
||||
return _run_sync(coro, timeout=self._timeout)
|
||||
|
||||
def initialize(self, session_id: str, **kwargs) -> None:
|
||||
self._session_id = str(session_id or "").strip()
|
||||
self._parent_session_id = str(kwargs.get("parent_session_id", "") or "").strip()
|
||||
|
||||
# Each process lifecycle gets its own document_id. Reusing session_id
|
||||
# alone caused overwrites on /resume — the reloaded session starts
|
||||
# with an empty _session_turns, so the next retain would replace the
|
||||
# previously stored content. session_id stays in tags so processes
|
||||
# for the same session remain filterable together.
|
||||
start_ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
||||
self._document_id = f"{self._session_id}-{start_ts}"
|
||||
|
||||
# Check client version and auto-upgrade if needed
|
||||
try:
|
||||
|
|
@ -548,7 +756,9 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
if Version(installed) < Version(_MIN_CLIENT_VERSION):
|
||||
logger.warning("hindsight-client %s is outdated (need >=%s), attempting upgrade...",
|
||||
installed, _MIN_CLIENT_VERSION)
|
||||
import shutil, subprocess, sys
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
uv_path = shutil.which("uv")
|
||||
if uv_path:
|
||||
try:
|
||||
|
|
@ -575,19 +785,41 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
self._chat_type = str(kwargs.get("chat_type") or "").strip()
|
||||
self._thread_id = str(kwargs.get("thread_id") or "").strip()
|
||||
self._agent_identity = str(kwargs.get("agent_identity") or "").strip()
|
||||
self._agent_workspace = str(kwargs.get("agent_workspace") or "").strip()
|
||||
self._turn_index = 0
|
||||
self._session_turns = []
|
||||
self._mode = self._config.get("mode", "cloud")
|
||||
# Read timeout from config or env var, fall back to default
|
||||
self._timeout = self._config.get("timeout") or int(os.environ.get("HINDSIGHT_TIMEOUT", str(_DEFAULT_TIMEOUT)))
|
||||
# "local" is a legacy alias for "local_embedded"
|
||||
if self._mode == "local":
|
||||
self._mode = "local_embedded"
|
||||
if self._mode == "local_embedded":
|
||||
available, reason = _check_local_runtime()
|
||||
if not available:
|
||||
logger.warning(
|
||||
"Hindsight local mode disabled because its runtime could not be imported: %s",
|
||||
reason,
|
||||
)
|
||||
self._mode = "disabled"
|
||||
return
|
||||
self._api_key = self._config.get("apiKey") or self._config.get("api_key") or os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
default_url = _DEFAULT_LOCAL_URL if self._mode in ("local_embedded", "local_external") else _DEFAULT_API_URL
|
||||
self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
|
||||
self._llm_base_url = self._config.get("llm_base_url", "")
|
||||
|
||||
banks = self._config.get("banks", {}).get("hermes", {})
|
||||
self._bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
|
||||
static_bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
|
||||
self._bank_id_template = self._config.get("bank_id_template", "") or ""
|
||||
self._bank_id = _resolve_bank_id_template(
|
||||
self._bank_id_template,
|
||||
fallback=static_bank_id,
|
||||
profile=self._agent_identity,
|
||||
workspace=self._agent_workspace,
|
||||
platform=self._platform,
|
||||
user=self._user_id,
|
||||
session=self._session_id,
|
||||
)
|
||||
budget = self._config.get("recall_budget") or self._config.get("budget") or banks.get("budget", "mid")
|
||||
self._budget = budget if budget in _VALID_BUDGETS else "mid"
|
||||
|
||||
|
|
@ -640,6 +872,10 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
pass
|
||||
logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s",
|
||||
self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version)
|
||||
if self._bank_id_template:
|
||||
logger.debug("Hindsight bank resolved from template %r: profile=%s workspace=%s platform=%s user=%s -> bank=%s",
|
||||
self._bank_id_template, self._agent_identity, self._agent_workspace,
|
||||
self._platform, self._user_id, self._bank_id)
|
||||
logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, "
|
||||
"retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
|
||||
self._auto_retain, self._auto_recall, self._retain_every_n_turns,
|
||||
|
|
@ -669,42 +905,13 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
# Update the profile .env to match our current config so
|
||||
# the daemon always starts with the right settings.
|
||||
# If the config changed and the daemon is running, stop it.
|
||||
from pathlib import Path as _Path
|
||||
profile_env = _Path.home() / ".hindsight" / "profiles" / f"{profile}.env"
|
||||
current_key = self._config.get("llm_api_key") or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
|
||||
current_provider = self._config.get("llm_provider", "")
|
||||
current_model = self._config.get("llm_model", "")
|
||||
current_base_url = self._config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "")
|
||||
# Map openai_compatible/openrouter → openai for the daemon (OpenAI wire format)
|
||||
daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider
|
||||
|
||||
# Read saved profile config
|
||||
saved = {}
|
||||
if profile_env.exists():
|
||||
for line in profile_env.read_text().splitlines():
|
||||
if "=" in line and not line.startswith("#"):
|
||||
k, v = line.split("=", 1)
|
||||
saved[k.strip()] = v.strip()
|
||||
|
||||
config_changed = (
|
||||
saved.get("HINDSIGHT_API_LLM_PROVIDER") != daemon_provider or
|
||||
saved.get("HINDSIGHT_API_LLM_MODEL") != current_model or
|
||||
saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key or
|
||||
saved.get("HINDSIGHT_API_LLM_BASE_URL", "") != current_base_url
|
||||
)
|
||||
profile_env = _embedded_profile_env_path(self._config)
|
||||
expected_env = _build_embedded_profile_env(self._config)
|
||||
saved = _load_simple_env(profile_env)
|
||||
config_changed = saved != expected_env
|
||||
|
||||
if config_changed:
|
||||
# Write updated profile .env
|
||||
profile_env.parent.mkdir(parents=True, exist_ok=True)
|
||||
env_lines = (
|
||||
f"HINDSIGHT_API_LLM_PROVIDER={daemon_provider}\n"
|
||||
f"HINDSIGHT_API_LLM_API_KEY={current_key}\n"
|
||||
f"HINDSIGHT_API_LLM_MODEL={current_model}\n"
|
||||
f"HINDSIGHT_API_LOG_LEVEL=info\n"
|
||||
)
|
||||
if current_base_url:
|
||||
env_lines += f"HINDSIGHT_API_LLM_BASE_URL={current_base_url}\n"
|
||||
profile_env.write_text(env_lines)
|
||||
profile_env = _materialize_embedded_profile_env(self._config)
|
||||
if client._manager.is_running(profile):
|
||||
with open(log_path, "a") as f:
|
||||
f.write("\n=== Config changed, restarting daemon ===\n")
|
||||
|
|
@ -777,7 +984,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
client = self._get_client()
|
||||
if self._prefetch_method == "reflect":
|
||||
logger.debug("Prefetch: calling reflect (bank=%s, query_len=%d)", self._bank_id, len(query))
|
||||
resp = _run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget))
|
||||
resp = self._run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget))
|
||||
text = resp.text or ""
|
||||
else:
|
||||
recall_kwargs: dict = {
|
||||
|
|
@ -791,7 +998,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
recall_kwargs["types"] = self._recall_types
|
||||
logger.debug("Prefetch: calling recall (bank=%s, query_len=%d, budget=%s)",
|
||||
self._bank_id, len(query), self._budget)
|
||||
resp = _run_sync(client.arecall(**recall_kwargs))
|
||||
resp = self._run_sync(client.arecall(**recall_kwargs))
|
||||
num_results = len(resp.results) if resp.results else 0
|
||||
logger.debug("Prefetch: recall returned %d results", num_results)
|
||||
text = "\n".join(f"- {r.text}" for r in resp.results if r.text) if resp.results else ""
|
||||
|
|
@ -888,7 +1095,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
if session_id:
|
||||
self._session_id = str(session_id).strip()
|
||||
|
||||
turn = json.dumps(self._build_turn_messages(user_content, assistant_content))
|
||||
turn = json.dumps(self._build_turn_messages(user_content, assistant_content), ensure_ascii=False)
|
||||
self._session_turns.append(turn)
|
||||
self._turn_counter += 1
|
||||
self._turn_index = self._turn_counter
|
||||
|
|
@ -902,6 +1109,12 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
len(self._session_turns), sum(len(t) for t in self._session_turns))
|
||||
content = "[" + ",".join(self._session_turns) + "]"
|
||||
|
||||
lineage_tags: list[str] = []
|
||||
if self._session_id:
|
||||
lineage_tags.append(f"session:{self._session_id}")
|
||||
if self._parent_session_id:
|
||||
lineage_tags.append(f"parent:{self._parent_session_id}")
|
||||
|
||||
def _sync():
|
||||
try:
|
||||
client = self._get_client()
|
||||
|
|
@ -912,15 +1125,16 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
message_count=len(self._session_turns) * 2,
|
||||
turn_index=self._turn_index,
|
||||
),
|
||||
tags=lineage_tags or None,
|
||||
)
|
||||
item.pop("bank_id", None)
|
||||
item.pop("retain_async", None)
|
||||
logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d",
|
||||
self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns))
|
||||
_run_sync(client.aretain_batch(
|
||||
self._bank_id, self._document_id, self._retain_async, len(content), len(self._session_turns))
|
||||
self._run_sync(client.aretain_batch(
|
||||
bank_id=self._bank_id,
|
||||
items=[item],
|
||||
document_id=self._session_id,
|
||||
document_id=self._document_id,
|
||||
retain_async=self._retain_async,
|
||||
))
|
||||
logger.debug("Hindsight retain succeeded")
|
||||
|
|
@ -957,7 +1171,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
)
|
||||
logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s",
|
||||
self._bank_id, len(content), context)
|
||||
_run_sync(client.aretain(**retain_kwargs))
|
||||
self._run_sync(client.aretain(**retain_kwargs))
|
||||
logger.debug("Tool hindsight_retain: success")
|
||||
return json.dumps({"result": "Memory stored successfully."})
|
||||
except Exception as e:
|
||||
|
|
@ -980,7 +1194,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
recall_kwargs["types"] = self._recall_types
|
||||
logger.debug("Tool hindsight_recall: bank=%s, query_len=%d, budget=%s",
|
||||
self._bank_id, len(query), self._budget)
|
||||
resp = _run_sync(client.arecall(**recall_kwargs))
|
||||
resp = self._run_sync(client.arecall(**recall_kwargs))
|
||||
num_results = len(resp.results) if resp.results else 0
|
||||
logger.debug("Tool hindsight_recall: %d results", num_results)
|
||||
if not resp.results:
|
||||
|
|
@ -998,7 +1212,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
try:
|
||||
logger.debug("Tool hindsight_reflect: bank=%s, query_len=%d, budget=%s",
|
||||
self._bank_id, len(query), self._budget)
|
||||
resp = _run_sync(client.areflect(
|
||||
resp = self._run_sync(client.areflect(
|
||||
bank_id=self._bank_id, query=query, budget=self._budget
|
||||
))
|
||||
logger.debug("Tool hindsight_reflect: response_len=%d", len(resp.text or ""))
|
||||
|
|
@ -1011,7 +1225,6 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
|
||||
def shutdown(self) -> None:
|
||||
logger.debug("Hindsight shutdown: waiting for background threads")
|
||||
global _loop, _loop_thread
|
||||
for t in (self._prefetch_thread, self._sync_thread):
|
||||
if t and t.is_alive():
|
||||
t.join(timeout=5.0)
|
||||
|
|
@ -1026,17 +1239,21 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
except RuntimeError:
|
||||
pass
|
||||
else:
|
||||
_run_sync(self._client.aclose())
|
||||
self._run_sync(self._client.aclose())
|
||||
except Exception:
|
||||
pass
|
||||
self._client = None
|
||||
# Stop the background event loop so no tasks are pending at exit
|
||||
if _loop is not None and _loop.is_running():
|
||||
_loop.call_soon_threadsafe(_loop.stop)
|
||||
if _loop_thread is not None:
|
||||
_loop_thread.join(timeout=5.0)
|
||||
_loop = None
|
||||
_loop_thread = None
|
||||
# The module-global background event loop (_loop / _loop_thread)
|
||||
# is intentionally NOT stopped here. It is shared across every
|
||||
# HindsightMemoryProvider instance in the process — the plugin
|
||||
# loader creates a new provider per AIAgent, and the gateway
|
||||
# creates one AIAgent per concurrent chat session. Stopping the
|
||||
# loop from one provider's shutdown() strands the aiohttp
|
||||
# ClientSession + TCPConnector owned by every sibling provider
|
||||
# on a dead loop, which surfaces as the "Unclosed client session"
|
||||
# / "Unclosed connector" warnings reported in #11923. The loop
|
||||
# runs on a daemon thread and is reclaimed on process exit;
|
||||
# per-session cleanup happens via self._client.aclose() above.
|
||||
|
||||
|
||||
def register(ctx) -> None:
|
||||
|
|
|
|||
66
plugins/spotify/__init__.py
Normal file
66
plugins/spotify/__init__.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
"""Spotify integration plugin — bundled, auto-loaded.
|
||||
|
||||
Registers 7 tools (playback, devices, queue, search, playlists, albums,
|
||||
library) into the ``spotify`` toolset. Each tool's handler is gated by
|
||||
``_check_spotify_available()`` — when the user has not run ``hermes auth
|
||||
spotify``, the tools remain registered (so they appear in ``hermes
|
||||
tools``) but the runtime check prevents dispatch.
|
||||
|
||||
Why a plugin instead of a top-level ``tools/`` file?
|
||||
|
||||
- ``plugins/`` is where third-party service integrations live (see
|
||||
``plugins/image_gen/`` for the backend-provider pattern, ``plugins/
|
||||
disk-cleanup/`` for the standalone pattern). ``tools/`` is reserved
|
||||
for foundational capabilities (terminal, read_file, web_search, etc.).
|
||||
- Mirroring the image_gen plugin layout (``plugins/<category>/<backend>/``
|
||||
for categories, flat ``plugins/<name>/`` for standalones) makes new
|
||||
service integrations a pattern contributors can copy.
|
||||
- Bundled + ``kind: backend`` auto-loads on startup just like image_gen
|
||||
backends — no user opt-in needed, no ``plugins.enabled`` config.
|
||||
|
||||
The Spotify auth flow (``hermes auth spotify``), CLI plumbing, and docs
|
||||
are unchanged. This move is purely structural.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from plugins.spotify.tools import (
|
||||
SPOTIFY_ALBUMS_SCHEMA,
|
||||
SPOTIFY_DEVICES_SCHEMA,
|
||||
SPOTIFY_LIBRARY_SCHEMA,
|
||||
SPOTIFY_PLAYBACK_SCHEMA,
|
||||
SPOTIFY_PLAYLISTS_SCHEMA,
|
||||
SPOTIFY_QUEUE_SCHEMA,
|
||||
SPOTIFY_SEARCH_SCHEMA,
|
||||
_check_spotify_available,
|
||||
_handle_spotify_albums,
|
||||
_handle_spotify_devices,
|
||||
_handle_spotify_library,
|
||||
_handle_spotify_playback,
|
||||
_handle_spotify_playlists,
|
||||
_handle_spotify_queue,
|
||||
_handle_spotify_search,
|
||||
)
|
||||
|
||||
_TOOLS = (
|
||||
("spotify_playback", SPOTIFY_PLAYBACK_SCHEMA, _handle_spotify_playback, "🎵"),
|
||||
("spotify_devices", SPOTIFY_DEVICES_SCHEMA, _handle_spotify_devices, "🔈"),
|
||||
("spotify_queue", SPOTIFY_QUEUE_SCHEMA, _handle_spotify_queue, "📻"),
|
||||
("spotify_search", SPOTIFY_SEARCH_SCHEMA, _handle_spotify_search, "🔎"),
|
||||
("spotify_playlists", SPOTIFY_PLAYLISTS_SCHEMA, _handle_spotify_playlists, "📚"),
|
||||
("spotify_albums", SPOTIFY_ALBUMS_SCHEMA, _handle_spotify_albums, "💿"),
|
||||
("spotify_library", SPOTIFY_LIBRARY_SCHEMA, _handle_spotify_library, "❤️"),
|
||||
)
|
||||
|
||||
|
||||
def register(ctx) -> None:
|
||||
"""Register all Spotify tools. Called once by the plugin loader."""
|
||||
for name, schema, handler, emoji in _TOOLS:
|
||||
ctx.register_tool(
|
||||
name=name,
|
||||
toolset="spotify",
|
||||
schema=schema,
|
||||
handler=handler,
|
||||
check_fn=_check_spotify_available,
|
||||
emoji=emoji,
|
||||
)
|
||||
435
plugins/spotify/client.py
Normal file
435
plugins/spotify/client.py
Normal file
|
|
@ -0,0 +1,435 @@
|
|||
"""Thin Spotify Web API helper used by Hermes native tools."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, Iterable, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
|
||||
from hermes_cli.auth import (
|
||||
AuthError,
|
||||
resolve_spotify_runtime_credentials,
|
||||
)
|
||||
|
||||
|
||||
class SpotifyError(RuntimeError):
|
||||
"""Base Spotify tool error."""
|
||||
|
||||
|
||||
class SpotifyAuthRequiredError(SpotifyError):
|
||||
"""Raised when the user needs to authenticate with Spotify first."""
|
||||
|
||||
|
||||
class SpotifyAPIError(SpotifyError):
|
||||
"""Structured Spotify API failure."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
*,
|
||||
status_code: Optional[int] = None,
|
||||
response_body: Optional[str] = None,
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
self.status_code = status_code
|
||||
self.response_body = response_body
|
||||
self.path = None
|
||||
|
||||
|
||||
class SpotifyClient:
|
||||
def __init__(self) -> None:
|
||||
self._runtime = self._resolve_runtime(refresh_if_expiring=True)
|
||||
|
||||
def _resolve_runtime(self, *, force_refresh: bool = False, refresh_if_expiring: bool = True) -> Dict[str, Any]:
|
||||
try:
|
||||
return resolve_spotify_runtime_credentials(
|
||||
force_refresh=force_refresh,
|
||||
refresh_if_expiring=refresh_if_expiring,
|
||||
)
|
||||
except AuthError as exc:
|
||||
raise SpotifyAuthRequiredError(str(exc)) from exc
|
||||
|
||||
@property
|
||||
def base_url(self) -> str:
|
||||
return str(self._runtime.get("base_url") or "").rstrip("/")
|
||||
|
||||
def _headers(self) -> Dict[str, str]:
|
||||
return {
|
||||
"Authorization": f"Bearer {self._runtime['access_token']}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
def request(
|
||||
self,
|
||||
method: str,
|
||||
path: str,
|
||||
*,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
json_body: Optional[Dict[str, Any]] = None,
|
||||
allow_retry_on_401: bool = True,
|
||||
empty_response: Optional[Dict[str, Any]] = None,
|
||||
) -> Any:
|
||||
url = f"{self.base_url}{path}"
|
||||
response = httpx.request(
|
||||
method,
|
||||
url,
|
||||
headers=self._headers(),
|
||||
params=_strip_none(params),
|
||||
json=_strip_none(json_body) if json_body is not None else None,
|
||||
timeout=30.0,
|
||||
)
|
||||
if response.status_code == 401 and allow_retry_on_401:
|
||||
self._runtime = self._resolve_runtime(force_refresh=True, refresh_if_expiring=True)
|
||||
return self.request(
|
||||
method,
|
||||
path,
|
||||
params=params,
|
||||
json_body=json_body,
|
||||
allow_retry_on_401=False,
|
||||
)
|
||||
if response.status_code >= 400:
|
||||
self._raise_api_error(response, method=method, path=path)
|
||||
if response.status_code == 204 or not response.content:
|
||||
return empty_response or {"success": True, "status_code": response.status_code, "empty": True}
|
||||
if "application/json" in response.headers.get("content-type", ""):
|
||||
return response.json()
|
||||
return {"success": True, "text": response.text}
|
||||
|
||||
def _raise_api_error(self, response: httpx.Response, *, method: str, path: str) -> None:
|
||||
detail = response.text.strip()
|
||||
message = _friendly_spotify_error_message(
|
||||
status_code=response.status_code,
|
||||
detail=_extract_spotify_error_detail(response, fallback=detail),
|
||||
method=method,
|
||||
path=path,
|
||||
retry_after=response.headers.get("Retry-After"),
|
||||
)
|
||||
error = SpotifyAPIError(message, status_code=response.status_code, response_body=detail)
|
||||
error.path = path
|
||||
raise error
|
||||
|
||||
def get_devices(self) -> Any:
|
||||
return self.request("GET", "/me/player/devices")
|
||||
|
||||
def transfer_playback(self, *, device_id: str, play: bool = False) -> Any:
|
||||
return self.request("PUT", "/me/player", json_body={
|
||||
"device_ids": [device_id],
|
||||
"play": play,
|
||||
})
|
||||
|
||||
def get_playback_state(self, *, market: Optional[str] = None) -> Any:
|
||||
return self.request(
|
||||
"GET",
|
||||
"/me/player",
|
||||
params={"market": market},
|
||||
empty_response={
|
||||
"status_code": 204,
|
||||
"empty": True,
|
||||
"message": "No active Spotify playback session was found. Open Spotify on a device and start playback, or transfer playback to an available device.",
|
||||
},
|
||||
)
|
||||
|
||||
def get_currently_playing(self, *, market: Optional[str] = None) -> Any:
|
||||
return self.request(
|
||||
"GET",
|
||||
"/me/player/currently-playing",
|
||||
params={"market": market},
|
||||
empty_response={
|
||||
"status_code": 204,
|
||||
"empty": True,
|
||||
"message": "Spotify is not currently playing anything. Start playback in Spotify and try again.",
|
||||
},
|
||||
)
|
||||
|
||||
def start_playback(
|
||||
self,
|
||||
*,
|
||||
device_id: Optional[str] = None,
|
||||
context_uri: Optional[str] = None,
|
||||
uris: Optional[list[str]] = None,
|
||||
offset: Optional[Dict[str, Any]] = None,
|
||||
position_ms: Optional[int] = None,
|
||||
) -> Any:
|
||||
return self.request(
|
||||
"PUT",
|
||||
"/me/player/play",
|
||||
params={"device_id": device_id},
|
||||
json_body={
|
||||
"context_uri": context_uri,
|
||||
"uris": uris,
|
||||
"offset": offset,
|
||||
"position_ms": position_ms,
|
||||
},
|
||||
)
|
||||
|
||||
def pause_playback(self, *, device_id: Optional[str] = None) -> Any:
|
||||
return self.request("PUT", "/me/player/pause", params={"device_id": device_id})
|
||||
|
||||
def skip_next(self, *, device_id: Optional[str] = None) -> Any:
|
||||
return self.request("POST", "/me/player/next", params={"device_id": device_id})
|
||||
|
||||
def skip_previous(self, *, device_id: Optional[str] = None) -> Any:
|
||||
return self.request("POST", "/me/player/previous", params={"device_id": device_id})
|
||||
|
||||
def seek(self, *, position_ms: int, device_id: Optional[str] = None) -> Any:
|
||||
return self.request("PUT", "/me/player/seek", params={
|
||||
"position_ms": position_ms,
|
||||
"device_id": device_id,
|
||||
})
|
||||
|
||||
def set_repeat(self, *, state: str, device_id: Optional[str] = None) -> Any:
|
||||
return self.request("PUT", "/me/player/repeat", params={"state": state, "device_id": device_id})
|
||||
|
||||
def set_shuffle(self, *, state: bool, device_id: Optional[str] = None) -> Any:
|
||||
return self.request("PUT", "/me/player/shuffle", params={"state": str(bool(state)).lower(), "device_id": device_id})
|
||||
|
||||
def set_volume(self, *, volume_percent: int, device_id: Optional[str] = None) -> Any:
|
||||
return self.request("PUT", "/me/player/volume", params={
|
||||
"volume_percent": volume_percent,
|
||||
"device_id": device_id,
|
||||
})
|
||||
|
||||
def get_queue(self) -> Any:
|
||||
return self.request("GET", "/me/player/queue")
|
||||
|
||||
def add_to_queue(self, *, uri: str, device_id: Optional[str] = None) -> Any:
|
||||
return self.request("POST", "/me/player/queue", params={"uri": uri, "device_id": device_id})
|
||||
|
||||
def search(
|
||||
self,
|
||||
*,
|
||||
query: str,
|
||||
search_types: list[str],
|
||||
limit: int = 10,
|
||||
offset: int = 0,
|
||||
market: Optional[str] = None,
|
||||
include_external: Optional[str] = None,
|
||||
) -> Any:
|
||||
return self.request("GET", "/search", params={
|
||||
"q": query,
|
||||
"type": ",".join(search_types),
|
||||
"limit": limit,
|
||||
"offset": offset,
|
||||
"market": market,
|
||||
"include_external": include_external,
|
||||
})
|
||||
|
||||
def get_my_playlists(self, *, limit: int = 20, offset: int = 0) -> Any:
|
||||
return self.request("GET", "/me/playlists", params={"limit": limit, "offset": offset})
|
||||
|
||||
def get_playlist(self, *, playlist_id: str, market: Optional[str] = None) -> Any:
|
||||
return self.request("GET", f"/playlists/{playlist_id}", params={"market": market})
|
||||
|
||||
def create_playlist(
|
||||
self,
|
||||
*,
|
||||
name: str,
|
||||
public: bool = False,
|
||||
collaborative: bool = False,
|
||||
description: Optional[str] = None,
|
||||
) -> Any:
|
||||
return self.request("POST", "/me/playlists", json_body={
|
||||
"name": name,
|
||||
"public": public,
|
||||
"collaborative": collaborative,
|
||||
"description": description,
|
||||
})
|
||||
|
||||
def add_playlist_items(
|
||||
self,
|
||||
*,
|
||||
playlist_id: str,
|
||||
uris: list[str],
|
||||
position: Optional[int] = None,
|
||||
) -> Any:
|
||||
return self.request("POST", f"/playlists/{playlist_id}/items", json_body={
|
||||
"uris": uris,
|
||||
"position": position,
|
||||
})
|
||||
|
||||
def remove_playlist_items(
|
||||
self,
|
||||
*,
|
||||
playlist_id: str,
|
||||
uris: list[str],
|
||||
snapshot_id: Optional[str] = None,
|
||||
) -> Any:
|
||||
return self.request("DELETE", f"/playlists/{playlist_id}/items", json_body={
|
||||
"items": [{"uri": uri} for uri in uris],
|
||||
"snapshot_id": snapshot_id,
|
||||
})
|
||||
|
||||
def update_playlist_details(
    self,
    *,
    playlist_id: str,
    name: Optional[str] = None,
    public: Optional[bool] = None,
    collaborative: Optional[bool] = None,
    description: Optional[str] = None,
) -> Any:
    """Update playlist metadata; fields left as None are passed through unchanged."""
    body = {
        "name": name,
        "public": public,
        "collaborative": collaborative,
        "description": description,
    }
    return self.request("PUT", f"/playlists/{playlist_id}", json_body=body)
|
||||
|
||||
def get_album(self, *, album_id: str, market: Optional[str] = None) -> Any:
    """Fetch album metadata by bare album id."""
    endpoint = f"/albums/{album_id}"
    return self.request("GET", endpoint, params={"market": market})
|
||||
|
||||
def get_album_tracks(self, *, album_id: str, limit: int = 20, offset: int = 0, market: Optional[str] = None) -> Any:
    """List an album's tracks, paginated."""
    params = {"limit": limit, "offset": offset, "market": market}
    return self.request("GET", f"/albums/{album_id}/tracks", params=params)
|
||||
|
||||
def get_saved_tracks(self, *, limit: int = 20, offset: int = 0, market: Optional[str] = None) -> Any:
    """List the user's saved ("liked") tracks, paginated."""
    params = {"limit": limit, "offset": offset, "market": market}
    return self.request("GET", "/me/tracks", params=params)
|
||||
|
||||
def save_library_items(self, *, uris: list[str]) -> Any:
    """Save the given item URIs to the user's library."""
    joined = ",".join(uris)
    return self.request("PUT", "/me/library", params={"uris": joined})
|
||||
|
||||
def library_contains(self, *, uris: list[str]) -> Any:
    """Check which of the given URIs are already in the user's library."""
    joined = ",".join(uris)
    return self.request("GET", "/me/library/contains", params={"uris": joined})
|
||||
|
||||
def get_saved_albums(self, *, limit: int = 20, offset: int = 0, market: Optional[str] = None) -> Any:
    """List the user's saved albums, paginated."""
    params = {"limit": limit, "offset": offset, "market": market}
    return self.request("GET", "/me/albums", params=params)
|
||||
|
||||
def remove_saved_tracks(self, *, track_ids: list[str]) -> Any:
    """Remove saved tracks from the library, given bare track ids."""
    joined = ",".join(f"spotify:track:{tid}" for tid in track_ids)
    return self.request("DELETE", "/me/library", params={"uris": joined})
|
||||
|
||||
def remove_saved_albums(self, *, album_ids: list[str]) -> Any:
    """Remove saved albums from the library, given bare album ids."""
    joined = ",".join(f"spotify:album:{aid}" for aid in album_ids)
    return self.request("DELETE", "/me/library", params={"uris": joined})
|
||||
|
||||
def get_recently_played(
    self,
    *,
    limit: int = 20,
    after: Optional[int] = None,
    before: Optional[int] = None,
) -> Any:
    """Fetch recently played tracks; *after*/*before* are Unix-ms cursors."""
    params = {"limit": limit, "after": after, "before": before}
    return self.request("GET", "/me/player/recently-played", params=params)
|
||||
|
||||
|
||||
def _extract_spotify_error_detail(response: httpx.Response, *, fallback: str) -> str:
|
||||
detail = fallback
|
||||
try:
|
||||
payload = response.json()
|
||||
if isinstance(payload, dict):
|
||||
error_obj = payload.get("error")
|
||||
if isinstance(error_obj, dict):
|
||||
detail = str(error_obj.get("message") or detail)
|
||||
elif isinstance(error_obj, str):
|
||||
detail = error_obj
|
||||
except Exception:
|
||||
pass
|
||||
return detail.strip()
|
||||
|
||||
|
||||
def _friendly_spotify_error_message(
|
||||
*,
|
||||
status_code: int,
|
||||
detail: str,
|
||||
method: str,
|
||||
path: str,
|
||||
retry_after: Optional[str],
|
||||
) -> str:
|
||||
normalized_detail = detail.lower()
|
||||
is_playback_path = path.startswith("/me/player")
|
||||
|
||||
if status_code == 401:
|
||||
return "Spotify authentication failed or expired. Run `hermes auth spotify` again."
|
||||
|
||||
if status_code == 403:
|
||||
if is_playback_path:
|
||||
return (
|
||||
"Spotify rejected this playback request. Playback control usually requires a Spotify Premium account "
|
||||
"and an active Spotify Connect device."
|
||||
)
|
||||
if "scope" in normalized_detail or "permission" in normalized_detail:
|
||||
return "Spotify rejected the request because the current auth scope is insufficient. Re-run `hermes auth spotify` to refresh permissions."
|
||||
return "Spotify rejected the request. The account may not have permission for this action."
|
||||
|
||||
if status_code == 404:
|
||||
if is_playback_path:
|
||||
return "Spotify could not find an active playback device or player session for this request."
|
||||
return "Spotify resource not found."
|
||||
|
||||
if status_code == 429:
|
||||
message = "Spotify rate limit exceeded."
|
||||
if retry_after:
|
||||
message += f" Retry after {retry_after} seconds."
|
||||
return message
|
||||
|
||||
if detail:
|
||||
return detail
|
||||
return f"Spotify API request failed with status {status_code}."
|
||||
|
||||
|
||||
def _strip_none(payload: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
if not payload:
|
||||
return {}
|
||||
return {key: value for key, value in payload.items() if value is not None}
|
||||
|
||||
|
||||
def normalize_spotify_id(value: str, expected_type: Optional[str] = None) -> str:
    """Reduce a bare id, ``spotify:`` URI, or open.spotify.com URL to a bare id.

    Raises SpotifyError for empty input or when the embedded item type
    conflicts with *expected_type*. Unrecognized shapes are returned as-is
    (treated as already being a bare id).
    """
    cleaned = (value or "").strip()
    if not cleaned:
        raise SpotifyError("Spotify id/uri/url is required.")
    if cleaned.startswith("spotify:"):
        parts = cleaned.split(":")
        if len(parts) >= 3:
            item_type, item_id = parts[1], parts[2]
            if expected_type and item_type != expected_type:
                raise SpotifyError(f"Expected a Spotify {expected_type}, got {item_type}.")
            return item_id
    if "open.spotify.com" in cleaned:
        segments = [seg for seg in urlparse(cleaned).path.split("/") if seg]
        if len(segments) >= 2:
            item_type, item_id = segments[0], segments[1]
            if expected_type and item_type != expected_type:
                raise SpotifyError(f"Expected a Spotify {expected_type}, got {item_type}.")
            return item_id
    return cleaned
|
||||
|
||||
|
||||
def normalize_spotify_uri(value: str, expected_type: Optional[str] = None) -> str:
    """Normalize an id / open.spotify.com URL / ``spotify:`` URI into a URI.

    - ``spotify:`` URIs are returned unchanged (after an optional type check).
    - ``open.spotify.com`` URLs become ``spotify:<type>:<id>``, using the type
      embedded in the URL when *expected_type* is not supplied.
    - Bare ids become ``spotify:{expected_type}:{id}`` when a type is known;
      with no type hint a bare id is returned unchanged, since its item type
      cannot be inferred.

    Raises SpotifyError for empty input or a type mismatch.
    """
    cleaned = (value or "").strip()
    if not cleaned:
        raise SpotifyError("Spotify URI/url/id is required.")
    if cleaned.startswith("spotify:"):
        if expected_type:
            parts = cleaned.split(":")
            if len(parts) >= 3 and parts[1] != expected_type:
                raise SpotifyError(f"Expected a Spotify {expected_type}, got {parts[1]}.")
        return cleaned
    if "open.spotify.com" in cleaned:
        # Fix: previously a URL with no expected_type fell through and was
        # returned verbatim, leaking raw URLs into API calls (e.g. queue add)
        # that require spotify: URIs. The URL itself carries the item type,
        # so build the canonical URI from it directly.
        path_parts = [part for part in urlparse(cleaned).path.split("/") if part]
        if len(path_parts) >= 2:
            item_type, item_id = path_parts[0], path_parts[1]
            if expected_type and item_type != expected_type:
                raise SpotifyError(f"Expected a Spotify {expected_type}, got {item_type}.")
            return f"spotify:{item_type}:{item_id}"
    item_id = normalize_spotify_id(cleaned, expected_type)
    if expected_type:
        return f"spotify:{expected_type}:{item_id}"
    return cleaned
|
||||
|
||||
|
||||
def normalize_spotify_uris(values: Iterable[str], expected_type: Optional[str] = None) -> list[str]:
    """Normalize a batch of ids/urls/uris, preserving order and dropping duplicates.

    Raises SpotifyError when the input normalizes to an empty list.
    """
    collected: list[str] = []
    for raw in values:
        candidate = normalize_spotify_uri(str(raw), expected_type)
        if candidate not in collected:
            collected.append(candidate)
    if not collected:
        raise SpotifyError("At least one Spotify item is required.")
    return collected
|
||||
|
||||
|
||||
def compact_json(data: Any) -> str:
    """Serialize *data* as JSON without escaping non-ASCII characters."""
    return json.dumps(data, ensure_ascii=False)
|
||||
13
plugins/spotify/plugin.yaml
Normal file
13
plugins/spotify/plugin.yaml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
name: spotify
|
||||
version: 1.0.0
|
||||
description: "Native Spotify integration — 7 tools (playback, devices, queue, search, playlists, albums, library) using Spotify Web API + PKCE OAuth. Auth via `hermes auth spotify`. Tools gate on `providers.spotify` in ~/.hermes/auth.json."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
provides_tools:
|
||||
- spotify_playback
|
||||
- spotify_devices
|
||||
- spotify_queue
|
||||
- spotify_search
|
||||
- spotify_playlists
|
||||
- spotify_albums
|
||||
- spotify_library
|
||||
454
plugins/spotify/tools.py
Normal file
454
plugins/spotify/tools.py
Normal file
|
|
@ -0,0 +1,454 @@
|
|||
"""Native Spotify tools for Hermes (registered via plugins/spotify)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from hermes_cli.auth import get_auth_status
|
||||
from plugins.spotify.client import (
|
||||
SpotifyAPIError,
|
||||
SpotifyAuthRequiredError,
|
||||
SpotifyClient,
|
||||
SpotifyError,
|
||||
normalize_spotify_id,
|
||||
normalize_spotify_uri,
|
||||
normalize_spotify_uris,
|
||||
)
|
||||
from tools.registry import tool_error, tool_result
|
||||
|
||||
|
||||
def _check_spotify_available() -> bool:
    """Return True when auth state reports a logged-in Spotify provider.

    Any failure (missing auth file, unexpected payload shape) is treated
    as "not available" rather than raised — this is a capability gate,
    not a diagnostic.
    """
    try:
        return bool(get_auth_status("spotify").get("logged_in"))
    except Exception:
        return False
|
||||
|
||||
|
||||
def _spotify_client() -> SpotifyClient:
    """Return a new SpotifyClient instance (one per tool invocation)."""
    return SpotifyClient()
|
||||
|
||||
|
||||
def _spotify_tool_error(exc: Exception) -> str:
    """Map a raised exception to a serialized tool_error payload.

    API errors carry their HTTP status code through to the tool result;
    everything else gets a generic "tool failed" message with the
    exception type for debuggability.
    """
    # NOTE(review): branch order matters if SpotifyAPIError subclasses
    # SpotifyError — confirm the hierarchy in plugins/spotify/client.py.
    if isinstance(exc, (SpotifyError, SpotifyAuthRequiredError)):
        return tool_error(str(exc))
    if isinstance(exc, SpotifyAPIError):
        return tool_error(str(exc), status_code=exc.status_code)
    return tool_error(f"Spotify tool failed: {type(exc).__name__}: {exc}")
|
||||
|
||||
|
||||
def _coerce_limit(raw: Any, *, default: int = 20, minimum: int = 1, maximum: int = 50) -> int:
|
||||
try:
|
||||
value = int(raw)
|
||||
except Exception:
|
||||
value = default
|
||||
return max(minimum, min(maximum, value))
|
||||
|
||||
|
||||
def _coerce_bool(raw: Any, default: bool = False) -> bool:
|
||||
if isinstance(raw, bool):
|
||||
return raw
|
||||
if isinstance(raw, str):
|
||||
cleaned = raw.strip().lower()
|
||||
if cleaned in {"1", "true", "yes", "on"}:
|
||||
return True
|
||||
if cleaned in {"0", "false", "no", "off"}:
|
||||
return False
|
||||
return default
|
||||
|
||||
|
||||
def _as_list(raw: Any) -> List[str]:
|
||||
if raw is None:
|
||||
return []
|
||||
if isinstance(raw, list):
|
||||
return [str(item).strip() for item in raw if str(item).strip()]
|
||||
return [str(raw).strip()] if str(raw).strip() else []
|
||||
|
||||
|
||||
def _describe_empty_playback(payload: Any, *, action: str) -> dict | None:
|
||||
if not isinstance(payload, dict) or not payload.get("empty"):
|
||||
return None
|
||||
if action == "get_currently_playing":
|
||||
return {
|
||||
"success": True,
|
||||
"action": action,
|
||||
"is_playing": False,
|
||||
"status_code": payload.get("status_code", 204),
|
||||
"message": payload.get("message") or "Spotify is not currently playing anything.",
|
||||
}
|
||||
if action == "get_state":
|
||||
return {
|
||||
"success": True,
|
||||
"action": action,
|
||||
"has_active_device": False,
|
||||
"status_code": payload.get("status_code", 204),
|
||||
"message": payload.get("message") or "No active Spotify playback session was found.",
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
def _handle_spotify_playback(args: dict, **kw) -> str:
    """Tool handler: dispatch playback actions to the Spotify client.

    Supported actions: get_state, get_currently_playing, play, pause, next,
    previous, seek, set_repeat, set_shuffle, set_volume, recently_played.
    Defaults to get_state. All client/normalization failures are funneled
    through _spotify_tool_error.
    """
    action = str(args.get("action") or "get_state").strip().lower()
    client = _spotify_client()
    try:
        if action == "get_state":
            payload = client.get_playback_state(market=args.get("market"))
            # A 204/empty payload means no active session — return a
            # friendlier shape instead of the raw sentinel.
            empty_result = _describe_empty_playback(payload, action=action)
            return tool_result(empty_result or payload)
        if action == "get_currently_playing":
            payload = client.get_currently_playing(market=args.get("market"))
            empty_result = _describe_empty_playback(payload, action=action)
            return tool_result(empty_result or payload)
        if action == "play":
            # Offset may be a dict like {"position": n} or {"uri": ...};
            # strip None values before forwarding.
            offset = args.get("offset")
            if isinstance(offset, dict):
                payload_offset = {k: v for k, v in offset.items() if v is not None}
            else:
                payload_offset = None
            # Explicit track URIs win; otherwise a context (album/playlist/
            # artist) may be supplied. The context type is sniffed from the
            # URI/URL shape so normalize_spotify_uri can validate it.
            uris = normalize_spotify_uris(_as_list(args.get("uris")), "track") if args.get("uris") else None
            context_uri = None
            if args.get("context_uri"):
                raw_context = str(args.get("context_uri"))
                context_type = None
                if raw_context.startswith("spotify:album:") or "/album/" in raw_context:
                    context_type = "album"
                elif raw_context.startswith("spotify:playlist:") or "/playlist/" in raw_context:
                    context_type = "playlist"
                elif raw_context.startswith("spotify:artist:") or "/artist/" in raw_context:
                    context_type = "artist"
                context_uri = normalize_spotify_uri(raw_context, context_type)
            result = client.start_playback(
                device_id=args.get("device_id"),
                context_uri=context_uri,
                uris=uris,
                offset=payload_offset,
                position_ms=args.get("position_ms"),
            )
            return tool_result({"success": True, "action": action, "result": result})
        if action == "pause":
            result = client.pause_playback(device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})
        if action == "next":
            result = client.skip_next(device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})
        if action == "previous":
            result = client.skip_previous(device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})
        if action == "seek":
            if args.get("position_ms") is None:
                return tool_error("position_ms is required for action='seek'")
            result = client.seek(position_ms=int(args["position_ms"]), device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})
        if action == "set_repeat":
            state = str(args.get("state") or "").strip().lower()
            if state not in {"track", "context", "off"}:
                return tool_error("state must be one of: track, context, off")
            result = client.set_repeat(state=state, device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})
        if action == "set_shuffle":
            result = client.set_shuffle(state=_coerce_bool(args.get("state")), device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})
        if action == "set_volume":
            if args.get("volume_percent") is None:
                return tool_error("volume_percent is required for action='set_volume'")
            # Clamp to Spotify's accepted 0-100 range before sending.
            result = client.set_volume(volume_percent=max(0, min(100, int(args["volume_percent"]))), device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})
        if action == "recently_played":
            # The API accepts only one cursor direction at a time.
            after = args.get("after")
            before = args.get("before")
            if after and before:
                return tool_error("Provide only one of 'after' or 'before'")
            return tool_result(client.get_recently_played(
                limit=_coerce_limit(args.get("limit"), default=20),
                after=int(after) if after is not None else None,
                before=int(before) if before is not None else None,
            ))
        return tool_error(f"Unknown spotify_playback action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_devices(args: dict, **kw) -> str:
    """Tool handler: list Spotify Connect devices or transfer playback.

    Actions: "list" (default) and "transfer" (requires device_id; the
    optional "play" flag resumes playback on the target device).
    """
    action = str(args.get("action") or "list").strip().lower()
    client = _spotify_client()
    try:
        if action == "list":
            return tool_result(client.get_devices())
        if action == "transfer":
            device_id = str(args.get("device_id") or "").strip()
            if not device_id:
                return tool_error("device_id is required for action='transfer'")
            result = client.transfer_playback(device_id=device_id, play=_coerce_bool(args.get("play")))
            return tool_result({"success": True, "action": action, "result": result})
        return tool_error(f"Unknown spotify_devices action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_queue(args: dict, **kw) -> str:
    """Tool handler: inspect the playback queue or add an item to it.

    Actions: "get" (default) and "add" (requires uri — accepts a bare id,
    URL, or spotify: URI, normalized before sending).
    """
    action = str(args.get("action") or "get").strip().lower()
    client = _spotify_client()
    try:
        if action == "get":
            return tool_result(client.get_queue())
        if action == "add":
            # No expected type: the queue accepts tracks and episodes alike.
            uri = normalize_spotify_uri(str(args.get("uri") or ""), None)
            result = client.add_to_queue(uri=uri, device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "uri": uri, "result": result})
        return tool_error(f"Unknown spotify_queue action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_search(args: dict, **kw) -> str:
    """Tool handler: search the Spotify catalog.

    Requires "query"; "types"/"type" selects item categories (defaults to
    track). Unknown category names are silently dropped — only an entirely
    invalid list produces an error.
    """
    client = _spotify_client()
    query = str(args.get("query") or "").strip()
    if not query:
        return tool_error("query is required")
    raw_types = _as_list(args.get("types") or args.get("type") or ["track"])
    # Keep only categories the /search endpoint understands.
    search_types = [value.lower() for value in raw_types if value.lower() in {"album", "artist", "playlist", "track", "show", "episode", "audiobook"}]
    if not search_types:
        return tool_error("types must contain one or more of: album, artist, playlist, track, show, episode, audiobook")
    try:
        return tool_result(client.search(
            query=query,
            search_types=search_types,
            limit=_coerce_limit(args.get("limit"), default=10),
            offset=max(0, int(args.get("offset") or 0)),
            market=args.get("market"),
            include_external=args.get("include_external"),
        ))
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_playlists(args: dict, **kw) -> str:
    """Tool handler: list/get/create/modify Spotify playlists.

    Actions: list (default), get, create, add_items, remove_items,
    update_details. Playlist ids are normalized from id/URI/URL form;
    item URIs accept any of those shapes as well.
    """
    action = str(args.get("action") or "list").strip().lower()
    client = _spotify_client()
    try:
        if action == "list":
            return tool_result(client.get_my_playlists(
                limit=_coerce_limit(args.get("limit"), default=20),
                offset=max(0, int(args.get("offset") or 0)),
            ))
        if action == "get":
            playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist")
            return tool_result(client.get_playlist(playlist_id=playlist_id, market=args.get("market")))
        if action == "create":
            name = str(args.get("name") or "").strip()
            if not name:
                return tool_error("name is required for action='create'")
            return tool_result(client.create_playlist(
                name=name,
                public=_coerce_bool(args.get("public")),
                collaborative=_coerce_bool(args.get("collaborative")),
                description=args.get("description"),
            ))
        if action == "add_items":
            playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist")
            uris = normalize_spotify_uris(_as_list(args.get("uris")))
            return tool_result(client.add_playlist_items(
                playlist_id=playlist_id,
                uris=uris,
                position=args.get("position"),
            ))
        if action == "remove_items":
            playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist")
            uris = normalize_spotify_uris(_as_list(args.get("uris")))
            return tool_result(client.remove_playlist_items(
                playlist_id=playlist_id,
                uris=uris,
                snapshot_id=args.get("snapshot_id"),
            ))
        if action == "update_details":
            # None values are forwarded as-is; the client/API decides what
            # an omitted field means.
            playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist")
            return tool_result(client.update_playlist_details(
                playlist_id=playlist_id,
                name=args.get("name"),
                public=args.get("public"),
                collaborative=args.get("collaborative"),
                description=args.get("description"),
            ))
        return tool_error(f"Unknown spotify_playlists action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_albums(args: dict, **kw) -> str:
    """Tool handler: fetch album metadata or an album's track listing.

    Actions: "get" (default, album metadata) and "tracks" (paginated track
    list). "album_id"/"id" accepts a bare id, spotify: URI, or
    open.spotify.com URL.
    """
    action = str(args.get("action") or "get").strip().lower()
    # Fix: validate the action before normalizing album_id, so an unknown
    # action is reported as such instead of a misleading
    # "Spotify id/uri/url is required." error when no id was given.
    if action not in {"get", "tracks"}:
        return tool_error(f"Unknown spotify_albums action: {action}")
    client = _spotify_client()
    try:
        album_id = normalize_spotify_id(str(args.get("album_id") or args.get("id") or ""), "album")
        if action == "get":
            return tool_result(client.get_album(album_id=album_id, market=args.get("market")))
        return tool_result(client.get_album_tracks(
            album_id=album_id,
            limit=_coerce_limit(args.get("limit"), default=20),
            offset=max(0, int(args.get("offset") or 0)),
            market=args.get("market"),
        ))
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_library(args: dict, **kw) -> str:
    """Unified handler for saved tracks + saved albums (formerly two tools).

    "kind" (tracks|albums) selects which library; actions are list
    (default), save, and remove. Save accepts uris/items in any id/URL/URI
    shape; remove works on bare ids (also normalized from any shape).
    """
    kind = str(args.get("kind") or "").strip().lower()
    if kind not in {"tracks", "albums"}:
        return tool_error("kind must be one of: tracks, albums")
    action = str(args.get("action") or "list").strip().lower()
    # Used to type-check/normalize incoming uris and ids.
    item_type = "track" if kind == "tracks" else "album"
    client = _spotify_client()
    try:
        if action == "list":
            limit = _coerce_limit(args.get("limit"), default=20)
            offset = max(0, int(args.get("offset") or 0))
            market = args.get("market")
            if kind == "tracks":
                return tool_result(client.get_saved_tracks(limit=limit, offset=offset, market=market))
            return tool_result(client.get_saved_albums(limit=limit, offset=offset, market=market))
        if action == "save":
            uris = normalize_spotify_uris(_as_list(args.get("uris") or args.get("items")), item_type)
            return tool_result(client.save_library_items(uris=uris))
        if action == "remove":
            ids = [normalize_spotify_id(item, item_type) for item in _as_list(args.get("ids") or args.get("items"))]
            if not ids:
                return tool_error("ids/items is required for action='remove'")
            if kind == "tracks":
                return tool_result(client.remove_saved_tracks(track_ids=ids))
            return tool_result(client.remove_saved_albums(album_ids=ids))
        return tool_error(f"Unknown spotify_library action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
# Shared schema fragment for plain string parameters.
COMMON_STRING = {"type": "string"}

# Function-calling schema for the spotify_playback tool. "action" selects
# the sub-operation; the remaining properties are per-action options (see
# _handle_spotify_playback for which action reads which field).
SPOTIFY_PLAYBACK_SCHEMA = {
    "name": "spotify_playback",
    "description": "Control Spotify playback, inspect the active playback state, or fetch recently played tracks.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["get_state", "get_currently_playing", "play", "pause", "next", "previous", "seek", "set_repeat", "set_shuffle", "set_volume", "recently_played"]},
            "device_id": COMMON_STRING,
            "market": COMMON_STRING,
            "context_uri": COMMON_STRING,
            "uris": {"type": "array", "items": COMMON_STRING},
            "offset": {"type": "object"},
            "position_ms": {"type": "integer"},
            "state": {"description": "For set_repeat use track/context/off. For set_shuffle use boolean-like true/false.", "oneOf": [{"type": "string"}, {"type": "boolean"}]},
            "volume_percent": {"type": "integer"},
            "limit": {"type": "integer", "description": "For recently_played: number of tracks (max 50)"},
            "after": {"type": "integer", "description": "For recently_played: Unix ms cursor (after this timestamp)"},
            "before": {"type": "integer", "description": "For recently_played: Unix ms cursor (before this timestamp)"},
        },
        "required": ["action"],
    },
}

# Schema for spotify_devices: list Connect devices / transfer playback.
SPOTIFY_DEVICES_SCHEMA = {
    "name": "spotify_devices",
    "description": "List Spotify Connect devices or transfer playback to a different device.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["list", "transfer"]},
            "device_id": COMMON_STRING,
            "play": {"type": "boolean"},
        },
        "required": ["action"],
    },
}

# Schema for spotify_queue: read the queue or enqueue a single item.
SPOTIFY_QUEUE_SCHEMA = {
    "name": "spotify_queue",
    "description": "Inspect the user's Spotify queue or add an item to it.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["get", "add"]},
            "uri": COMMON_STRING,
            "device_id": COMMON_STRING,
        },
        "required": ["action"],
    },
}

# Schema for spotify_search. Both "types" (array) and singular "type" are
# accepted; the handler merges them.
SPOTIFY_SEARCH_SCHEMA = {
    "name": "spotify_search",
    "description": "Search the Spotify catalog for tracks, albums, artists, playlists, shows, or episodes.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": COMMON_STRING,
            "types": {"type": "array", "items": COMMON_STRING},
            "type": COMMON_STRING,
            "limit": {"type": "integer"},
            "offset": {"type": "integer"},
            "market": COMMON_STRING,
            "include_external": COMMON_STRING,
        },
        "required": ["query"],
    },
}

# Schema for spotify_playlists (list/get/create/add_items/remove_items/
# update_details — see _handle_spotify_playlists).
SPOTIFY_PLAYLISTS_SCHEMA = {
    "name": "spotify_playlists",
    "description": "List, inspect, create, update, and modify Spotify playlists.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["list", "get", "create", "add_items", "remove_items", "update_details"]},
            "playlist_id": COMMON_STRING,
            "market": COMMON_STRING,
            "limit": {"type": "integer"},
            "offset": {"type": "integer"},
            "name": COMMON_STRING,
            "description": COMMON_STRING,
            "public": {"type": "boolean"},
            "collaborative": {"type": "boolean"},
            "uris": {"type": "array", "items": COMMON_STRING},
            "position": {"type": "integer"},
            "snapshot_id": COMMON_STRING,
        },
        "required": ["action"],
    },
}

# Schema for spotify_albums; "album_id" and "id" are interchangeable.
SPOTIFY_ALBUMS_SCHEMA = {
    "name": "spotify_albums",
    "description": "Fetch Spotify album metadata or album tracks.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["get", "tracks"]},
            "album_id": COMMON_STRING,
            "id": COMMON_STRING,
            "market": COMMON_STRING,
            "limit": {"type": "integer"},
            "offset": {"type": "integer"},
        },
        "required": ["action"],
    },
}

# Schema for spotify_library; "kind" routes to saved tracks vs saved albums,
# "items" is accepted as an alias for uris (save) or ids (remove).
SPOTIFY_LIBRARY_SCHEMA = {
    "name": "spotify_library",
    "description": "List, save, or remove the user's saved Spotify tracks or albums. Use `kind` to select which.",
    "parameters": {
        "type": "object",
        "properties": {
            "kind": {"type": "string", "enum": ["tracks", "albums"], "description": "Which library to operate on"},
            "action": {"type": "string", "enum": ["list", "save", "remove"]},
            "limit": {"type": "integer"},
            "offset": {"type": "integer"},
            "market": COMMON_STRING,
            "uris": {"type": "array", "items": COMMON_STRING},
            "ids": {"type": "array", "items": COMMON_STRING},
            "items": {"type": "array", "items": COMMON_STRING},
        },
        "required": ["kind", "action"],
    },
}
|
||||
70
plugins/strike-freedom-cockpit/README.md
Normal file
70
plugins/strike-freedom-cockpit/README.md
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# Strike Freedom Cockpit — dashboard skin demo
|
||||
|
||||
Demonstrates how the dashboard skin+plugin system can be used to build a
|
||||
fully custom cockpit-style reskin without touching the core dashboard.
|
||||
|
||||
Two pieces:
|
||||
|
||||
- `theme/strike-freedom.yaml` — a dashboard theme YAML that paints the
|
||||
palette, typography, layout variant (`cockpit`), component chrome
|
||||
(notched card corners, scanlines, accent colors), and declares asset
|
||||
slots (`hero`, `crest`, `bg`).
|
||||
- `dashboard/` — a plugin that populates the `sidebar`, `header-left`,
|
||||
and `footer-right` slots reserved by the cockpit layout. The sidebar
|
||||
renders an MS-STATUS panel with segmented telemetry bars driven by
|
||||
real agent status; the header-left injects a COMPASS crest; the
|
||||
footer-right replaces the default org tagline.
|
||||
|
||||
## Install
|
||||
|
||||
1. **Theme** — copy the theme YAML into your Hermes home:
|
||||
|
||||
```
|
||||
cp theme/strike-freedom.yaml ~/.hermes/dashboard-themes/
|
||||
```
|
||||
|
||||
2. **Plugin** — the `dashboard/` directory gets auto-discovered because
|
||||
it lives under `plugins/` in the repo. On a user install, copy the
|
||||
whole plugin directory into `~/.hermes/plugins/`:
|
||||
|
||||
```
|
||||
cp -r . ~/.hermes/plugins/strike-freedom-cockpit
|
||||
```
|
||||
|
||||
3. Restart the web UI (or `GET /api/dashboard/plugins/rescan`), open it,
|
||||
pick **Strike Freedom** from the theme switcher.
|
||||
|
||||
## Customising the artwork
|
||||
|
||||
The sidebar plugin reads `--theme-asset-hero` and `--theme-asset-crest`
|
||||
from the active theme. Drop your own URLs into the theme YAML:
|
||||
|
||||
```yaml
|
||||
assets:
|
||||
hero: "/my-images/strike-freedom.png"
|
||||
crest: "/my-images/compass-crest.svg"
|
||||
bg: "/my-images/cosmic-era-bg.jpg"
|
||||
```
|
||||
|
||||
The plugin reads those at render time — no plugin code changes needed
|
||||
to swap artwork across themes.
|
||||
|
||||
## What this demo proves
|
||||
|
||||
The dashboard skin+plugin system supports (ref: `web/src/themes/types.ts`,
|
||||
`web/src/plugins/slots.ts`):
|
||||
|
||||
- Palette, typography, font URLs, density, radius — already present
|
||||
- **Asset URLs exposed as CSS vars** (bg / hero / crest / logo /
|
||||
sidebar / header + arbitrary `custom.*`)
|
||||
- **Raw `customCSS` blocks** injected as scoped `<style>` tags
|
||||
- **Per-component style overrides** (card / header / sidebar / backdrop /
|
||||
tab / progress / footer / badge / page) via CSS vars
|
||||
- **`layoutVariant`** — `standard`, `cockpit`, or `tiled`
|
||||
- **Plugin slots** — 10 named shell slots plugins can inject into
|
||||
(`backdrop`, `header-left/right/banner`, `sidebar`, `pre-main`,
|
||||
`post-main`, `footer-left/right`, `overlay`)
|
||||
- **Route overrides** — plugins can replace a built-in page entirely
|
||||
(`tab.override: "/"`) instead of just adding a tab
|
||||
- **Hidden plugins** — slot-only plugins that never show in the nav
|
||||
(`tab.hidden: true`) — as used here
|
||||
309
plugins/strike-freedom-cockpit/dashboard/dist/index.js
vendored
Normal file
309
plugins/strike-freedom-cockpit/dashboard/dist/index.js
vendored
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
/**
|
||||
* Strike Freedom Cockpit — dashboard plugin demo.
|
||||
*
|
||||
* A slot-only plugin (manifest sets tab.hidden: true) that populates
|
||||
* three shell slots when the user has the ``strike-freedom`` theme
|
||||
* selected (or any theme that picks layoutVariant: cockpit):
|
||||
*
|
||||
* - sidebar → MS-STATUS panel: ENERGY / SHIELD / POWER bars,
|
||||
* ZGMF-X20A identity line, pilot block, hero
|
||||
* render (from --theme-asset-hero when the theme
|
||||
* provides one).
|
||||
* - header-left → COMPASS faction crest (uses --theme-asset-crest
|
||||
* if provided, falls back to a geometric SVG).
|
||||
* - footer-right → COSMIC ERA tagline that replaces the default
|
||||
* footer org line.
|
||||
*
|
||||
* The plugin demonstrates every extension point added alongside the
|
||||
* slot system: registerSlot, tab.hidden, reading theme asset CSS vars
|
||||
* from plugin code, and rendering above the built-in route content.
|
||||
*/
|
||||
(function () {
|
||||
"use strict";
|
||||
|
||||
const SDK = window.__HERMES_PLUGIN_SDK__;
|
||||
const PLUGINS = window.__HERMES_PLUGINS__;
|
||||
if (!SDK || !PLUGINS || !PLUGINS.registerSlot) {
|
||||
// Old dashboard bundle without slot support — bail silently rather
|
||||
// than breaking the page.
|
||||
return;
|
||||
}
|
||||
|
||||
const { React } = SDK;
|
||||
const { useState, useEffect } = SDK.hooks;
|
||||
const { api } = SDK;
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
/** Read a CSS custom property from :root. Empty string when unset. */
|
||||
function cssVar(name) {
|
||||
if (typeof document === "undefined") return "";
|
||||
return getComputedStyle(document.documentElement).getPropertyValue(name).trim();
|
||||
}
|
||||
|
||||
/** Segmented chip progress bar — 10 cells filled proportionally to value. */
|
||||
function TelemetryBar(props) {
|
||||
const { label, value, color } = props;
|
||||
const cells = [];
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const filled = Math.round(value / 10) > i;
|
||||
cells.push(
|
||||
React.createElement("span", {
|
||||
key: i,
|
||||
style: {
|
||||
flex: 1,
|
||||
height: 8,
|
||||
background: filled ? color : "rgba(255,255,255,0.06)",
|
||||
transition: "background 200ms",
|
||||
clipPath: "polygon(2px 0, 100% 0, calc(100% - 2px) 100%, 0 100%)",
|
||||
},
|
||||
}),
|
||||
);
|
||||
}
|
||||
return React.createElement(
|
||||
"div",
|
||||
{ style: { display: "flex", flexDirection: "column", gap: 4 } },
|
||||
React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
display: "flex",
|
||||
justifyContent: "space-between",
|
||||
fontSize: "0.65rem",
|
||||
letterSpacing: "0.12em",
|
||||
opacity: 0.75,
|
||||
},
|
||||
},
|
||||
React.createElement("span", null, label),
|
||||
React.createElement("span", { style: { color, fontWeight: 700 } }, value + "%"),
|
||||
),
|
||||
React.createElement(
|
||||
"div",
|
||||
{ style: { display: "flex", gap: 2 } },
|
||||
cells,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Sidebar: MS-STATUS panel
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
function SidebarSlot() {
|
||||
// Pull live-ish numbers from the status API so the plugin isn't just
|
||||
// a static decoration. Fall back to full bars if the API is slow /
|
||||
// unavailable.
|
||||
const [status, setStatus] = useState(null);
|
||||
useEffect(function () {
|
||||
let cancel = false;
|
||||
api.getStatus()
|
||||
.then(function (s) { if (!cancel) setStatus(s); })
|
||||
.catch(function () {});
|
||||
return function () { cancel = true; };
|
||||
}, []);
|
||||
|
||||
// Map real status signals to HUD telemetry. Energy/shield/power
|
||||
// aren't literal concepts on a software agent, so we read them from
|
||||
// adjacent signals: active sessions, gateway connected-platforms,
|
||||
// and agent-online health.
|
||||
const energy = status && status.gateway_online ? 92 : 18;
|
||||
const shield = status && status.connected_platforms
|
||||
? Math.min(100, 40 + (status.connected_platforms.length * 15))
|
||||
: 70;
|
||||
const power = status && status.active_sessions
|
||||
? Math.min(100, 55 + (status.active_sessions.length * 10))
|
||||
: 87;
|
||||
|
||||
const hero = cssVar("--theme-asset-hero");
|
||||
|
||||
return React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
padding: "1rem 0.75rem",
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
gap: "1rem",
|
||||
fontFamily: "var(--theme-font-display, sans-serif)",
|
||||
letterSpacing: "0.08em",
|
||||
textTransform: "uppercase",
|
||||
fontSize: "0.65rem",
|
||||
},
|
||||
},
|
||||
// Header line
|
||||
React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
borderBottom: "1px solid rgba(64,200,255,0.3)",
|
||||
paddingBottom: 8,
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
gap: 2,
|
||||
},
|
||||
},
|
||||
React.createElement("span", { style: { opacity: 0.6 } }, "ms status"),
|
||||
React.createElement("span", { style: { fontWeight: 700, fontSize: "0.85rem" } }, "zgmf-x20a"),
|
||||
React.createElement("span", { style: { opacity: 0.6, fontSize: "0.6rem" } }, "strike freedom"),
|
||||
),
|
||||
// Hero slot — only renders when the theme provides one.
|
||||
hero
|
||||
? React.createElement("div", {
|
||||
style: {
|
||||
width: "100%",
|
||||
aspectRatio: "3 / 4",
|
||||
backgroundImage: hero,
|
||||
backgroundSize: "contain",
|
||||
backgroundPosition: "center",
|
||||
backgroundRepeat: "no-repeat",
|
||||
opacity: 0.85,
|
||||
},
|
||||
"aria-hidden": true,
|
||||
})
|
||||
: React.createElement("div", {
|
||||
style: {
|
||||
width: "100%",
|
||||
aspectRatio: "3 / 4",
|
||||
border: "1px dashed rgba(64,200,255,0.25)",
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
fontSize: "0.55rem",
|
||||
opacity: 0.4,
|
||||
},
|
||||
}, "hero slot — set assets.hero in theme"),
|
||||
// Pilot block
|
||||
React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
borderTop: "1px solid rgba(64,200,255,0.18)",
|
||||
borderBottom: "1px solid rgba(64,200,255,0.18)",
|
||||
padding: "8px 0",
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
gap: 2,
|
||||
},
|
||||
},
|
||||
React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "pilot"),
|
||||
React.createElement("span", { style: { fontWeight: 700 } }, "hermes agent"),
|
||||
React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "compass"),
|
||||
),
|
||||
// Telemetry bars
|
||||
React.createElement(TelemetryBar, { label: "energy", value: energy, color: "#ffce3a" }),
|
||||
React.createElement(TelemetryBar, { label: "shield", value: shield, color: "#3fd3ff" }),
|
||||
React.createElement(TelemetryBar, { label: "power", value: power, color: "#ff3a5e" }),
|
||||
// System online
|
||||
React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
marginTop: 4,
|
||||
padding: "6px 8px",
|
||||
border: "1px solid rgba(74,222,128,0.4)",
|
||||
color: "#4ade80",
|
||||
textAlign: "center",
|
||||
fontWeight: 700,
|
||||
fontSize: "0.6rem",
|
||||
},
|
||||
},
|
||||
status && status.gateway_online ? "system online" : "system offline",
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Header-left: COMPASS crest
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
function HeaderCrestSlot() {
|
||||
const crest = cssVar("--theme-asset-crest");
|
||||
const inner = crest
|
||||
? React.createElement("div", {
|
||||
style: {
|
||||
width: 28,
|
||||
height: 28,
|
||||
backgroundImage: crest,
|
||||
backgroundSize: "contain",
|
||||
backgroundPosition: "center",
|
||||
backgroundRepeat: "no-repeat",
|
||||
},
|
||||
"aria-hidden": true,
|
||||
})
|
||||
: React.createElement(
|
||||
"svg",
|
||||
{
|
||||
width: 28,
|
||||
height: 28,
|
||||
viewBox: "0 0 28 28",
|
||||
fill: "none",
|
||||
stroke: "currentColor",
|
||||
strokeWidth: 1.5,
|
||||
"aria-hidden": true,
|
||||
},
|
||||
React.createElement("path", { d: "M14 2 L26 14 L14 26 L2 14 Z" }),
|
||||
React.createElement("path", { d: "M14 8 L20 14 L14 20 L8 14 Z" }),
|
||||
React.createElement("circle", { cx: 14, cy: 14, r: 2, fill: "currentColor" }),
|
||||
);
|
||||
return React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
paddingLeft: 12,
|
||||
paddingRight: 8,
|
||||
color: "var(--color-accent, #3fd3ff)",
|
||||
},
|
||||
},
|
||||
inner,
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Footer-right: COSMIC ERA tagline
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
function FooterTaglineSlot() {
|
||||
return React.createElement(
|
||||
"span",
|
||||
{
|
||||
style: {
|
||||
fontFamily: "var(--theme-font-display, sans-serif)",
|
||||
fontSize: "0.6rem",
|
||||
letterSpacing: "0.18em",
|
||||
textTransform: "uppercase",
|
||||
opacity: 0.75,
|
||||
mixBlendMode: "plus-lighter",
|
||||
},
|
||||
},
|
||||
"compass hermes systems / cosmic era 71",
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Hidden tab placeholder — tab.hidden=true means this never renders in
|
||||
// the nav, but we still register something sensible in case someone
|
||||
// manually navigates to /strike-freedom-cockpit (e.g. via a bookmark).
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
function HiddenPage() {
|
||||
return React.createElement(
|
||||
"div",
|
||||
{ style: { padding: "2rem", opacity: 0.6, fontSize: "0.8rem" } },
|
||||
"Strike Freedom cockpit is a slot-only plugin — it populates the sidebar, header, and footer instead of showing a tab page.",
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Registration
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const NAME = "strike-freedom-cockpit";
|
||||
PLUGINS.register(NAME, HiddenPage);
|
||||
PLUGINS.registerSlot(NAME, "sidebar", SidebarSlot);
|
||||
PLUGINS.registerSlot(NAME, "header-left", HeaderCrestSlot);
|
||||
PLUGINS.registerSlot(NAME, "footer-right", FooterTaglineSlot);
|
||||
})();
|
||||
14
plugins/strike-freedom-cockpit/dashboard/manifest.json
Normal file
14
plugins/strike-freedom-cockpit/dashboard/manifest.json
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"name": "strike-freedom-cockpit",
|
||||
"label": "Strike Freedom Cockpit",
|
||||
"description": "MS-STATUS sidebar + header crest for the Strike Freedom theme",
|
||||
"icon": "Shield",
|
||||
"version": "1.0.0",
|
||||
"tab": {
|
||||
"path": "/strike-freedom-cockpit",
|
||||
"position": "end",
|
||||
"hidden": true
|
||||
},
|
||||
"slots": ["sidebar", "header-left", "footer-right"],
|
||||
"entry": "dist/index.js"
|
||||
}
|
||||
126
plugins/strike-freedom-cockpit/theme/strike-freedom.yaml
Normal file
126
plugins/strike-freedom-cockpit/theme/strike-freedom.yaml
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
# Strike Freedom — Hermes dashboard theme demo
|
||||
#
|
||||
# Copy this file to ~/.hermes/dashboard-themes/strike-freedom.yaml and
|
||||
# restart the web UI (or hit `/api/dashboard/plugins/rescan`). Pair with
|
||||
# the `strike-freedom-cockpit` plugin (plugins/strike-freedom-cockpit/)
|
||||
# for the full cockpit experience — this theme paints the palette,
|
||||
# chrome, and layout; the plugin supplies the MS-STATUS sidebar + header
|
||||
# crest that the cockpit layout variant reserves space for.
|
||||
#
|
||||
# Demonstrates every theme extension point added alongside the plugin
|
||||
# slot system: palette, typography, layoutVariant, assets, customCSS,
|
||||
# componentStyles, colorOverrides.
|
||||
name: strike-freedom
|
||||
label: "Strike Freedom"
|
||||
description: "Cockpit HUD — deep navy + cyan + gold accents"
|
||||
|
||||
# ------- palette (3-layer) -------
|
||||
palette:
|
||||
background: "#05091a"
|
||||
midground: "#d8f0ff"
|
||||
foreground:
|
||||
hex: "#ffffff"
|
||||
alpha: 0
|
||||
warmGlow: "rgba(255, 199, 55, 0.24)"
|
||||
noiseOpacity: 0.7
|
||||
|
||||
# ------- typography -------
|
||||
typography:
|
||||
fontSans: '"Orbitron", "Eurostile", "Bank Gothic", "Impact", sans-serif'
|
||||
fontMono: '"Share Tech Mono", "JetBrains Mono", ui-monospace, monospace'
|
||||
fontDisplay: '"Orbitron", "Eurostile", "Impact", sans-serif'
|
||||
fontUrl: "https://fonts.googleapis.com/css2?family=Orbitron:wght@400;500;600;700;800&family=Share+Tech+Mono&display=swap"
|
||||
baseSize: "14px"
|
||||
lineHeight: "1.5"
|
||||
letterSpacing: "0.04em"
|
||||
|
||||
# ------- layout -------
|
||||
layout:
|
||||
radius: "0"
|
||||
density: "compact"
|
||||
|
||||
# ``cockpit`` reserves a 260px left rail that the shell renders when the
|
||||
# user is on this theme. A paired plugin populates the rail via the
|
||||
# ``sidebar`` slot; with no plugin the rail shows a placeholder.
|
||||
layoutVariant: cockpit
|
||||
|
||||
# ------- assets -------
|
||||
# Use any URL (https, data:, /dashboard-plugins/...) or a pre-wrapped
|
||||
# ``url(...)``/``linear-gradient(...)`` expression. The shell exposes
|
||||
# each as a CSS var so plugins can read the same imagery.
|
||||
assets:
|
||||
bg: "linear-gradient(140deg, #05091a 0%, #0a1530 55%, #102048 100%)"
|
||||
# Plugin reads --theme-asset-hero / --theme-asset-crest to populate
|
||||
# its sidebar hero render + header crest. Replace these URLs with your
|
||||
# own artwork (copy files into ~/.hermes/dashboard-themes/assets/ and
|
||||
# reference them as /dashboard-themes-assets/strike-freedom/hero.png
|
||||
# once that static route is wired up — for now use inline data URLs or
|
||||
# remote URLs).
|
||||
hero: ""
|
||||
crest: ""
|
||||
|
||||
# ------- component chrome -------
|
||||
# Each bucket's props become CSS vars (--component-<bucket>-<kebab>) that
|
||||
# built-in shell components (Card, header, sidebar, backdrop) consume.
|
||||
componentStyles:
|
||||
card:
|
||||
# Notched corners on the top-left + bottom-right — classic mecha UI.
|
||||
clipPath: "polygon(12px 0, 100% 0, 100% calc(100% - 12px), calc(100% - 12px) 100%, 0 100%, 0 12px)"
|
||||
background: "linear-gradient(180deg, rgba(10, 22, 52, 0.85) 0%, rgba(5, 9, 26, 0.92) 100%)"
|
||||
boxShadow: "inset 0 0 0 1px rgba(64, 200, 255, 0.28), 0 0 18px -6px rgba(64, 200, 255, 0.4)"
|
||||
header:
|
||||
background: "linear-gradient(180deg, rgba(16, 32, 72, 0.95) 0%, rgba(5, 9, 26, 0.9) 100%)"
|
||||
sidebar:
|
||||
background: "linear-gradient(180deg, rgba(8, 18, 42, 0.88) 0%, rgba(5, 9, 26, 0.85) 100%)"
|
||||
tab:
|
||||
clipPath: "polygon(6px 0, 100% 0, calc(100% - 6px) 100%, 0 100%)"
|
||||
backdrop:
|
||||
backgroundSize: "cover"
|
||||
backgroundPosition: "center"
|
||||
fillerOpacity: "1"
|
||||
fillerBlendMode: "normal"
|
||||
|
||||
# ------- color overrides -------
|
||||
colorOverrides:
|
||||
primary: "#ffce3a"
|
||||
primaryForeground: "#05091a"
|
||||
accent: "#3fd3ff"
|
||||
accentForeground: "#05091a"
|
||||
ring: "#3fd3ff"
|
||||
success: "#4ade80"
|
||||
warning: "#ffce3a"
|
||||
destructive: "#ff3a5e"
|
||||
border: "rgba(64, 200, 255, 0.28)"
|
||||
|
||||
# ------- customCSS -------
|
||||
# Raw CSS injected as a scoped <style> tag on theme apply. Use this for
|
||||
# selector-level tweaks componentStyles can't express (pseudo-elements,
|
||||
# animations, media queries). Bounded to 32 KiB per theme.
|
||||
customCSS: |
|
||||
/* Scanline overlay — subtle, only when theme is active. */
|
||||
:root[data-layout-variant="cockpit"] body::before {
|
||||
content: "";
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
pointer-events: none;
|
||||
z-index: 100;
|
||||
background: repeating-linear-gradient(
|
||||
to bottom,
|
||||
transparent 0px,
|
||||
transparent 2px,
|
||||
rgba(64, 200, 255, 0.035) 3px,
|
||||
rgba(64, 200, 255, 0.035) 4px
|
||||
);
|
||||
mix-blend-mode: screen;
|
||||
}
|
||||
|
||||
/* Chevron pips on card corners. */
|
||||
[data-layout-variant="cockpit"] .border-border::before,
|
||||
[data-layout-variant="cockpit"] .border-border::after {
|
||||
content: "";
|
||||
position: absolute;
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border: 1px solid rgba(64, 200, 255, 0.55);
|
||||
pointer-events: none;
|
||||
}
|
||||
|
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||
|
||||
[project]
|
||||
name = "hermes-agent"
|
||||
version = "0.10.0"
|
||||
version = "0.11.0"
|
||||
description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
|
@ -78,6 +78,7 @@ termux = [
|
|||
]
|
||||
dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
|
||||
feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
|
||||
# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean.
|
||||
web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
|
||||
rl = [
|
||||
"atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
|
||||
|
|
|
|||
938
run_agent.py
938
run_agent.py
File diff suppressed because it is too large
Load diff
|
|
@ -44,6 +44,13 @@ AUTHOR_MAP = {
|
|||
"teknium@nousresearch.com": "teknium1",
|
||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||
"343873859@qq.com": "DrStrangerUJN",
|
||||
"uzmpsk.dilekakbas@gmail.com": "dlkakbs",
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
"130918800+devorun@users.noreply.github.com": "devorun",
|
||||
"maks.mir@yahoo.com": "say8hi",
|
||||
"web3blind@users.noreply.github.com": "web3blind",
|
||||
"julia@alexland.us": "alexg0bot",
|
||||
"1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
|
|
@ -55,13 +62,18 @@ AUTHOR_MAP = {
|
|||
"keifergu@tencent.com": "keifergu",
|
||||
"kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
|
||||
"abner.the.foreman@agentmail.to": "Abnertheforeman",
|
||||
"thomasgeorgevii09@gmail.com": "tochukwuada",
|
||||
"harryykyle1@gmail.com": "hharry11",
|
||||
"kshitijk4poor@gmail.com": "kshitijk4poor",
|
||||
"keira.voss94@gmail.com": "keiravoss94",
|
||||
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
|
||||
"255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
|
||||
"cyprian@ironin.pl": "iRonin",
|
||||
"valdi.jorge@gmail.com": "jvcl",
|
||||
"q19dcp@gmail.com": "aj-nt",
|
||||
"ebukau84@gmail.com": "UgwujaGeorge",
|
||||
"francip@gmail.com": "francip",
|
||||
"omni@comelse.com": "omnissiah-comelse",
|
||||
"oussama.redcode@gmail.com": "mavrickdeveloper",
|
||||
|
|
@ -74,6 +86,7 @@ AUTHOR_MAP = {
|
|||
"77628552+raulvidis@users.noreply.github.com": "raulvidis",
|
||||
"145567217+Aum08Desai@users.noreply.github.com": "Aum08Desai",
|
||||
"256820943+kshitij-eliza@users.noreply.github.com": "kshitij-eliza",
|
||||
"jiechengwu@pony.ai": "Jason2031",
|
||||
"44278268+shitcoinsherpa@users.noreply.github.com": "shitcoinsherpa",
|
||||
"104278804+Sertug17@users.noreply.github.com": "Sertug17",
|
||||
"112503481+caentzminger@users.noreply.github.com": "caentzminger",
|
||||
|
|
@ -164,7 +177,41 @@ AUTHOR_MAP = {
|
|||
"socrates1024@gmail.com": "socrates1024",
|
||||
"seanalt555@gmail.com": "Salt-555",
|
||||
"satelerd@gmail.com": "satelerd",
|
||||
"dan@danlynn.com": "danklynn",
|
||||
"mattmaximo@hotmail.com": "MattMaximo",
|
||||
"149063006+j3ffffff@users.noreply.github.com": "j3ffffff",
|
||||
"A-FdL-Prog@users.noreply.github.com": "A-FdL-Prog",
|
||||
"l0hde@users.noreply.github.com": "l0hde",
|
||||
"difujia@users.noreply.github.com": "difujia",
|
||||
"vominh1919@gmail.com": "vominh1919",
|
||||
"yue.gu2023@gmail.com": "YueLich",
|
||||
"51783311+andyylin@users.noreply.github.com": "andyylin",
|
||||
"me@jakubkrcmar.cz": "jakubkrcmar",
|
||||
"prasadus92@gmail.com": "prasadus92",
|
||||
"michael@make.software": "mssteuer",
|
||||
"der@konsi.org": "konsisumer",
|
||||
"abogale2@gmail.com": "amanuel2",
|
||||
"alexazzjjtt@163.com": "alexzhu0",
|
||||
"pub_forgreatagent@antgroup.com": "AntAISecurityLab",
|
||||
"252620095+briandevans@users.noreply.github.com": "briandevans",
|
||||
"danielrpike9@gmail.com": "Bartok9",
|
||||
"skozyuk@cruxexperts.com": "CruxExperts",
|
||||
"154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
|
||||
"mgparkprint@gmail.com": "vlwkaos",
|
||||
"tranquil_flow@protonmail.com": "Tranquil-Flow",
|
||||
"wangshengyang2004@163.com": "Wangshengyang2004",
|
||||
"hasan.ali13381@gmail.com": "H-Ali13381",
|
||||
"xienb@proton.me": "XieNBi",
|
||||
"139681654+maymuneth@users.noreply.github.com": "maymuneth",
|
||||
"zengwei@nightq.cn": "nightq",
|
||||
"1434494126@qq.com": "5park1e",
|
||||
"158153005+5park1e@users.noreply.github.com": "5park1e",
|
||||
"innocarpe@gmail.com": "innocarpe",
|
||||
"noreply@ked.com": "qike-ms",
|
||||
"andrekurait@gmail.com": "AndreKurait",
|
||||
"bsgdigital@users.noreply.github.com": "bsgdigital",
|
||||
"numman.ali@gmail.com": "nummanali",
|
||||
"rohithsaimidigudla@gmail.com": "whitehatjr1001",
|
||||
"0xNyk@users.noreply.github.com": "0xNyk",
|
||||
"0xnykcd@googlemail.com": "0xNyk",
|
||||
"buraysandro9@gmail.com": "buray",
|
||||
|
|
@ -181,6 +228,11 @@ AUTHOR_MAP = {
|
|||
"bryan@intertwinesys.com": "bryanyoung",
|
||||
"christo.mitov@gmail.com": "christomitov",
|
||||
"hermes@nousresearch.com": "NousResearch",
|
||||
"reginaldasr@gmail.com": "ReginaldasR",
|
||||
"ntconguit@gmail.com": "0xharryriddle",
|
||||
"agent@wildcat.local": "ericnicolaides",
|
||||
"georgex8001@gmail.com": "georgex8001",
|
||||
"stefan@dimagents.ai": "dimitrovi",
|
||||
"hermes@noushq.ai": "benbarclay",
|
||||
"chinmingcock@gmail.com": "ChimingLiu",
|
||||
"openclaw@sparklab.ai": "openclaw",
|
||||
|
|
@ -329,6 +381,9 @@ AUTHOR_MAP = {
|
|||
"brian@bde.io": "briandevans",
|
||||
"hubin_ll@qq.com": "LLQWQ",
|
||||
"memosr_email@gmail.com": "memosr",
|
||||
"jperlow@gmail.com": "perlowja",
|
||||
"tangyuanjc@JCdeAIfenshendeMac-mini.local": "tangyuanjc",
|
||||
"harryplusplus@gmail.com": "harryplusplus",
|
||||
"anthhub@163.com": "anthhub",
|
||||
"shenuu@gmail.com": "shenuu",
|
||||
"xiayh17@gmail.com": "xiayh0107",
|
||||
|
|
@ -409,6 +464,42 @@ AUTHOR_MAP = {
|
|||
"caliberoviv@gmail.com": "vivganes",
|
||||
"michaelfackerell@gmail.com": "MikeFac",
|
||||
"18024642@qq.com": "GuyCui",
|
||||
"eumael.mkt@gmail.com": "maelrx",
|
||||
# v0.11.0 additions
|
||||
"benbarclay@gmail.com": "benbarclay",
|
||||
"lijiawen@umich.edu": "Jiawen-lee",
|
||||
"oleksiy@kovyrin.net": "kovyrin",
|
||||
"kovyrin.claw@gmail.com": "kovyrin",
|
||||
"kaiobarb@gmail.com": "liftaris",
|
||||
"me@arihantsethia.com": "arihantsethia",
|
||||
"zhuofengwang2003@gmail.com": "coekfung",
|
||||
"teknium@noreply.github.com": "teknium1",
|
||||
"2114364329@qq.com": "cuyua9",
|
||||
"2557058999@qq.com": "Disaster-Terminator",
|
||||
"cine.dreamer.one@gmail.com": "LeonSGP43",
|
||||
"leozeli@qq.com": "leozeli",
|
||||
"linlehao@cuhk.edu.cn": "LehaoLin",
|
||||
"liutong@isacas.ac.cn": "I3eg1nner",
|
||||
"peterberthelsen@Peters-MacBook-Air.local": "PeterBerthelsen",
|
||||
"root@debian.debian": "lengxii",
|
||||
"roque@priveperfumeshn.com": "priveperfumes",
|
||||
"shijianzhi@shijianzhideMacBook-Pro.local": "sjz-ks",
|
||||
"topcheer@me.com": "topcheer",
|
||||
"walli@tencent.com": "walli",
|
||||
"zhuofengwang@tencent.com": "Zhuofeng-Wang",
|
||||
# April 2026 salvage-PR batch (#14920, #14986, #14966)
|
||||
"mrunmayeerane17@gmail.com": "mrunmayee17",
|
||||
"69489633+camaragon@users.noreply.github.com": "camaragon",
|
||||
"shamork@outlook.com": "shamork",
|
||||
# April 2026 Discord Copilot /model salvage (#15030)
|
||||
"cshong2017@outlook.com": "Nicecsh",
|
||||
# no-github-match — keep as display names
|
||||
"clio-agent@sisyphuslabs.ai": "Sisyphus",
|
||||
"marco@rutimka.de": "Marco Rutsch",
|
||||
"paul@gamma.app": "Paul Bergeron",
|
||||
"zhangxicen@example.com": "zhangxicen",
|
||||
"codex@openai.invalid": "teknium1",
|
||||
"screenmachine@gmail.com": "teknium1",
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -248,7 +248,6 @@ Type these during an interactive chat session.
|
|||
```
|
||||
/config Show config (CLI)
|
||||
/model [name] Show or change model
|
||||
/provider Show provider info
|
||||
/personality [name] Set personality
|
||||
/reasoning [level] Set reasoning (none|minimal|low|medium|high|xhigh|show|hide)
|
||||
/verbose Cycle: off → new → all → verbose
|
||||
|
|
|
|||
196
skills/creative/design-md/SKILL.md
Normal file
196
skills/creative/design-md/SKILL.md
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
---
|
||||
name: design-md
|
||||
description: Author, validate, diff, and export DESIGN.md files — Google's open-source format spec that gives coding agents a persistent, structured understanding of a design system (tokens + rationale in one file). Use when building a design system, porting style rules between projects, generating UI with consistent brand, or auditing accessibility/contrast.
|
||||
version: 1.0.0
|
||||
author: Hermes Agent
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [design, design-system, tokens, ui, accessibility, wcag, tailwind, dtcg, google]
|
||||
related_skills: [popular-web-designs, excalidraw, architecture-diagram]
|
||||
---
|
||||
|
||||
# DESIGN.md Skill
|
||||
|
||||
DESIGN.md is Google's open spec (Apache-2.0, `google-labs-code/design.md`) for
|
||||
describing a visual identity to coding agents. One file combines:
|
||||
|
||||
- **YAML front matter** — machine-readable design tokens (normative values)
|
||||
- **Markdown body** — human-readable rationale, organized into canonical sections
|
||||
|
||||
Tokens give exact values. Prose tells agents *why* those values exist and how to
|
||||
apply them. The CLI (`npx @google/design.md`) lints structure + WCAG contrast,
|
||||
diffs versions for regressions, and exports to Tailwind or W3C DTCG JSON.
|
||||
|
||||
## When to use this skill
|
||||
|
||||
- User asks for a DESIGN.md file, design tokens, or a design system spec
|
||||
- User wants consistent UI/brand across multiple projects or tools
|
||||
- User pastes an existing DESIGN.md and asks to lint, diff, export, or extend it
|
||||
- User asks to port a style guide into a format agents can consume
|
||||
- User wants contrast / WCAG accessibility validation on their color palette
|
||||
|
||||
For purely visual inspiration or layout examples, use `popular-web-designs`
|
||||
instead. This skill is for the *formal spec file* itself.
|
||||
|
||||
## File anatomy
|
||||
|
||||
```md
|
||||
---
|
||||
version: alpha
|
||||
name: Heritage
|
||||
description: Architectural minimalism meets journalistic gravitas.
|
||||
colors:
|
||||
primary: "#1A1C1E"
|
||||
secondary: "#6C7278"
|
||||
tertiary: "#B8422E"
|
||||
neutral: "#F7F5F2"
|
||||
typography:
|
||||
h1:
|
||||
fontFamily: Public Sans
|
||||
fontSize: 3rem
|
||||
fontWeight: 700
|
||||
lineHeight: 1.1
|
||||
letterSpacing: "-0.02em"
|
||||
body-md:
|
||||
fontFamily: Public Sans
|
||||
fontSize: 1rem
|
||||
rounded:
|
||||
sm: 4px
|
||||
md: 8px
|
||||
lg: 16px
|
||||
spacing:
|
||||
sm: 8px
|
||||
md: 16px
|
||||
lg: 24px
|
||||
components:
|
||||
button-primary:
|
||||
backgroundColor: "{colors.tertiary}"
|
||||
textColor: "#FFFFFF"
|
||||
rounded: "{rounded.sm}"
|
||||
padding: 12px
|
||||
button-primary-hover:
|
||||
backgroundColor: "{colors.primary}"
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Architectural Minimalism meets Journalistic Gravitas...
|
||||
|
||||
## Colors
|
||||
|
||||
- **Primary (#1A1C1E):** Deep ink for headlines and core text.
|
||||
- **Tertiary (#B8422E):** "Boston Clay" — the sole driver for interaction.
|
||||
|
||||
## Typography
|
||||
|
||||
Public Sans for everything except small all-caps labels...
|
||||
|
||||
## Components
|
||||
|
||||
`button-primary` is the only high-emphasis action on a page...
|
||||
```
|
||||
|
||||
## Token types
|
||||
|
||||
| Type | Format | Example |
|
||||
|------|--------|---------|
|
||||
| Color | `#` + hex (sRGB) | `"#1A1C1E"` |
|
||||
| Dimension | number + unit (`px`, `em`, `rem`) | `48px`, `-0.02em` |
|
||||
| Token reference | `{path.to.token}` | `{colors.primary}` |
|
||||
| Typography | object with `fontFamily`, `fontSize`, `fontWeight`, `lineHeight`, `letterSpacing`, `fontFeature`, `fontVariation` | see above |
|
||||
|
||||
Component property whitelist: `backgroundColor`, `textColor`, `typography`,
|
||||
`rounded`, `padding`, `size`, `height`, `width`. Variants (hover, active,
|
||||
pressed) are **separate component entries** with related key names
|
||||
(`button-primary-hover`), not nested.
|
||||
|
||||
## Canonical section order
|
||||
|
||||
Sections are optional, but present ones MUST appear in this order. Duplicate
|
||||
headings reject the file.
|
||||
|
||||
1. Overview (alias: Brand & Style)
|
||||
2. Colors
|
||||
3. Typography
|
||||
4. Layout (alias: Layout & Spacing)
|
||||
5. Elevation & Depth (alias: Elevation)
|
||||
6. Shapes
|
||||
7. Components
|
||||
8. Do's and Don'ts
|
||||
|
||||
Unknown sections are preserved, not errored. Unknown token names are accepted
|
||||
if the value type is valid. Unknown component properties produce a warning.
|
||||
|
||||
## Workflow: authoring a new DESIGN.md
|
||||
|
||||
1. **Ask the user** (or infer) the brand tone, accent color, and typography
|
||||
direction. If they provided a site, image, or vibe, translate it to the
|
||||
token shape above.
|
||||
2. **Write `DESIGN.md`** in their project root using `write_file`. Always
|
||||
include `name:` and `colors:`; other sections optional but encouraged.
|
||||
3. **Use token references** (`{colors.primary}`) in the `components:` section
|
||||
instead of re-typing hex values. Keeps the palette single-source.
|
||||
4. **Lint it** (see below). Fix any broken references or WCAG failures
|
||||
before returning.
|
||||
5. **If the user has an existing project**, also write Tailwind or DTCG
|
||||
exports next to the file (`tailwind.theme.json`, `tokens.json`).
|
||||
|
||||
## Workflow: lint / diff / export
|
||||
|
||||
The CLI is `@google/design.md` (Node). Use `npx` — no global install needed.
|
||||
|
||||
```bash
|
||||
# Validate structure + token references + WCAG contrast
|
||||
npx -y @google/design.md lint DESIGN.md
|
||||
|
||||
# Compare two versions, fail on regression (exit 1 = regression)
|
||||
npx -y @google/design.md diff DESIGN.md DESIGN-v2.md
|
||||
|
||||
# Export to Tailwind theme JSON
|
||||
npx -y @google/design.md export --format tailwind DESIGN.md > tailwind.theme.json
|
||||
|
||||
# Export to W3C DTCG (Design Tokens Format Module) JSON
|
||||
npx -y @google/design.md export --format dtcg DESIGN.md > tokens.json
|
||||
|
||||
# Print the spec itself — useful when injecting into an agent prompt
|
||||
npx -y @google/design.md spec --rules-only --format json
|
||||
```
|
||||
|
||||
All commands accept `-` for stdin. `lint` returns exit 1 on errors. Use the
|
||||
`--format json` flag and parse the output if you need to report findings
|
||||
structurally.
|
||||
|
||||
### Lint rule reference (what the 7 rules catch)
|
||||
|
||||
- `broken-ref` (error) — `{colors.missing}` points at a non-existent token
|
||||
- `duplicate-section` (error) — same `## Heading` appears twice
|
||||
- `invalid-color`, `invalid-dimension`, `invalid-typography` (error)
|
||||
- `wcag-contrast` (warning/info) — component `textColor` vs `backgroundColor`
|
||||
ratio against WCAG AA (4.5:1) and AAA (7:1)
|
||||
- `unknown-component-property` (warning) — outside the whitelist above
|
||||
|
||||
When the user cares about accessibility, call this out explicitly in your
|
||||
summary — WCAG findings are the most load-bearing reason to use the CLI.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
- **Don't nest component variants.** `button-primary.hover` is wrong;
|
||||
`button-primary-hover` as a sibling key is right.
|
||||
- **Hex colors must be quoted strings.** YAML will otherwise choke on `#` or
|
||||
truncate values like `#1A1C1E` oddly.
|
||||
- **Negative dimensions need quotes too.** `letterSpacing: -0.02em` parses as
|
||||
a YAML flow — write `letterSpacing: "-0.02em"`.
|
||||
- **Section order is enforced.** If the user gives you prose in a random order,
|
||||
reorder it to match the canonical list before saving.
|
||||
- **`version: alpha` is the current spec version** (as of Apr 2026). The spec
|
||||
is marked alpha — watch for breaking changes.
|
||||
- **Token references resolve by dotted path.** `{colors.primary}` works;
|
||||
`{primary}` does not.
|
||||
|
||||
## Spec source of truth
|
||||
|
||||
- Repo: https://github.com/google-labs-code/design.md (Apache-2.0)
|
||||
- CLI: `@google/design.md` on npm
|
||||
- License of generated DESIGN.md files: whatever the user's project uses;
|
||||
the spec itself is Apache-2.0.
|
||||
99
skills/creative/design-md/templates/starter.md
Normal file
99
skills/creative/design-md/templates/starter.md
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
---
|
||||
version: alpha
|
||||
name: MyBrand
|
||||
description: One-sentence description of the visual identity.
|
||||
colors:
|
||||
primary: "#0F172A"
|
||||
secondary: "#64748B"
|
||||
tertiary: "#2563EB"
|
||||
neutral: "#F8FAFC"
|
||||
on-primary: "#FFFFFF"
|
||||
on-tertiary: "#FFFFFF"
|
||||
typography:
|
||||
h1:
|
||||
fontFamily: Inter
|
||||
fontSize: 3rem
|
||||
fontWeight: 700
|
||||
lineHeight: 1.1
|
||||
letterSpacing: "-0.02em"
|
||||
h2:
|
||||
fontFamily: Inter
|
||||
fontSize: 2rem
|
||||
fontWeight: 600
|
||||
lineHeight: 1.2
|
||||
body-md:
|
||||
fontFamily: Inter
|
||||
fontSize: 1rem
|
||||
lineHeight: 1.5
|
||||
label-caps:
|
||||
fontFamily: Inter
|
||||
fontSize: 0.75rem
|
||||
fontWeight: 600
|
||||
letterSpacing: "0.08em"
|
||||
rounded:
|
||||
sm: 4px
|
||||
md: 8px
|
||||
lg: 16px
|
||||
full: 9999px
|
||||
spacing:
|
||||
xs: 4px
|
||||
sm: 8px
|
||||
md: 16px
|
||||
lg: 24px
|
||||
xl: 48px
|
||||
components:
|
||||
button-primary:
|
||||
backgroundColor: "{colors.tertiary}"
|
||||
textColor: "{colors.on-tertiary}"
|
||||
rounded: "{rounded.sm}"
|
||||
padding: 12px
|
||||
button-primary-hover:
|
||||
backgroundColor: "{colors.primary}"
|
||||
textColor: "{colors.on-primary}"
|
||||
card:
|
||||
backgroundColor: "{colors.neutral}"
|
||||
textColor: "{colors.primary}"
|
||||
rounded: "{rounded.md}"
|
||||
padding: 24px
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Describe the voice and feel of the brand in one or two paragraphs. What mood
|
||||
does it evoke? What emotional response should a user have on first impression?
|
||||
|
||||
## Colors
|
||||
|
||||
- **Primary ({colors.primary}):** Core text, headlines, high-emphasis surfaces.
|
||||
- **Secondary ({colors.secondary}):** Supporting text, borders, metadata.
|
||||
- **Tertiary ({colors.tertiary}):** Interaction driver — buttons, links,
|
||||
selected states. Use sparingly to preserve its signal.
|
||||
- **Neutral ({colors.neutral}):** Page background and surface fills.
|
||||
|
||||
## Typography
|
||||
|
||||
Inter for everything. Weight and size carry hierarchy, not font family. Tight
|
||||
letter-spacing on display sizes; default tracking on body.
|
||||
|
||||
## Layout
|
||||
|
||||
Spacing scale is a 4px baseline. Use `md` (16px) for intra-component gaps,
|
||||
`lg` (24px) for inter-component gaps, `xl` (48px) for section breaks.
|
||||
|
||||
## Shapes
|
||||
|
||||
Rounded corners are modest — `sm` on interactive elements, `md` on cards.
|
||||
`full` is reserved for avatars and pill badges.
|
||||
|
||||
## Components
|
||||
|
||||
- `button-primary` is the only high-emphasis action per screen.
|
||||
- `card` is the default surface for grouped content. No shadow by default.
|
||||
|
||||
## Do's and Don'ts
|
||||
|
||||
- **Do** use token references (`{colors.primary}`) instead of literal hex in
|
||||
component definitions.
|
||||
- **Don't** introduce colors outside the palette — extend the palette first.
|
||||
- **Don't** nest component variants. `button-primary-hover` is a sibling,
|
||||
not a child.
|
||||
134
skills/media/spotify/SKILL.md
Normal file
134
skills/media/spotify/SKILL.md
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
---
|
||||
name: spotify
|
||||
description: Control Spotify — play music, search the catalog, manage playlists and library, inspect devices and playback state. Loads when the user asks to play/pause/queue music, search tracks/albums/artists, manage playlists, or check what's playing. Assumes the Hermes Spotify toolset is enabled and `hermes auth spotify` has been run.
|
||||
version: 1.0.0
|
||||
author: Hermes Agent
|
||||
license: MIT
|
||||
prerequisites:
|
||||
tools: [spotify_playback, spotify_devices, spotify_queue, spotify_search, spotify_playlists, spotify_albums, spotify_library]
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [spotify, music, playback, playlists, media]
|
||||
related_skills: [gif-search]
|
||||
---
|
||||
|
||||
# Spotify
|
||||
|
||||
Control the user's Spotify account via the Hermes Spotify toolset (7 tools). Setup guide: https://hermes-agent.nousresearch.com/docs/user-guide/features/spotify
|
||||
|
||||
## When to use this skill
|
||||
|
||||
The user says something like "play X", "pause", "skip", "queue up X", "what's playing", "search for X", "add to my X playlist", "make a playlist", "save this to my library", etc.
|
||||
|
||||
## The 7 tools
|
||||
|
||||
- `spotify_playback` — play, pause, next, previous, seek, set_repeat, set_shuffle, set_volume, get_state, get_currently_playing, recently_played
|
||||
- `spotify_devices` — list, transfer
|
||||
- `spotify_queue` — get, add
|
||||
- `spotify_search` — search the catalog
|
||||
- `spotify_playlists` — list, get, create, add_items, remove_items, update_details
|
||||
- `spotify_albums` — get, tracks
|
||||
- `spotify_library` — list/save/remove with `kind: "tracks"|"albums"`
|
||||
|
||||
Playback-mutating actions require Spotify Premium; search/library/playlist ops work on Free.
|
||||
|
||||
## Canonical patterns (minimize tool calls)
|
||||
|
||||
### "Play <artist/track/album>"
|
||||
One search, then play by URI. Do NOT loop through search results describing them unless the user asked for options.
|
||||
|
||||
```
|
||||
spotify_search({"query": "miles davis kind of blue", "types": ["album"], "limit": 1})
|
||||
→ got album URI spotify:album:1weenld61qoidwYuZ1GESA
|
||||
spotify_playback({"action": "play", "context_uri": "spotify:album:1weenld61qoidwYuZ1GESA"})
|
||||
```
|
||||
|
||||
For "play some <artist>" (no specific song), prefer `types: ["artist"]` and play the artist context URI — Spotify handles smart shuffle. If the user says "the song" or "that track", search `types: ["track"]` and pass `uris: [track_uri]` to play.
|
||||
|
||||
### "What's playing?" / "What am I listening to?"
|
||||
Single call — don't chain get_state after get_currently_playing.
|
||||
|
||||
```
|
||||
spotify_playback({"action": "get_currently_playing"})
|
||||
```
|
||||
|
||||
If it returns 204/empty (`is_playing: false`), tell the user nothing is playing. Don't retry.
|
||||
|
||||
### "Pause" / "Skip" / "Volume 50"
|
||||
Direct action, no preflight inspection needed.
|
||||
|
||||
```
|
||||
spotify_playback({"action": "pause"})
|
||||
spotify_playback({"action": "next"})
|
||||
spotify_playback({"action": "set_volume", "volume_percent": 50})
|
||||
```
|
||||
|
||||
### "Add to my <playlist name> playlist"
|
||||
1. `spotify_playlists list` to find the playlist ID by name
|
||||
2. Get the track URI (from currently playing, or search)
|
||||
3. `spotify_playlists add_items` with the playlist_id and URIs
|
||||
|
||||
```
|
||||
spotify_playlists({"action": "list"})
|
||||
→ found "Late Night Jazz" = 37i9dQZF1DX4wta20PHgwo
|
||||
spotify_playback({"action": "get_currently_playing"})
|
||||
→ current track uri = spotify:track:0DiWol3AO6WpXZgp0goxAV
|
||||
spotify_playlists({"action": "add_items",
|
||||
"playlist_id": "37i9dQZF1DX4wta20PHgwo",
|
||||
"uris": ["spotify:track:0DiWol3AO6WpXZgp0goxAV"]})
|
||||
```
|
||||
|
||||
### "Create a playlist called X and add the last 3 songs I played"
|
||||
```
|
||||
spotify_playback({"action": "recently_played", "limit": 3})
|
||||
spotify_playlists({"action": "create", "name": "Focus 2026"})
|
||||
→ got playlist_id back in response
|
||||
spotify_playlists({"action": "add_items", "playlist_id": <id>, "uris": [<3 uris>]})
|
||||
```
|
||||
|
||||
### "Save / unsave / is this saved?"
|
||||
Use `spotify_library` with the right `kind`.
|
||||
|
||||
```
|
||||
spotify_library({"kind": "tracks", "action": "save", "uris": ["spotify:track:..."]})
|
||||
spotify_library({"kind": "albums", "action": "list", "limit": 50})
|
||||
```
|
||||
|
||||
### "Transfer playback to my <device>"
|
||||
```
|
||||
spotify_devices({"action": "list"})
|
||||
→ pick the device_id by matching name/type
|
||||
spotify_devices({"action": "transfer", "device_id": "<id>", "play": true})
|
||||
```
|
||||
|
||||
## Critical failure modes
|
||||
|
||||
**`403 Forbidden — No active device found`** on any playback action means Spotify isn't running anywhere. Tell the user: "Open Spotify on your phone/desktop/web player first, start any track for a second, then retry." Don't retry the tool call blindly — it will fail the same way. You can call `spotify_devices list` to confirm; an empty list means no active device.
|
||||
|
||||
**`403 Forbidden — Premium required`** means the user is on Free and tried to mutate playback. Don't retry; tell them this action needs Premium. Reads still work (search, playlists, library, get_state).
|
||||
|
||||
**`204 No Content` on `get_currently_playing`** is NOT an error — it means nothing is playing. The tool returns `is_playing: false`. Just report that to the user.
|
||||
|
||||
**`429 Too Many Requests`** = rate limit. Wait and retry once. If it keeps happening, you're looping — stop.
|
||||
|
||||
**`401 Unauthorized` after a retry** — refresh token revoked. Tell the user to run `hermes auth spotify` again.
|
||||
|
||||
## URI and ID formats
|
||||
|
||||
Spotify uses three interchangeable ID formats. The tools accept all three and normalize:
|
||||
|
||||
- URI: `spotify:track:0DiWol3AO6WpXZgp0goxAV` (preferred)
|
||||
- URL: `https://open.spotify.com/track/0DiWol3AO6WpXZgp0goxAV`
|
||||
- Bare ID: `0DiWol3AO6WpXZgp0goxAV`
|
||||
|
||||
When in doubt, use full URIs. Search results return URIs in the `uri` field — pass those directly.
|
||||
|
||||
Entity types: `track`, `album`, `artist`, `playlist`, `show`, `episode`. Use the right type for the action — `spotify_playback.play` with a `context_uri` expects album/playlist/artist; `uris` expects an array of track URIs.
|
||||
|
||||
## What NOT to do
|
||||
|
||||
- **Don't call `get_state` before every action.** Spotify accepts play/pause/skip without preflight. Only inspect state when the user asked "what's playing" or you need to reason about device/track.
|
||||
- **Don't describe search results unless asked.** If the user said "play X", search, grab the top URI, play it. They'll hear it's wrong if it's wrong.
|
||||
- **Don't retry on `403 Premium required` or `403 No active device`.** Those are permanent until user action.
|
||||
- **Don't use `spotify_search` to find a playlist by name** — that searches the public Spotify catalog. User playlists come from `spotify_playlists list`.
|
||||
- **Don't mix `kind: "tracks"` with album URIs** in `spotify_library` (or vice versa). The tool normalizes IDs but the API endpoint differs.
|
||||
|
|
@ -134,6 +134,7 @@ masks = processor.image_processor.post_process_masks(
|
|||
|
||||
### Model architecture
|
||||
|
||||
<!-- ascii-guard-ignore -->
|
||||
```
|
||||
SAM Architecture:
|
||||
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
||||
|
|
@ -144,6 +145,7 @@ SAM Architecture:
|
|||
Image Embeddings Prompt Embeddings Masks + IoU
|
||||
(computed once) (per prompt) predictions
|
||||
```
|
||||
<!-- ascii-guard-ignore-end -->
|
||||
|
||||
### Model variants
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ End-to-end pipeline for producing publication-ready ML/AI research papers target
|
|||
|
||||
This is **not a linear pipeline** — it is an iterative loop. Results trigger new experiments. Reviews trigger new analysis. The agent must handle these feedback loops.
|
||||
|
||||
<!-- ascii-guard-ignore -->
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ RESEARCH PAPER PIPELINE │
|
||||
|
|
@ -41,6 +42,7 @@ This is **not a linear pipeline** — it is an iterative loop. Results trigger n
|
|||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
<!-- ascii-guard-ignore-end -->
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -904,9 +904,15 @@ class TestRegisterSessionMcpServers:
|
|||
]
|
||||
|
||||
with patch("tools.mcp_tool.register_mcp_servers", return_value=["mcp_srv_search"]), \
|
||||
patch("model_tools.get_tool_definitions", return_value=fake_tools):
|
||||
patch("model_tools.get_tool_definitions", return_value=fake_tools) as mock_defs:
|
||||
await agent._register_session_mcp_servers(state, [server])
|
||||
|
||||
mock_defs.assert_called_once_with(
|
||||
enabled_toolsets=["hermes-acp", "mcp-srv"],
|
||||
disabled_toolsets=None,
|
||||
quiet_mode=True,
|
||||
)
|
||||
assert state.agent.enabled_toolsets == ["hermes-acp", "mcp-srv"]
|
||||
assert state.agent.tools == fake_tools
|
||||
assert state.agent.valid_tool_names == {"mcp_srv_search", "terminal"}
|
||||
# _invalidate_system_prompt should have been called
|
||||
|
|
|
|||
|
|
@ -138,6 +138,43 @@ class TestListAndCleanup:
|
|||
class TestPersistence:
|
||||
"""Verify that sessions are persisted to SessionDB and can be restored."""
|
||||
|
||||
    def test_create_session_includes_registered_mcp_toolsets(self, tmp_path, monkeypatch):
        """Enabled MCP servers from config surface as mcp-<name> toolsets on the new agent."""
        captured = {}  # kwargs passed to the (faked) AIAgent constructor

        def fake_resolve_runtime_provider(requested=None, **kwargs):
            # Static provider descriptor; keeps the test off real credentials/network.
            return {
                "provider": "openrouter",
                "api_mode": "chat_completions",
                "base_url": "https://openrouter.example/v1",
                "api_key": "***",
                "command": None,
                "args": [],
            }

        def fake_agent(**kwargs):
            # Record constructor kwargs so the toolset list can be asserted below.
            captured.update(kwargs)
            return SimpleNamespace(model=kwargs.get("model"), enabled_toolsets=kwargs.get("enabled_toolsets"))

        # Config declares three MCP servers; "disabled" must be filtered out.
        monkeypatch.setattr("hermes_cli.config.load_config", lambda: {
            "model": {"provider": "openrouter", "default": "test-model"},
            "mcp_servers": {
                "olympus": {"command": "python", "enabled": True},
                "exa": {"url": "https://exa.ai/mcp"},
                "disabled": {"command": "python", "enabled": False},
            },
        })
        monkeypatch.setattr(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            fake_resolve_runtime_provider,
        )
        db = SessionDB(tmp_path / "state.db")

        with patch("run_agent.AIAgent", side_effect=fake_agent):
            manager = SessionManager(db=db)
            manager.create_session(cwd="/work")

        # Base toolset first, then enabled MCP servers in config order; "disabled" excluded.
        assert captured["enabled_toolsets"] == ["hermes-acp", "mcp-olympus", "mcp-exa"]
|
||||
|
||||
def test_create_session_writes_to_db(self, manager):
|
||||
state = manager.create_session(cwd="/project")
|
||||
db = manager._get_db()
|
||||
|
|
|
|||
165
tests/agent/test_anthropic_keychain.py
Normal file
165
tests/agent/test_anthropic_keychain.py
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
"""Tests for Bug #12905 fixes in agent/anthropic_adapter.py — macOS Keychain support."""
|
||||
|
||||
import json
|
||||
import platform
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.anthropic_adapter import (
|
||||
_read_claude_code_credentials_from_keychain,
|
||||
read_claude_code_credentials,
|
||||
)
|
||||
|
||||
|
||||
class TestReadClaudeCodeCredentialsFromKeychain:
    """Bug 4: macOS Keychain support for Claude Code >=2.1.114."""

    def test_returns_none_on_linux(self):
        """Keychain reading is Darwin-only; must return None on other platforms."""
        with patch("agent.anthropic_adapter.platform.system", return_value="Linux"):
            assert _read_claude_code_credentials_from_keychain() is None

    def test_returns_none_on_windows(self):
        """Same Darwin-only guard, exercised with the Windows platform string."""
        with patch("agent.anthropic_adapter.platform.system", return_value="Windows"):
            assert _read_claude_code_credentials_from_keychain() is None

    def test_returns_none_when_security_command_not_found(self):
        """OSError from missing security binary must be handled gracefully."""
        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run",
                   side_effect=OSError("security not found")):
            assert _read_claude_code_credentials_from_keychain() is None

    def test_returns_none_on_nonzero_exit_code(self):
        """security returns non-zero when the Keychain entry doesn't exist."""
        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="")
            assert _read_claude_code_credentials_from_keychain() is None

    def test_returns_none_for_empty_stdout(self):
        """Zero exit code but empty output yields no credentials."""
        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
            assert _read_claude_code_credentials_from_keychain() is None

    def test_returns_none_for_non_json_payload(self):
        """Malformed (non-JSON) Keychain payloads are rejected, not raised."""
        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(returncode=0, stdout="not valid json", stderr="")
            assert _read_claude_code_credentials_from_keychain() is None

    def test_returns_none_when_password_field_is_missing_claude_ai_oauth(self):
        """Valid JSON that lacks the claudeAiOauth service entry is ignored."""
        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(
                returncode=0,
                stdout=json.dumps({"someOtherService": {"accessToken": "tok"}}),
                stderr="",
            )
            assert _read_claude_code_credentials_from_keychain() is None

    def test_returns_none_when_access_token_is_empty(self):
        """An empty accessToken counts as no usable credentials."""
        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(
                returncode=0,
                stdout=json.dumps({"claudeAiOauth": {"accessToken": "", "refreshToken": "x"}}),
                stderr="",
            )
            assert _read_claude_code_credentials_from_keychain() is None

    def test_parses_valid_keychain_entry(self):
        """Happy path: a well-formed entry is parsed and tagged with its source."""
        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(
                returncode=0,
                stdout=json.dumps({
                    "claudeAiOauth": {
                        "accessToken": "kc-access-token-abc",
                        "refreshToken": "kc-refresh-token-xyz",
                        "expiresAt": 9999999999999,
                    }
                }),
                stderr="",
            )
            creds = _read_claude_code_credentials_from_keychain()
            assert creds is not None
            assert creds["accessToken"] == "kc-access-token-abc"
            assert creds["refreshToken"] == "kc-refresh-token-xyz"
            assert creds["expiresAt"] == 9999999999999
            # "source" lets callers distinguish Keychain creds from the JSON file.
            assert creds["source"] == "macos_keychain"
|
||||
|
||||
|
||||
class TestReadClaudeCodeCredentialsPriority:
    """Bug 4: Keychain must be checked before the JSON file."""

    def test_keychain_takes_priority_over_json_file(self, tmp_path, monkeypatch):
        """When both Keychain and JSON file have credentials, Keychain wins."""
        # Set up JSON file with "older" token
        json_cred_file = tmp_path / ".claude" / ".credentials.json"
        json_cred_file.parent.mkdir(parents=True)
        json_cred_file.write_text(json.dumps({
            "claudeAiOauth": {
                "accessToken": "json-token",
                "refreshToken": "json-refresh",
                "expiresAt": 9999999999999,
            }
        }))
        # Redirect Path.home() so the adapter's JSON-file lookup hits tmp_path.
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)

        # Mock Keychain to return a "newer" token
        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(
                returncode=0,
                stdout=json.dumps({
                    "claudeAiOauth": {
                        "accessToken": "keychain-token",
                        "refreshToken": "keychain-refresh",
                        "expiresAt": 9999999999999,
                    }
                }),
                stderr="",
            )
            creds = read_claude_code_credentials()

            # Keychain token should be returned, not JSON file token
            assert creds is not None
            assert creds["accessToken"] == "keychain-token"
            assert creds["source"] == "macos_keychain"

    def test_falls_back_to_json_when_keychain_returns_none(self, tmp_path, monkeypatch):
        """When Keychain has no entry, JSON file is used as fallback."""
        json_cred_file = tmp_path / ".claude" / ".credentials.json"
        json_cred_file.parent.mkdir(parents=True)
        json_cred_file.write_text(json.dumps({
            "claudeAiOauth": {
                "accessToken": "json-fallback-token",
                "refreshToken": "json-refresh",
                "expiresAt": 9999999999999,
            }
        }))
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)

        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run") as mock_run:
            # Simulate Keychain entry not found
            mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="")
            creds = read_claude_code_credentials()

            assert creds is not None
            assert creds["accessToken"] == "json-fallback-token"
            assert creds["source"] == "claude_code_credentials_file"

    def test_returns_none_when_neither_keychain_nor_json_has_creds(self, tmp_path, monkeypatch):
        """No credentials anywhere — must return None cleanly."""
        # tmp_path has no .claude/.credentials.json, so the JSON fallback also misses.
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)

        with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \
             patch("agent.anthropic_adapter.subprocess.run") as mock_run:
            mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="")
            creds = read_claude_code_credentials()

            assert creds is None
|
||||
|
|
@ -19,6 +19,7 @@ from agent.auxiliary_client import (
|
|||
_read_codex_access_token,
|
||||
_get_provider_chain,
|
||||
_is_payment_error,
|
||||
_normalize_aux_provider,
|
||||
_try_payment_fallback,
|
||||
_resolve_auto,
|
||||
)
|
||||
|
|
@ -54,6 +55,17 @@ def codex_auth_dir(tmp_path, monkeypatch):
|
|||
return codex_dir
|
||||
|
||||
|
||||
class TestNormalizeAuxProvider:
    """Alias normalization for auxiliary provider names."""

    def test_maps_github_copilot_aliases(self):
        # Every GitHub-flavored spelling collapses onto the canonical "copilot".
        for alias in ("github", "github-copilot", "github-models"):
            assert _normalize_aux_provider(alias) == "copilot"

    def test_maps_github_copilot_acp_aliases(self):
        # ACP-agent spellings collapse onto the canonical "copilot-acp".
        for alias in ("github-copilot-acp", "copilot-acp-agent"):
            assert _normalize_aux_provider(alias) == "copilot-acp"
|
||||
|
||||
|
||||
class TestReadCodexAccessToken:
|
||||
def test_valid_auth_store(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
|
|
@ -447,6 +459,34 @@ class TestExplicitProviderRouting:
|
|||
adapter = client.chat.completions
|
||||
assert adapter._is_oauth is False
|
||||
|
||||
    def test_explicit_openrouter_pool_exhausted_logs_precise_warning(self, monkeypatch, caplog):
        """A configured-but-exhausted pool must warn about the pool, not the env var."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        # (True, None): a pool exists but yielded no usable entry.
        with patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)):
            with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
                client, model = resolve_provider_client("openrouter")
        assert client is None
        assert model is None
        assert any(
            "credential pool has no usable entries" in record.message
            for record in caplog.records
        )
        # The generic missing-env warning must NOT fire when a pool is configured.
        assert not any(
            "OPENROUTER_API_KEY not set" in record.message
            for record in caplog.records
        )
|
||||
|
||||
    def test_explicit_openrouter_missing_env_keeps_not_set_warning(self, monkeypatch, caplog):
        """With no pool configured, the original OPENROUTER_API_KEY warning remains."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        # (False, None): no credential pool is configured at all.
        with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
                client, model = resolve_provider_client("openrouter")
        assert client is None
        assert model is None
        assert any(
            "OPENROUTER_API_KEY not set" in record.message
            for record in caplog.records
        )
|
||||
|
||||
class TestGetTextAuxiliaryClient:
|
||||
"""Test the full resolution chain for get_text_auxiliary_client."""
|
||||
|
||||
|
|
@ -1175,3 +1215,201 @@ class TestAnthropicCompatImageConversion:
|
|||
}]
|
||||
result = _convert_openai_images_to_anthropic(messages)
|
||||
assert result[0]["content"][0]["source"]["media_type"] == "image/jpeg"
|
||||
|
||||
|
||||
class _AuxAuth401(Exception):
|
||||
status_code = 401
|
||||
|
||||
def __init__(self, message="Provided authentication token is expired"):
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class _DummyResponse:
|
||||
def __init__(self, text="ok"):
|
||||
self.choices = [MagicMock(message=MagicMock(content=text))]
|
||||
|
||||
|
||||
class _FailingThenSuccessCompletions:
    """Sync completions stub: the first create() raises a 401, later calls succeed."""

    def __init__(self):
        # Number of create() invocations observed so far.
        self.calls = 0

    def create(self, **kwargs):
        self.calls += 1
        if self.calls > 1:
            return _DummyResponse("sync-ok")
        raise _AuxAuth401()
|
||||
|
||||
|
||||
class _AsyncFailingThenSuccessCompletions:
    """Async completions stub: the first create() raises a 401, later calls succeed."""

    def __init__(self):
        # Number of create() invocations observed so far.
        self.calls = 0

    async def create(self, **kwargs):
        self.calls += 1
        if self.calls > 1:
            return _DummyResponse("async-ok")
        raise _AuxAuth401()
|
||||
|
||||
|
||||
class TestAuxiliaryAuthRefreshRetry:
    """A 401 from a provider triggers one credential refresh and exactly one retry."""

    def test_call_llm_refreshes_codex_on_401_for_vision(self):
        """Vision path: first client 401s, refresh fires, retry uses the fresh client."""
        failing_client = MagicMock()
        failing_client.base_url = "https://chatgpt.com/backend-api/codex"
        failing_client.chat.completions = _FailingThenSuccessCompletions()

        fresh_client = MagicMock()
        fresh_client.base_url = "https://chatgpt.com/backend-api/codex"
        fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-sync")

        with (
            # side_effect list: first resolution returns the failing client, second the fresh one.
            patch(
                "agent.auxiliary_client.resolve_vision_provider_client",
                side_effect=[("openai-codex", failing_client, "gpt-5.2-codex"), ("openai-codex", fresh_client, "gpt-5.2-codex")],
            ),
            patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh,
        ):
            resp = call_llm(
                task="vision",
                provider="openai-codex",
                model="gpt-5.2-codex",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert resp.choices[0].message.content == "fresh-sync"
        mock_refresh.assert_called_once_with("openai-codex")

    def test_call_llm_refreshes_codex_on_401_for_non_vision(self):
        """Non-vision path goes through the cached-client lookup; stale client retried once."""
        stale_client = MagicMock()
        stale_client.base_url = "https://chatgpt.com/backend-api/codex"
        stale_client.chat.completions.create.side_effect = _AuxAuth401("stale codex token")

        fresh_client = MagicMock()
        fresh_client.base_url = "https://chatgpt.com/backend-api/codex"
        fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-non-vision")

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("openai-codex", "gpt-5.2-codex", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "gpt-5.2-codex"), (fresh_client, "gpt-5.2-codex")]),
            patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh,
        ):
            resp = call_llm(
                task="compression",
                provider="openai-codex",
                model="gpt-5.2-codex",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert resp.choices[0].message.content == "fresh-non-vision"
        mock_refresh.assert_called_once_with("openai-codex")
        # Exactly one failed attempt and one successful retry — no retry loops.
        assert stale_client.chat.completions.create.call_count == 1
        assert fresh_client.chat.completions.create.call_count == 1

    def test_call_llm_refreshes_anthropic_on_401_for_non_vision(self):
        """The same refresh-and-retry contract applies to the anthropic provider."""
        stale_client = MagicMock()
        stale_client.base_url = "https://api.anthropic.com"
        stale_client.chat.completions.create.side_effect = _AuxAuth401("anthropic token expired")

        fresh_client = MagicMock()
        fresh_client.base_url = "https://api.anthropic.com"
        fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-anthropic")

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("anthropic", "claude-haiku-4-5-20251001", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "claude-haiku-4-5-20251001"), (fresh_client, "claude-haiku-4-5-20251001")]),
            patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh,
        ):
            resp = call_llm(
                task="compression",
                provider="anthropic",
                model="claude-haiku-4-5-20251001",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert resp.choices[0].message.content == "fresh-anthropic"
        mock_refresh.assert_called_once_with("anthropic")
        assert stale_client.chat.completions.create.call_count == 1
        assert fresh_client.chat.completions.create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_call_llm_refreshes_codex_on_401_for_vision(self):
        """Async vision path mirrors the sync refresh-and-retry behavior."""
        failing_client = MagicMock()
        failing_client.base_url = "https://chatgpt.com/backend-api/codex"
        failing_client.chat.completions = _AsyncFailingThenSuccessCompletions()

        fresh_client = MagicMock()
        fresh_client.base_url = "https://chatgpt.com/backend-api/codex"
        fresh_client.chat.completions.create = AsyncMock(return_value=_DummyResponse("fresh-async"))

        with (
            patch(
                "agent.auxiliary_client.resolve_vision_provider_client",
                side_effect=[("openai-codex", failing_client, "gpt-5.2-codex"), ("openai-codex", fresh_client, "gpt-5.2-codex")],
            ),
            patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh,
        ):
            resp = await async_call_llm(
                task="vision",
                provider="openai-codex",
                model="gpt-5.2-codex",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert resp.choices[0].message.content == "fresh-async"
        mock_refresh.assert_called_once_with("openai-codex")

    def test_refresh_provider_credentials_force_refreshes_anthropic_oauth_and_evicts_cache(self, monkeypatch):
        """Refresh path: OAuth token renewed, persisted, and the stale cached client closed."""
        stale_client = MagicMock()
        cache_key = ("anthropic", False, None, None, None)

        # Blank out env credentials so the OAuth-refresh path is the only option.
        monkeypatch.setenv("ANTHROPIC_TOKEN", "")
        monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "")
        monkeypatch.setenv("ANTHROPIC_API_KEY", "")

        with (
            # Seed the cache with a stale client so eviction can be observed.
            patch("agent.auxiliary_client._client_cache", {cache_key: (stale_client, "claude-haiku-4-5-20251001", None)}),
            patch("agent.anthropic_adapter.read_claude_code_credentials", return_value={
                "accessToken": "expired-token",
                "refreshToken": "refresh-token",
                "expiresAt": 0,
            }),
            patch("agent.anthropic_adapter.refresh_anthropic_oauth_pure", return_value={
                "access_token": "fresh-token",
                "refresh_token": "refresh-token-2",
                "expires_at_ms": 9999999999999,
            }) as mock_refresh_oauth,
            patch("agent.anthropic_adapter._write_claude_code_credentials") as mock_write,
        ):
            from agent.auxiliary_client import _refresh_provider_credentials

            assert _refresh_provider_credentials("anthropic") is True

            mock_refresh_oauth.assert_called_once_with("refresh-token", use_json=False)
            mock_write.assert_called_once_with("fresh-token", "refresh-token-2", 9999999999999)
            # Stale client must be closed when it is evicted from the cache.
            stale_client.close.assert_called_once()

    @pytest.mark.asyncio
    async def test_async_call_llm_refreshes_anthropic_on_401_for_non_vision(self):
        """Async non-vision path: anthropic 401 triggers refresh, then one awaited retry."""
        stale_client = MagicMock()
        stale_client.base_url = "https://api.anthropic.com"
        stale_client.chat.completions.create = AsyncMock(side_effect=_AuxAuth401("anthropic token expired"))

        fresh_client = MagicMock()
        fresh_client.base_url = "https://api.anthropic.com"
        fresh_client.chat.completions.create = AsyncMock(return_value=_DummyResponse("fresh-async-anthropic"))

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("anthropic", "claude-haiku-4-5-20251001", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "claude-haiku-4-5-20251001"), (fresh_client, "claude-haiku-4-5-20251001")]),
            patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh,
        ):
            resp = await async_call_llm(
                task="compression",
                provider="anthropic",
                model="claude-haiku-4-5-20251001",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert resp.choices[0].message.content == "fresh-async-anthropic"
        mock_refresh.assert_called_once_with("anthropic")
        assert stale_client.chat.completions.create.await_count == 1
        assert fresh_client.chat.completions.create.await_count == 1
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue