Merge branch 'main' into bb/gui

This commit is contained in:
emozilla 2026-05-12 21:18:07 -04:00
commit 747caa74f0
139 changed files with 13565 additions and 816 deletions

View file

@ -273,6 +273,20 @@ BROWSER_SESSION_TIMEOUT=300
# Browser sessions are automatically closed after this period of no activity
BROWSER_INACTIVITY_TIMEOUT=120
# Camofox local anti-detection browser (Camoufox-based Firefox).
# Set CAMOFOX_URL to route the browser tools through a local Camofox server
# instead of agent-browser/Browserbase. See docs/user-guide/features/browser.md.
# CAMOFOX_URL=http://localhost:9377
# Externally managed Camofox sessions — when another app owns the visible
# Camofox browser, set these so Hermes shares the same userId/profile instead
# of creating its own isolated session.
# CAMOFOX_USER_ID=
# CAMOFOX_SESSION_KEY=
# Set to true to reuse an already-open Camofox tab for this identity before
# creating a new one (useful for gateway restarts).
# CAMOFOX_ADOPT_EXISTING_TAB=false
# =============================================================================
# SESSION LOGGING
# =============================================================================

View file

@ -55,11 +55,14 @@ jobs:
e2e:
runs-on: ubuntu-latest
timeout-minutes: 10
timeout-minutes: 15
steps:
- name: Checkout code
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y ripgrep
- name: Install uv
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5

View file

@ -35,6 +35,14 @@ def _get_anthropic_sdk():
"""Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
global _anthropic_sdk
if _anthropic_sdk is ...:
try:
from tools.lazy_deps import ensure as _lazy_ensure
_lazy_ensure("provider.anthropic", prompt=False)
except ImportError:
pass
except Exception:
# FeatureUnavailable — fall through to ImportError handling below
pass
try:
import anthropic as _sdk
_anthropic_sdk = _sdk

View file

@ -382,7 +382,7 @@ _AI_GATEWAY_HEADERS = {
# Nous Portal extra_body for product attribution.
# Callers should pass this as extra_body in chat.completions.create()
# when the auxiliary client is backed by Nous Portal.
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent"]}
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent", "client=aux"]}
# Set at resolve time — True if the auxiliary client points to Nous Portal
auxiliary_is_nous: bool = False
@ -3828,7 +3828,7 @@ def _resolve_task_provider_model(
# (e.g. OPENROUTER_API_KEY) instead of locking into "custom".
return cfg_provider, resolved_model, cfg_base_url, None, resolved_api_mode
if cfg_provider and cfg_provider != "auto":
return cfg_provider, resolved_model, None, None, resolved_api_mode
return cfg_provider, resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
return "auto", resolved_model, None, None, resolved_api_mode
@ -4026,7 +4026,7 @@ def _build_call_kwargs(
# Provider-specific extra_body
merged_extra = dict(extra_body or {})
if provider == "nous" or auxiliary_is_nous:
merged_extra.setdefault("tags", []).extend(["product=hermes-agent"])
merged_extra.setdefault("tags", []).extend(NOUS_EXTRA_BODY["tags"])
if merged_extra:
kwargs["extra_body"] = merged_extra

106
agent/lsp/__init__.py Normal file
View file

@ -0,0 +1,106 @@
"""Language Server Protocol (LSP) integration for Hermes Agent.
Hermes runs full language servers (pyright, gopls, rust-analyzer,
typescript-language-server, etc.) as subprocesses and pipes their
``textDocument/publishDiagnostics`` output into the post-write lint
delta filter used by ``write_file`` and ``patch``.
LSP is **gated on git workspace detection** — if the agent's cwd is
inside a git repository, LSP runs against that workspace; otherwise the
file_operations layer falls back to its existing in-process syntax
checks. This keeps users on user-home cwd's (e.g. Telegram gateway
chats) from spawning daemons they don't need.
Public API:
from agent.lsp import get_service
svc = get_service()
if svc and svc.enabled_for(path):
await svc.touch_file(path)
diags = svc.diagnostics_for(path)
The bulk of the wiring is internal — most callers only need the layer
in :func:`tools.file_operations.FileOperations._check_lint_delta`,
which is already wired (see that module).
Architecture is documented in ``website/docs/user-guide/features/lsp.md``.
"""
from __future__ import annotations
import atexit
import logging
import threading
from typing import Optional
from agent.lsp.manager import LSPService
# Logger shared by the functions in this module.
logger = logging.getLogger("agent.lsp")
# Process-wide service singleton. Stays None until get_service() creates it
# and is reset to None by shutdown_service().
_service: Optional[LSPService] = None
# Set once the atexit teardown hook has been registered, so it is only
# registered a single time per process.
_atexit_registered = False
# Held while the singleton is created or detached.
_service_lock = threading.Lock()
def get_service() -> Optional[LSPService]:
    """Return the process-wide LSP service singleton, or None when inactive.

    The service is created lazily on the first call through
    ``LSPService.create_from_config()``.  ``None`` is returned when that
    factory yields no service, or when the service's ``is_active()``
    reports false.

    The first creation attempt also registers an :mod:`atexit` handler
    that tears the service down on interpreter exit, so spawned language
    servers are not left behind by a terminating CLI or gateway session.

    Thread-safety note: the module global is copied into a local before
    it is inspected.  :func:`shutdown_service` may reset ``_service`` to
    ``None`` from another thread at any time; re-reading the global
    between the ``is not None`` check and the ``is_active()`` call could
    therefore raise ``AttributeError``.
    """
    global _service, _atexit_registered
    # Lock-free fast path: work on a snapshot, never on the global itself.
    svc = _service
    if svc is None:
        with _service_lock:
            # Re-check under the lock; another thread may have created the
            # service while we were waiting.
            svc = _service
            if svc is None:
                svc = _service = LSPService.create_from_config()
                if not _atexit_registered:
                    # atexit handlers run on normal interpreter exit and
                    # on SystemExit, but not on os._exit() or when the
                    # process is killed by a signal.  We only care about
                    # clean exits here: without the hook every normal
                    # exit would leave language-server children behind.
                    atexit.register(_atexit_shutdown)
                    _atexit_registered = True
    if svc is not None and svc.is_active():
        return svc
    return None
def shutdown_service() -> None:
    """Stop and forget the singleton LSP service, if one exists.

    Calling this repeatedly, or before any service was created, is a
    no-op.  Errors raised by the service's own ``shutdown()`` are logged
    at debug level and not propagated.
    """
    global _service
    with _service_lock:
        # Detach the singleton first so later get_service() calls start fresh.
        svc, _service = _service, None
    if svc is None:
        return
    try:
        svc.shutdown()
    except Exception as exc:  # noqa: BLE001
        logger.debug("LSP shutdown error: %s", exc)
def _atexit_shutdown() -> None:
    """Run :func:`shutdown_service` at interpreter exit without raising.

    Failures are reported at debug level only: the hook fires after the
    user has already seen the agent's final output, so a louder message
    would just be clutter.
    """
    try:
        shutdown_service()
    except Exception as exc:  # noqa: BLE001
        logger.debug("atexit LSP shutdown failed: %s", exc)
__all__ = ["get_service", "shutdown_service", "LSPService"]

308
agent/lsp/cli.py Normal file
View file

@ -0,0 +1,308 @@
"""``hermes lsp`` CLI subcommand.
Subcommands:
- ``status`` — show service state, configured servers, install status.
- ``install <server_id>`` — eagerly install one server's binary.
- ``install-all`` — try to install every server with a known recipe.
- ``restart`` — tear down running clients so the next edit re-spawns.
- ``which <server_id>`` — print the resolved binary path for one server.
- ``list`` — print the registry of supported servers.
The handlers are kept here (rather than in
``hermes_cli/main.py``) so the LSP module ships self-contained.
"""
from __future__ import annotations
import argparse
import sys
from typing import Optional
def register_subparser(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``lsp`` command and its subcommands to ``subparsers``.

    The chosen subcommand is stored on the namespace as ``lsp_command``
    and the namespace's ``func`` default is set to
    :func:`run_lsp_command`.
    """
    lsp_parser = subparsers.add_parser(
        "lsp",
        help="Language Server Protocol management",
        description=(
            "Manage the LSP layer that powers post-write semantic "
            "diagnostics in write_file/patch."
        ),
    )
    commands = lsp_parser.add_subparsers(dest="lsp_command")

    # status [--json]
    status_cmd = commands.add_parser("status", help="Show LSP service status")
    status_cmd.add_argument(
        "--json", action="store_true", help="Emit machine-readable JSON"
    )

    # list [--installed-only]
    list_cmd = commands.add_parser("list", help="List supported language servers")
    list_cmd.add_argument(
        "--installed-only",
        action="store_true",
        help="Only show servers whose binary is currently available",
    )

    # install <server>
    install_cmd = commands.add_parser("install", help="Install a server binary")
    install_cmd.add_argument("server", help="Server id (e.g. pyright, gopls)")

    # install-all [--include-manual]
    install_all_cmd = commands.add_parser(
        "install-all",
        help="Install every server with a known auto-install recipe",
    )
    install_all_cmd.add_argument(
        "--include-manual",
        action="store_true",
        help="Even attempt servers marked manual-install (best effort)",
    )

    # restart (takes no arguments)
    commands.add_parser(
        "restart",
        help="Tear down running LSP clients (next edit re-spawns)",
    )

    # which <server>
    which_cmd = commands.add_parser("which", help="Print binary path for a server")
    which_cmd.add_argument("server", help="Server id")

    lsp_parser.set_defaults(func=run_lsp_command)
def run_lsp_command(args: argparse.Namespace) -> int:
    """Dispatch ``hermes lsp <subcommand>`` and return its exit code.

    A missing or empty ``lsp_command`` is treated as ``status``.  An
    unrecognised subcommand writes a message to stderr and returns 2; a
    ``KeyboardInterrupt`` raised while a handler runs returns 130.
    """
    command = getattr(args, "lsp_command", None) or "status"
    # Thunks keep argument lookup lazy: only the chosen handler touches ``args``.
    handlers = {
        "status": lambda: _cmd_status(getattr(args, "json", False)),
        "list": lambda: _cmd_list(getattr(args, "installed_only", False)),
        "install": lambda: _cmd_install(args.server),
        "install-all": lambda: _cmd_install_all(
            getattr(args, "include_manual", False)
        ),
        "restart": lambda: _cmd_restart(),
        "which": lambda: _cmd_which(args.server),
    }
    try:
        handler = handlers.get(command)
        if handler is None:
            sys.stderr.write(f"unknown lsp subcommand: {command}\n")
            return 2
        return handler()
    except KeyboardInterrupt:
        return 130
def _cmd_status(emit_json: bool) -> int:
    """Print the LSP service state plus the registry of known servers.

    With ``emit_json`` true a JSON document with ``service`` and
    ``registry`` keys is written to stdout; otherwise a human-readable
    report is written.  Always returns 0.
    """
    from agent.lsp import get_service
    from agent.lsp.servers import SERVERS
    from agent.lsp.install import detect_status

    svc = get_service()
    service_active = svc is not None
    info = svc.get_status() if service_active else {"enabled": False}

    if emit_json:
        import json

        registry = []
        for server in SERVERS:
            registry.append(
                {
                    "server_id": server.server_id,
                    "extensions": list(server.extensions),
                    "description": server.description,
                    "binary_status": detect_status(
                        _recipe_pkg_for(server.server_id)
                    ),
                }
            )
        document = {"service": info, "registry": registry}
        sys.stdout.write(json.dumps(document, indent=2) + "\n")
        return 0

    report = [
        "LSP Service",
        "===========",
        f" enabled: {info.get('enabled', False)}",
    ]
    if service_active:
        report.extend(
            [
                f" wait_mode: {info.get('wait_mode')}",
                f" wait_timeout: {info.get('wait_timeout')}s",
                f" install_strategy:{info.get('install_strategy')}",
            ]
        )
        clients = info.get("clients") or []
        if not clients:
            report.append(" active clients: none")
        else:
            report.append(f" active clients: {len(clients)}")
            for client in clients:
                report.append(
                    f" - {client['server_id']:20s} state={client['state']:10s} root={client['workspace_root']}"
                )
        broken = info.get("broken") or []
        if broken:
            report.append(f" broken pairs: {len(broken)}")
            report.extend(f" - {pair}" for pair in broken)
        disabled = info.get("disabled_servers") or []
        if disabled:
            report.append(f" disabled in cfg: {', '.join(disabled)}")

    # Backend-tool gaps do not show up in the registry table: a server can
    # spawn fine yet emit no diagnostics without its sidecar binary
    # (bash-language-server -> shellcheck).
    backend_warnings = _backend_warnings()
    if backend_warnings:
        report.extend(["", "Backend warnings", "================"])
        report.extend(f" ! {warning}" for warning in backend_warnings)

    report.extend(["", "Registered Servers", "=================="])
    # NOTE(review): the "installed" marker is an empty string, so installed
    # rows come out one column narrower than the others — confirm whether a
    # glyph was intended here.
    markers = {
        "installed": "",
        "missing": "·",
        "manual-only": "?",
    }
    for server in SERVERS:
        status = detect_status(_recipe_pkg_for(server.server_id))
        marker = markers.get(status, " ")
        ext_summary = ", ".join(list(server.extensions)[:5])
        extra = len(server.extensions) - 5
        if extra > 0:
            ext_summary += f", … (+{extra})"
        report.append(
            f" {marker} {server.server_id:24s} [{status:11s}] {ext_summary}"
        )
        if server.description:
            report.append(f" {server.description}")

    sys.stdout.write("\n".join(report) + "\n")
    return 0
def _cmd_list(installed_only: bool) -> int:
    """Write one line per registered server: id, install status, extensions.

    When ``installed_only`` is true, servers whose status is not
    ``"installed"`` are left out.  Always returns 0.
    """
    from agent.lsp.servers import SERVERS
    from agent.lsp.install import detect_status

    for server in SERVERS:
        status = detect_status(_recipe_pkg_for(server.server_id))
        if status == "installed" or not installed_only:
            sys.stdout.write(
                f"{server.server_id:24s} [{status:11s}] {','.join(server.extensions)}\n"
            )
    return 0
def _cmd_install(server_id: str) -> int:
    """Install the binary for one server.

    Returns 0 when the server was already installed or the install
    succeeded, and 1 when ``try_install`` produced no binary path.  The
    failure message distinguishes manual-install recipes from ordinary
    failures.
    """
    from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status

    pkg = _recipe_pkg_for(server_id)
    if detect_status(pkg) == "installed":
        sys.stdout.write(f"{server_id} already installed\n")
        return 0

    sys.stdout.write(f"installing {server_id} (pkg={pkg}) ...\n")
    sys.stdout.flush()
    bin_path = try_install(pkg, "auto")
    if bin_path is not None:
        sys.stdout.write(f"installed: {bin_path}\n")
        return 0

    recipe = INSTALL_RECIPES.get(pkg)
    if recipe and recipe.get("strategy") == "manual":
        failure = (
            f"{server_id}: this server requires a manual install. "
            f"See documentation.\n"
        )
    else:
        failure = f"{server_id}: install failed (see logs).\n"
    sys.stderr.write(failure)
    return 1
def _cmd_install_all(include_manual: bool) -> int:
    """Try to install every registered server that has an install recipe.

    Servers without a recipe are skipped, as are manual-strategy recipes
    unless ``include_manual`` is true.  Returns 1 if any attempted
    install failed, otherwise 0.
    """
    from agent.lsp.servers import SERVERS
    from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status

    any_failed = False
    for server in SERVERS:
        pkg = _recipe_pkg_for(server.server_id)
        recipe = INSTALL_RECIPES.get(pkg)
        if recipe is None:
            continue
        manual = recipe.get("strategy") == "manual"
        if manual and not include_manual:
            continue
        if detect_status(pkg) == "installed":
            sys.stdout.write(f" {server.server_id:24s} already installed\n")
            continue
        # Flush the progress prefix before the potentially slow install.
        sys.stdout.write(f" installing {server.server_id} (pkg={pkg}) ... ")
        sys.stdout.flush()
        installed_path = try_install(pkg, "auto")
        if not installed_path:
            sys.stdout.write("FAILED\n")
            any_failed = True
        else:
            sys.stdout.write(f"ok ({installed_path})\n")
    return 1 if any_failed else 0
def _cmd_restart() -> int:
    """Shut the LSP service down and report it on stdout; returns 0."""
    from agent.lsp import shutdown_service as _shutdown

    _shutdown()
    message = "LSP service shut down. Next edit will respawn clients.\n"
    sys.stdout.write(message)
    return 0
def _cmd_which(server_id: str) -> int:
    """Print the resolved binary path for one server.

    The Hermes-managed LSP bin directory is checked first, then ``PATH``.
    Returns 0 and writes the path to stdout when a binary is found;
    otherwise writes a message to stderr and returns 1.

    The install recipe is looked up through :func:`_recipe_pkg_for`, the
    same way the other subcommands do, so registry ids whose recipe key
    differs (for example ``typescript``) resolve to the recipe's ``bin``
    name.  A direct lookup by ``server_id`` is kept as a fallback for
    backward compatibility.
    """
    from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir
    import shutil as _shutil

    recipe = INSTALL_RECIPES.get(_recipe_pkg_for(server_id)) or INSTALL_RECIPES.get(
        server_id
    )
    # Without a recipe (or without a "bin" entry) assume the binary is
    # named after the server id itself.
    bin_name = (recipe or {}).get("bin", server_id)

    staged = hermes_lsp_bin_dir() / bin_name
    if staged.exists():
        sys.stdout.write(str(staged) + "\n")
        return 0

    on_path = _shutil.which(bin_name)
    if on_path:
        sys.stdout.write(on_path + "\n")
        return 0

    sys.stderr.write(f"{server_id}: not installed\n")
    return 1
def _recipe_pkg_for(server_id: str) -> str:
"""Map a registry ``server_id`` to its install-recipe package key."""
# The mapping lives here (not in install.py) because it's a CLI
# convenience layer. Most server_ids are also their own recipe
# key, but a few differ (e.g. ``vue-language-server`` →
# ``@vue/language-server``).
aliases = {
"vue-language-server": "@vue/language-server",
"astro-language-server": "@astrojs/language-server",
"dockerfile-ls": "dockerfile-language-server-nodejs",
"typescript": "typescript-language-server",
}
return aliases.get(server_id, server_id)
def _backend_warnings() -> list:
    """Collect notes about missing sidecar tools that fail silently.

    Some language servers are thin wrappers around an external CLI for
    their diagnostics — they start cleanly but never report anything
    when that CLI is absent.  The only case checked here is
    bash-language-server without ``shellcheck`` on PATH.

    Returns a list of short, actionable strings (possibly empty).
    """
    import shutil as _shutil
    from agent.lsp.install import hermes_lsp_bin_dir

    # The server counts as installed when it is on PATH or staged in the
    # Hermes-managed bin directory (only consulted when PATH has no hit).
    bash_installed = _shutil.which("bash-language-server") is not None
    if not bash_installed:
        bash_installed = (hermes_lsp_bin_dir() / "bash-language-server").exists()

    if not bash_installed or _shutil.which("shellcheck") is not None:
        return []
    return [
        "bash-language-server is installed but shellcheck is missing — "
        "diagnostics will be empty (apt: shellcheck, brew: shellcheck, "
        "scoop: shellcheck)."
    ]

930
agent/lsp/client.py Normal file
View file

@ -0,0 +1,930 @@
"""Async LSP client over stdin/stdout.
One :class:`LSPClient` corresponds to one ``(language_server, workspace_root)``
pair — exactly what OpenCode keys clients on, and the same shape Claude
Code uses. The client owns a child process, drives the JSON-RPC
exchange, and exposes:
- :meth:`open_file` / :meth:`change_file` — text document sync
- :meth:`wait_for_diagnostics` — block until the server emits fresh
diagnostics for a specific file (or a timeout fires)
- :meth:`diagnostics_for` — read the current per-file diagnostic store
- :meth:`shutdown` — graceful close + SIGTERM/SIGKILL fallback
The class is designed for async use from a single asyncio event loop.
The :class:`agent.lsp.manager.LSPService` runs an event loop in a
background thread so the synchronous file_operations layer can call
into it via :func:`agent.lsp.manager.LSPService.touch_file`.
Implementation notes:
- Push diagnostics are stored per-URI in :attr:`_push_diagnostics` from
``textDocument/publishDiagnostics`` notifications. Pull diagnostics
go in :attr:`_pull_diagnostics`. The merged view dedupes by content.
- Whole-document sync. Even when the server advertises incremental
sync, we send a single ``contentChanges`` entry replacing the
entire document. Pretending to be incremental while sending a
full replacement is well-tolerated by every major server and saves
range bookkeeping. See OpenCode's ``client.ts:584-659`` for the
same trick.
- The "touch-file dance": every ``open_file`` call also fires a
``workspace/didChangeWatchedFiles`` notification (CREATED on the
first open, CHANGED thereafter). Some servers (clangd, eslint)
only re-scan when this notification fires, even though the LSP spec
doesn't strictly require it.
- ``ContentModified`` (-32801) errors get retried with exponential
backoff up to 3 times. This matches Claude Code's
``LSPServerInstance.sendRequest``.
"""
from __future__ import annotations
import asyncio
import logging
import os
from pathlib import Path
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
from urllib.parse import quote, unquote
from agent.lsp.protocol import (
ERROR_CONTENT_MODIFIED,
ERROR_METHOD_NOT_FOUND,
LSPProtocolError,
LSPRequestError,
classify_message,
encode_message,
make_error_response,
make_notification,
make_request,
make_response,
read_message,
)
# Logger shared by everything in this module.
logger = logging.getLogger("agent.lsp.client")
# Timeouts (seconds) — mirror OpenCode's constants, scaled to seconds.
# Upper bound on the ``initialize`` request during the startup handshake.
INITIALIZE_TIMEOUT = 45.0
# NOTE(review): the three DIAGNOSTICS_* values and PUSH_DEBOUNCE are not
# referenced in the part of the file reviewed here; presumably they bound
# the diagnostics wait/pull logic — confirm against their users.
DIAGNOSTICS_DOCUMENT_WAIT = 5.0
DIAGNOSTICS_FULL_WAIT = 10.0
DIAGNOSTICS_REQUEST_TIMEOUT = 3.0
PUSH_DEBOUNCE = 0.15
SHUTDOWN_GRACE = 1.0 # seconds between SIGTERM and SIGKILL
# Retry policy for transient ContentModified errors.
# Number of retries after the first attempt.
MAX_CONTENT_MODIFIED_RETRIES = 3
RETRY_BASE_DELAY = 0.5 # 0.5, 1.0, 2.0 — exponential
def file_uri(path: str) -> str:
    """Build a ``file://`` URI from a filesystem path.

    The path is made absolute first.  Characters other than ``/`` and
    ``:`` are percent-encoded, so spaces and non-ASCII names are safe.
    On Windows, backslashes become forward slashes and a leading slash is
    added so the drive letter lands in the path component
    (``C:\\foo`` → ``file:///C:/foo``).
    """
    target = os.path.abspath(path)
    if os.name == "nt":
        target = target.replace("\\", "/")
        # A drive-letter path has no leading slash; add one.
        target = target if target.startswith("/") else "/" + target
    encoded = quote(target, safe="/:")
    return f"file://{encoded}"
def uri_to_path(uri: str) -> str:
    """Convert a ``file://`` URI back to a normalised filesystem path.

    Strings that do not start with ``file://`` are returned unchanged.
    On Windows the slash in front of a drive letter is dropped.
    """
    scheme = "file://"
    if not uri.startswith(scheme):
        return uri
    remainder = uri[len(scheme):]
    # "/C:/foo" style: a slash, one character, then a colon.
    looks_like_drive = remainder[:1] == "/" and remainder[2:3] == ":"
    if os.name == "nt" and looks_like_drive:
        remainder = remainder[1:]
    return os.path.normpath(unquote(remainder))
def _end_position(text: str) -> Dict[str, int]:
"""Return the LSP Position at the end of ``text``.
Used to construct a single-range "replace whole document" change
for ``textDocument/didChange`` regardless of the server's declared
sync mode.
"""
if not text:
return {"line": 0, "character": 0}
lines = text.splitlines(keepends=False)
last_line = len(lines) - 1
last_col = len(lines[-1]) if lines else 0
# If the text ends with a trailing newline, ``splitlines`` won't
# represent it. The end position is then the start of the next
# (empty) line — line index is len(lines), column 0.
if text.endswith(("\n", "\r")):
return {"line": last_line + 1, "character": 0}
return {"line": last_line, "character": last_col}
class LSPClient:
"""Async LSP client tied to one server process and one workspace root.
Lifecycle:
c = LSPClient(server_id=..., workspace_root=..., command=[...], initialization_options=...)
await c.start() # spawn + initialize
ver = await c.open_file("/path/to/foo.py")
await c.wait_for_diagnostics("/path/to/foo.py", ver)
diags = c.diagnostics_for("/path/to/foo.py")
await c.shutdown()
"""
# ------------------------------------------------------------------
# construction + lifecycle
# ------------------------------------------------------------------
def __init__(
    self,
    *,
    server_id: str,
    workspace_root: str,
    command: List[str],
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[str] = None,
    initialization_options: Optional[Dict[str, Any]] = None,
    seed_diagnostics_on_first_push: bool = False,
) -> None:
    """Record configuration and set up empty state; nothing is spawned.

    All arguments are keyword-only.

    Args:
        server_id: Identifier of the language server (used in log lines).
        workspace_root: Workspace directory this client is bound to.
        command: Executable followed by its arguments; copied.
        env: Extra environment variables layered over ``os.environ``
            when the process is spawned.
        cwd: Working directory for the child; defaults to
            ``workspace_root``.
        initialization_options: Sent as ``initializationOptions``; an
            empty dict is used when omitted.
        seed_diagnostics_on_first_push: Stored as ``_seed_first_push``.
    """
    # --- configuration -------------------------------------------------
    self.server_id = server_id
    self.workspace_root = workspace_root
    self._command = list(command)
    self._env = env
    self._cwd = cwd or workspace_root
    self._init_options = initialization_options or {}
    self._seed_first_push = seed_diagnostics_on_first_push

    # --- child process and its background tasks -----------------------
    self._proc: Optional[asyncio.subprocess.Process] = None
    self._stderr_task: Optional[asyncio.Task] = None
    self._reader_task: Optional[asyncio.Task] = None

    # --- request/response correlation ---------------------------------
    # Ids are handed out sequentially; each in-flight request owns a
    # future in ``_pending`` until its response arrives.
    self._next_id: int = 0
    self._pending: Dict[int, asyncio.Future] = {}

    # --- server → client requests -------------------------------------
    # Deliberately a short, explicit table; any other method gets a
    # method-not-found error response.
    self._request_handlers: Dict[str, Callable[[Any], Awaitable[Any]]] = {
        "window/workDoneProgress/create": self._handle_work_done_create,
        "workspace/configuration": self._handle_workspace_configuration,
        "client/registerCapability": self._handle_register_capability,
        "client/unregisterCapability": self._handle_unregister_capability,
        "workspace/workspaceFolders": self._handle_workspace_folders,
        "workspace/diagnostic/refresh": self._handle_diagnostic_refresh,
    }

    # --- server → client notifications --------------------------------
    # Notifications without an entry here (window/showMessage,
    # $/progress, ...) are dropped silently.
    self._notification_handlers: Dict[str, Callable[[Any], None]] = {
        "textDocument/publishDiagnostics": self._handle_publish_diagnostics,
    }

    # --- document tracking ---------------------------------------------
    # Needed so didChange can bump versions.
    self._files: Dict[str, Dict[str, Any]] = {}

    # --- diagnostics, keyed by file path (NOT by URI) -----------------
    self._push_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
    self._pull_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
    # Time of the last publish per path, for wait-for-fresh logic.
    self._published: Dict[str, float] = {}
    # Version carried by the latest push per path (equals our didChange
    # version when the server echoes it).
    self._published_version: Dict[str, int] = {}
    # Paths that have received a first push (typescript-style seeding).
    self._first_push_seen: Set[str] = set()

    # --- dynamic capability registrations (diagnostic ones only) ------
    self._diagnostic_registrations: Dict[str, Dict[str, Any]] = {}

    # --- lifecycle state ------------------------------------------------
    self._state: str = "stopped"
    self._initialize_result: Optional[Dict[str, Any]] = None
    self._sync_kind: int = 1  # 1=Full, 2=Incremental
    self._stopping: bool = False

    # --- waiter signalling ----------------------------------------------
    self._push_event = asyncio.Event()
    # Bumped on every publishDiagnostics push.  Waiters snapshot it on
    # entry and treat any increase as "recheck the predicate", which
    # sidesteps asyncio.Event's sticky set-state.
    self._push_counter = 0
    # Lets wait_for_diagnostics re-loop when the server announces a new
    # dynamic provider.
    self._registration_event = asyncio.Event()
@property
def is_running(self) -> bool:
return self._state == "running" and self._proc is not None and self._proc.returncode is None
@property
def state(self) -> str:
    """Current lifecycle state: ``"stopped"``, ``"starting"``, ``"running"`` or ``"error"``."""
    return self._state
async def start(self) -> None:
"""Spawn the server and complete the initialize handshake.
Raises any exception encountered during spawn/init. On failure
the process is killed and the client is left in state
``"error"`` re-call ``start()`` to retry.
"""
if self._state in ("running", "starting"):
return
self._state = "starting"
try:
await self._spawn()
await self._initialize()
self._state = "running"
except Exception:
self._state = "error"
await self._cleanup_process()
raise
async def _spawn(self) -> None:
    """Launch the server process and start its two background tasks.

    The child inherits ``os.environ`` with ``self._env`` layered on top
    and gets pipes for stdin, stdout and stderr.

    Raises:
        LSPProtocolError: when the executable cannot be found.
    """
    child_env = dict(os.environ)
    child_env.update(self._env or {})
    pipe = asyncio.subprocess.PIPE
    try:
        self._proc = await asyncio.create_subprocess_exec(
            self._command[0],
            *self._command[1:],
            stdin=pipe,
            stdout=pipe,
            stderr=pipe,
            env=child_env,
            cwd=self._cwd,
        )
    except FileNotFoundError as e:
        raise LSPProtocolError(
            f"LSP server binary not found: {self._command[0]} ({e})"
        ) from e
    # stderr must be consumed continuously: an unread pipe fills up and
    # then blocks the server.
    self._stderr_task = asyncio.create_task(self._drain_stderr())
    # The reader loop handles everything arriving on stdout.
    self._reader_task = asyncio.create_task(self._reader_loop())
async def _drain_stderr(self) -> None:
if self._proc is None or self._proc.stderr is None:
return
try:
while True:
line = await self._proc.stderr.readline()
if not line:
break
text = line.decode("utf-8", errors="replace").rstrip()
if text:
logger.debug("[%s] stderr: %s", self.server_id, text[:1000])
except (asyncio.CancelledError, OSError):
pass
async def _reader_loop(self) -> None:
    """Read messages from the server's stdout and route them.

    Responses complete the matching pending future, server requests are
    handled in their own task, notifications go to the notification
    table, and anything else is logged and dropped.  The loop ends on
    EOF, a protocol error, cancellation or an ``OSError``; in every case
    all still-pending requests are failed with ``LSPProtocolError`` so
    their callers do not wait forever.
    """
    if self._proc is None or self._proc.stdout is None:
        return
    # asyncio only keeps weak references to tasks, so a task whose result
    # is discarded can be garbage-collected before it finishes.  Keep
    # in-flight request handlers strongly referenced for as long as this
    # loop runs; each one removes itself when done.
    inflight: Set[asyncio.Task] = set()
    try:
        while True:
            msg = await read_message(self._proc.stdout)
            if msg is None:
                logger.debug("[%s] server closed stdout cleanly", self.server_id)
                break
            kind, key = classify_message(msg)
            if kind == "response":
                self._dispatch_response(key, msg)
            elif kind == "request":
                task = asyncio.create_task(self._dispatch_request(key, msg))
                inflight.add(task)
                task.add_done_callback(inflight.discard)
            elif kind == "notification":
                self._dispatch_notification(key, msg)
            else:
                logger.warning("[%s] dropping invalid message: %r", self.server_id, msg)
    except LSPProtocolError as e:
        logger.warning("[%s] protocol error in reader loop: %s", self.server_id, e)
    except (asyncio.CancelledError, OSError):
        pass
    finally:
        # Wake up any pending requests so they can fail fast.
        for fut in list(self._pending.values()):
            if not fut.done():
                fut.set_exception(LSPProtocolError("server connection closed"))
        self._pending.clear()
async def _initialize(self) -> None:
    """Perform the LSP ``initialize`` / ``initialized`` handshake.

    Sends the client's capabilities, stores the server's reply in
    ``_initialize_result``, derives ``_sync_kind`` from it, and then
    notifies ``initialized``.  When initialization options are set they
    are additionally pushed via ``workspace/didChangeConfiguration``.

    Raises:
        asyncio.TimeoutError: no reply within ``INITIALIZE_TIMEOUT``.
    """
    root_uri = file_uri(self.workspace_root)

    workspace_caps = {
        "configuration": True,
        "workspaceFolders": True,
        "didChangeWatchedFiles": {"dynamicRegistration": True},
        "diagnostics": {"refreshSupport": False},
    }
    text_document_caps = {
        "synchronization": {
            "dynamicRegistration": False,
            "didOpen": True,
            "didChange": True,
            "didSave": True,
            "willSave": False,
            "willSaveWaitUntil": False,
        },
        "diagnostic": {
            "dynamicRegistration": True,
            "relatedDocumentSupport": True,
        },
        "publishDiagnostics": {
            "relatedInformation": True,
            "tagSupport": {"valueSet": [1, 2]},
            "versionSupport": True,
            "codeDescriptionSupport": True,
            "dataSupport": False,
        },
        "hover": {"contentFormat": ["markdown", "plaintext"]},
        "definition": {"linkSupport": True},
        "references": {},
        "documentSymbol": {"hierarchicalDocumentSymbolSupport": True},
    }
    params = {
        "rootUri": root_uri,
        "rootPath": self.workspace_root,
        "processId": os.getpid(),
        "workspaceFolders": [{"name": "workspace", "uri": root_uri}],
        "initializationOptions": self._init_options,
        "capabilities": {
            "window": {"workDoneProgress": True},
            "workspace": workspace_caps,
            "textDocument": text_document_caps,
            "general": {"positionEncodings": ["utf-16"]},
        },
    }

    handshake = self._send_request("initialize", params)
    result = await asyncio.wait_for(handshake, timeout=INITIALIZE_TIMEOUT)
    self._initialize_result = result
    self._sync_kind = self._extract_sync_kind(result.get("capabilities") or {})
    await self._send_notification("initialized", {})

    if not self._init_options:
        return
    # Some servers (vtsls, eslint) expect their settings through
    # didChangeConfiguration even though initializationOptions already
    # carried them.
    await self._send_notification(
        "workspace/didChangeConfiguration",
        {"settings": self._init_options},
    )
@staticmethod
def _extract_sync_kind(capabilities: dict) -> int:
sync = capabilities.get("textDocumentSync")
if isinstance(sync, int):
return sync
if isinstance(sync, dict):
change = sync.get("change")
if isinstance(change, int):
return change
return 1 # default to Full
async def shutdown(self) -> None:
"""Best-effort graceful shutdown.
Sends ``shutdown`` + ``exit``, then SIGTERMs/SIGKILLs the
process if it doesn't exit cleanly. Idempotent.
"""
if self._stopping:
return
self._stopping = True
try:
if self.is_running:
try:
await asyncio.wait_for(self._send_request("shutdown", None), timeout=2.0)
except (asyncio.TimeoutError, LSPRequestError, LSPProtocolError):
pass
try:
await self._send_notification("exit", None)
except Exception:
pass
finally:
self._state = "stopped"
await self._cleanup_process()
async def _cleanup_process(self) -> None:
if self._reader_task is not None and not self._reader_task.done():
self._reader_task.cancel()
try:
await self._reader_task
except (asyncio.CancelledError, Exception): # noqa: BLE001
pass
if self._stderr_task is not None and not self._stderr_task.done():
self._stderr_task.cancel()
try:
await self._stderr_task
except (asyncio.CancelledError, Exception): # noqa: BLE001
pass
proc = self._proc
self._proc = None
if proc is None:
return
if proc.returncode is None:
try:
proc.terminate()
try:
await asyncio.wait_for(proc.wait(), timeout=SHUTDOWN_GRACE)
except asyncio.TimeoutError:
try:
proc.kill()
await proc.wait()
except ProcessLookupError:
pass
except ProcessLookupError:
pass
# ------------------------------------------------------------------
# request / notification plumbing
# ------------------------------------------------------------------
async def _send_request(self, method: str, params: Any) -> Any:
    """Send a JSON-RPC request and wait for its response.

    Args:
        method: LSP method name.
        params: Request parameters (may be ``None``).

    Returns:
        The ``result`` of the response, as delivered to the pending
        future.

    Raises:
        LSPProtocolError: stdin is unavailable or the write failed.
        Any exception set on the pending future.

    The pending-table entry is removed on every exit path, including a
    cancellation that arrives while the write is still draining (for
    example from an enclosing ``asyncio.wait_for`` timeout), so abandoned
    requests do not accumulate in ``_pending``.
    """
    if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
        raise LSPProtocolError(f"cannot send {method!r}: stdin closed")
    loop = asyncio.get_running_loop()
    req_id = self._next_id
    self._next_id += 1
    fut: asyncio.Future = loop.create_future()
    self._pending[req_id] = fut
    try:
        try:
            self._proc.stdin.write(encode_message(make_request(req_id, method, params)))
            await self._proc.stdin.drain()
        except (BrokenPipeError, ConnectionResetError, OSError) as e:
            raise LSPProtocolError(f"send failed for {method!r}: {e}") from e
        return await fut
    finally:
        self._pending.pop(req_id, None)
async def _send_request_with_retry(self, method: str, params: Any, *, timeout: float) -> Any:
    """Send a request, retrying while the server answers ``ContentModified``.

    Each attempt is limited to ``timeout`` seconds.  A ``ContentModified``
    (-32801) error triggers up to ``MAX_CONTENT_MODIFIED_RETRIES``
    retries, pausing 0.5s, 1.0s, 2.0s before them — the same policy as
    Claude Code's ``LSPServerInstance.sendRequest``.  Every other error,
    and ``ContentModified`` on the final attempt, propagates.
    """
    for attempt in range(MAX_CONTENT_MODIFIED_RETRIES + 1):
        try:
            return await asyncio.wait_for(self._send_request(method, params), timeout=timeout)
        except LSPRequestError as e:
            retryable = e.code == ERROR_CONTENT_MODIFIED
            if not retryable or attempt >= MAX_CONTENT_MODIFIED_RETRIES:
                raise
            backoff = RETRY_BASE_DELAY * (2 ** attempt)
            await asyncio.sleep(backoff)
async def _send_notification(self, method: str, params: Any) -> None:
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
return
try:
self._proc.stdin.write(encode_message(make_notification(method, params)))
await self._proc.stdin.drain()
except (BrokenPipeError, ConnectionResetError, OSError) as e:
logger.debug("[%s] notify %s failed: %s", self.server_id, method, e)
async def _send_response(self, req_id: Any, result: Any) -> None:
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
return
try:
self._proc.stdin.write(encode_message(make_response(req_id, result)))
await self._proc.stdin.drain()
except (BrokenPipeError, ConnectionResetError, OSError):
pass
async def _send_error_response(self, req_id: Any, code: int, message: str) -> None:
    """Write an error response for server request ``req_id``, best-effort.

    Does nothing when the process or its stdin is missing or closing;
    an ``OSError`` while writing is silently ignored.
    """
    proc = self._proc
    stdin = None if proc is None else proc.stdin
    if stdin is None or stdin.is_closing():
        return
    try:
        stdin.write(encode_message(make_error_response(req_id, code, message)))
        await stdin.drain()
    except OSError:  # includes BrokenPipeError / ConnectionResetError
        pass
def _dispatch_response(self, req_id: int, msg: dict) -> None:
    """Resolve the pending future for ``req_id`` from a response message.

    Responses for unknown ids, or for futures that are already done,
    are ignored. A message carrying an ``error`` member fails the
    future with :class:`LSPRequestError`; otherwise the future is
    resolved with ``msg["result"]`` (``None`` when absent).

    Malformed error payloads are tolerated so that the waiting caller
    always gets an answer: a non-dict ``error`` is used as the message
    text, and a ``code`` that cannot be converted to ``int`` falls back
    to ``-32000``.
    """
    fut = self._pending.get(req_id)
    if fut is None or fut.done():
        return
    if "error" not in msg:
        fut.set_result(msg.get("result"))
        return

    err = msg["error"]
    if not isinstance(err, dict):
        # ``null`` (or any falsy value) means "no details"; any other
        # non-object payload is surfaced as the error message.
        err = {"message": err} if err else {}
    try:
        code = int(err.get("code", -32000))
    except (TypeError, ValueError):
        # Non-numeric code from a non-conformant server: do not let the
        # conversion error escape and leave the future unresolved.
        code = -32000
    fut.set_exception(
        LSPRequestError(
            code=code,
            message=str(err.get("message", "unknown")),
            data=err.get("data"),
        )
    )
async def _dispatch_request(self, req_id: Any, msg: dict) -> None:
    """Run the registered handler for a server request and reply.

    Replies with ``ERROR_METHOD_NOT_FOUND`` when no handler is
    registered for the method, with error ``-32000`` when the handler
    raises, and with the handler's return value otherwise.
    """
    method = msg.get("method", "")
    handler = self._request_handlers.get(method)
    if handler is None:
        await self._send_error_response(req_id, ERROR_METHOD_NOT_FOUND, f"method not found: {method}")
        return
    try:
        result = await handler(msg.get("params"))
    except Exception as exc:  # noqa: BLE001 — protocol must not blow up
        logger.warning("[%s] request handler %s failed: %s", self.server_id, method, exc)
        await self._send_error_response(req_id, -32000, f"handler failed: {exc}")
    else:
        await self._send_response(req_id, result)
def _dispatch_notification(self, method: str, msg: dict) -> None:
    """Call the registered handler for a server notification, if any.

    Notifications without a handler are dropped. A handler that raises
    is logged at DEBUG and otherwise ignored.
    """
    handler = self._notification_handlers.get(method)
    if handler is not None:
        try:
            handler(msg.get("params"))
        except Exception as exc:  # noqa: BLE001
            logger.debug("[%s] notification handler %s failed: %s", self.server_id, method, exc)
# ------------------------------------------------------------------
# built-in server-→-client request handlers
# ------------------------------------------------------------------
async def _handle_work_done_create(self, params: Any) -> Any:
    """Acknowledge a progress token with an empty (``None``) result.

    Some servers require this acknowledgement; ``params`` is ignored.
    """
    return None
async def _handle_workspace_configuration(self, params: Any) -> Any:
    """Answer a configuration request from ``self._init_options``.

    Returns one entry per requested item, in order. An item's dotted
    ``section`` is walked through the nested option dicts; a missing
    step yields ``None``. Items without a section receive the whole
    options object (``None`` when there are no options). Mirrors
    OpenCode's ``client.ts:198-220``.
    """
    if not isinstance(params, dict):
        return [None]
    options = self._init_options
    answers: List[Any] = []
    for item in params.get("items") or []:
        if not isinstance(item, dict):
            answers.append(None)
            continue
        section = item.get("section")
        if not section or not options:
            answers.append(options or None)
            continue
        node: Any = options
        for key in str(section).split("."):
            if not (isinstance(node, dict) and key in node):
                node = None
                break
            node = node[key]
        answers.append(node)
    return answers
async def _handle_register_capability(self, params: Any) -> Any:
    """Record dynamic registrations for ``textDocument/diagnostic``.

    Each such registration with a truthy ``id`` is stored in
    ``self._diagnostic_registrations`` under ``str(id)`` and
    ``self._registration_event`` is set. All other registrations and
    malformed entries are ignored. Always returns ``None``.
    """
    if isinstance(params, dict):
        for entry in params.get("registrations") or []:
            if not isinstance(entry, dict):
                continue
            entry_id = entry.get("id")
            if entry.get("method") == "textDocument/diagnostic" and entry_id:
                self._diagnostic_registrations[str(entry_id)] = entry
                self._registration_event.set()
    return None
async def _handle_unregister_capability(self, params: Any) -> Any:
    """Forget previously recorded diagnostic registrations.

    Every entry with a truthy ``id`` is removed from
    ``self._diagnostic_registrations`` (unknown ids are ignored).
    Always returns ``None``.
    """
    if isinstance(params, dict):
        # "unregisterations" is the field name the LSP specification
        # itself uses — it is not a typo to be corrected here.
        for entry in params.get("unregisterations") or []:
            if isinstance(entry, dict):
                entry_id = entry.get("id")
                if entry_id:
                    self._diagnostic_registrations.pop(str(entry_id), None)
    return None
async def _handle_workspace_folders(self, params: Any) -> Any:
    """Report ``self.workspace_root`` as the only workspace folder."""
    root_folder = {"name": "workspace", "uri": file_uri(self.workspace_root)}
    return [root_folder]
async def _handle_diagnostic_refresh(self, params: Any) -> Any:
    """Acknowledge a diagnostic refresh request without acting on it.

    Refresh is not honoured — diagnostics are re-pulled on every
    touchFile instead.
    """
    return None
# ------------------------------------------------------------------
# publishDiagnostics handler
# ------------------------------------------------------------------
def _handle_publish_diagnostics(self, params: Any) -> None:
    """Store pushed diagnostics for one file and wake waiters.

    Params that are not a dict, or whose ``uri`` is not a string, are
    ignored. The diagnostics list, the receive time (event-loop clock)
    and, when it is an ``int``, the document version are recorded under
    the path derived from the URI.

    When ``self._seed_first_push`` is set, the very first push for a
    path only seeds the stores: it arrives before a user-triggered
    didChange could have produced fresh diagnostics, so it must not
    resolve a waiter. Every other push bumps ``self._push_counter``
    and sets ``self._push_event``.
    """
    if not isinstance(params, dict):
        return
    uri = params.get("uri")
    if not isinstance(uri, str):
        return
    path = uri_to_path(uri)

    diagnostics = params.get("diagnostics") or []
    if not isinstance(diagnostics, list):
        diagnostics = []
    version = params.get("version")
    received_at = asyncio.get_event_loop().time()

    # Decide before mutating ``_first_push_seen`` below.
    seed_only = self._seed_first_push and path not in self._first_push_seen

    self._first_push_seen.add(path)
    self._push_diagnostics[path] = diagnostics
    self._published[path] = received_at
    if isinstance(version, int):
        self._published_version[path] = version

    if seed_only:
        return

    # Bump the monotonic push counter and wake every waiter. The Event
    # stays set so a wait already in progress resolves; waiters
    # re-check their predicate after waking and compare against
    # ``_push_counter`` to detect a fresh event.
    self._push_counter += 1
    self._push_event.set()
# ------------------------------------------------------------------
# public file-sync API
# ------------------------------------------------------------------
async def open_file(self, path: str, *, language_id: str = "plaintext") -> int:
    """Sync ``path`` to the server: didOpen the first time, didChange after.

    The file is read from disk as UTF-8 (undecodable bytes replaced).
    Each call first sends ``workspace/didChangeWatchedFiles`` and then
    the matching text-document notification.

    Returns:
        The document version just announced (``0`` for a first open,
        previous version + 1 otherwise) — the value
        ``wait_for_diagnostics`` should match against.

    Raises:
        LSPProtocolError: the client is not running or the file cannot
            be read.
    """
    if not self.is_running:
        raise LSPProtocolError("client not running")
    abs_path = os.path.abspath(path)
    try:
        text = Path(abs_path).read_text(encoding="utf-8", errors="replace")
    except OSError as e:
        raise LSPProtocolError(f"cannot read {abs_path}: {e}") from e
    uri = file_uri(abs_path)
    tracked = self._files.get(abs_path)

    if tracked is None:
        # First open: didChangeWatchedFiles CREATED + didOpen.
        await self._send_notification(
            "workspace/didChangeWatchedFiles",
            {"changes": [{"uri": uri, "type": 1}]},  # 1 = CREATED
        )
        # A fresh open starts from scratch: drop stale push/pull state.
        for store in (
            self._push_diagnostics,
            self._pull_diagnostics,
            self._published,
            self._published_version,
        ):
            store.pop(abs_path, None)
        document = {
            "uri": uri,
            "languageId": language_id,
            "version": 0,
            "text": text,
        }
        await self._send_notification("textDocument/didOpen", {"textDocument": document})
        self._files[abs_path] = {"version": 0, "text": text}
        return 0

    # Re-open: bump version, fire didChangeWatchedFiles + didChange.
    await self._send_notification(
        "workspace/didChangeWatchedFiles",
        {"changes": [{"uri": uri, "type": 2}]},  # 2 = CHANGED
    )
    next_version = tracked["version"] + 1
    previous_text = tracked["text"]
    change: Dict[str, Any]
    if self._sync_kind == 2:
        # Replace the whole previous document via an explicit range.
        change = {
            "range": {
                "start": {"line": 0, "character": 0},
                "end": _end_position(previous_text),
            },
            "text": text,
        }
    else:
        change = {"text": text}
    await self._send_notification(
        "textDocument/didChange",
        {
            "textDocument": {"uri": uri, "version": next_version},
            "contentChanges": [change],
        },
    )
    self._files[abs_path] = {"version": next_version, "text": text}
    return next_version
async def save_file(self, path: str) -> None:
    """Send ``textDocument/didSave`` for ``path``; no-op when not running.

    Some linters re-scan only on save.
    """
    if self.is_running:
        document = {"uri": file_uri(os.path.abspath(path))}
        await self._send_notification("textDocument/didSave", {"textDocument": document})
# ------------------------------------------------------------------
# diagnostics: pull + wait
# ------------------------------------------------------------------
async def _pull_document_diagnostics(self, path: str) -> None:
    """Send ``textDocument/diagnostic`` for one file and store the result.

    ``items`` of the report are stored in ``self._pull_diagnostics``
    under the file's absolute path; ``items`` of each entry in
    ``relatedDocuments`` are stored under the path derived from its
    URI. Request errors, protocol errors and timeouts are logged at
    DEBUG and otherwise ignored (the server may not support pulls).
    """
    abs_path = os.path.abspath(path)
    try:
        report = await self._send_request_with_retry(
            "textDocument/diagnostic",
            {"textDocument": {"uri": file_uri(abs_path)}},
            timeout=DIAGNOSTICS_REQUEST_TIMEOUT,
        )
    except (LSPRequestError, LSPProtocolError, asyncio.TimeoutError) as exc:
        logger.debug("[%s] document diagnostic pull failed: %s", self.server_id, exc)
        return
    if not isinstance(report, dict):
        return

    own_items = report.get("items")
    if isinstance(own_items, list):
        self._pull_diagnostics[abs_path] = own_items

    related = report.get("relatedDocuments")
    if not isinstance(related, dict):
        return
    for related_uri, sub_report in related.items():
        sub_items = sub_report.get("items") if isinstance(sub_report, dict) else None
        if isinstance(sub_items, list):
            self._pull_diagnostics[uri_to_path(related_uri)] = sub_items
async def wait_for_diagnostics(
    self,
    path: str,
    version: int,
    *,
    mode: str = "document",
) -> None:
    """Wait, best-effort, until diagnostics for ``path`` are available.

    One ``textDocument/diagnostic`` pull and one push waiter run
    concurrently. The call returns as soon as either

    - a push is recorded for ``path`` whose version is absent or
      ``>= version``, or
    - pull results are stored for ``path``,

    and returns silently once the time budget is spent. It does not
    raise when the server lacks pull support; the push side still
    applies.

    The pull is issued exactly once. Re-issuing it every time it
    finished without storing results (unsupported method, error, or a
    report without ``items``) turned the wait into a request loop
    against the server that also restarted the push waiter on every
    round.

    Args:
        path: File whose diagnostics are awaited.
        version: Document version returned by :meth:`open_file`.
        mode: ``"full"`` selects ``DIAGNOSTICS_FULL_WAIT`` as the
            budget; any other value selects
            ``DIAGNOSTICS_DOCUMENT_WAIT``.
    """
    budget = DIAGNOSTICS_FULL_WAIT if mode == "full" else DIAGNOSTICS_DOCUMENT_WAIT
    loop = asyncio.get_running_loop()
    deadline = loop.time() + budget
    abs_path = os.path.abspath(path)
    if budget <= 0:
        return

    def _satisfied() -> bool:
        # Fresh-enough push, or any stored pull result, counts as success.
        if abs_path in self._published:
            current_v = self._published_version.get(abs_path)
            if current_v is None or current_v >= version:
                return True
        return abs_path in self._pull_diagnostics

    pending = {
        asyncio.create_task(self._pull_document_diagnostics(abs_path)),
        asyncio.create_task(self._wait_for_fresh_push(abs_path, version, budget)),
    }
    try:
        while pending:
            remaining = deadline - loop.time()
            if remaining <= 0:
                return
            done, pending = await asyncio.wait(
                pending,
                timeout=remaining,
                return_when=asyncio.FIRST_COMPLETED,
            )
            if not done or _satisfied():
                # Budget exhausted, or diagnostics are available.
                return
            # One side finished without producing diagnostics; keep
            # waiting on whatever is still running.
    finally:
        # Also runs when the caller is cancelled, so no child task is
        # left behind.
        for task in pending:
            task.cancel()
        for task in pending:
            try:
                await task
            except (asyncio.CancelledError, Exception):  # noqa: BLE001
                pass
async def _wait_for_fresh_push(self, path: str, version: int, timeout: float) -> None:
    """Wait until a publishDiagnostics arrives for ``path`` at ``version``+.

    The predicate is state-based: it holds once ``path`` has an entry
    in ``self._published`` and its recorded version is either absent
    or ``>= version``. Once it holds, a debounce of up to
    ``PUSH_DEBOUNCE`` seconds runs (ended early by a new push) before
    returning. Returns silently after ``timeout`` seconds if the
    predicate never holds.
    """
    deadline = asyncio.get_event_loop().time() + timeout
    # Push counter value we have already accounted for.
    baseline = self._push_counter
    while True:
        current_v = self._published_version.get(path)
        if path in self._published and (current_v is None or current_v >= version):
            # Debounce — wait a tick in case more diagnostics arrive
            # immediately after. TS often emits in pairs. We
            # snapshot the counter so we wake on a *new* push, not
            # on the one that satisfied us a moment ago.
            debounce_baseline = self._push_counter
            debounce_deadline = asyncio.get_event_loop().time() + PUSH_DEBOUNCE
            while self._push_counter == debounce_baseline:
                remaining = debounce_deadline - asyncio.get_event_loop().time()
                if remaining <= 0:
                    break
                # The event is left set by the publisher; clear it so
                # the wait below blocks until the next push.
                self._push_event.clear()
                try:
                    await asyncio.wait_for(self._push_event.wait(), timeout=remaining)
                except asyncio.TimeoutError:
                    break
            return
        remaining = deadline - asyncio.get_event_loop().time()
        if remaining <= 0:
            # Overall budget spent without a matching push.
            return
        if self._push_counter > baseline:
            # New event arrived but predicate still false — re-check
            # immediately without waiting again.
            baseline = self._push_counter
            continue
        self._push_event.clear()
        try:
            # Wake at least every 0.5s to re-evaluate the predicate and
            # the deadline even if no push arrives.
            await asyncio.wait_for(self._push_event.wait(), timeout=min(remaining, 0.5))
        except asyncio.TimeoutError:
            continue
def diagnostics_for(self, path: str) -> List[Dict[str, Any]]:
    """Return the merged, de-duplicated diagnostics known for one file.

    Pushed diagnostics come first, followed by pulled ones that are not
    content-equal to an earlier entry. The result is an empty list when
    nothing has been stored for the file.
    """
    key = os.path.abspath(path)
    pushed = self._push_diagnostics.get(key) or []
    pulled = self._pull_diagnostics.get(key) or []
    return _dedupe(pushed, pulled)
def _dedupe(*lists: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Concatenate ``lists`` keeping the first diagnostic per content key.

    Non-dict entries are skipped. Order of first occurrence is kept.
    """
    first_by_key: Dict[str, Dict[str, Any]] = {}
    for group in lists:
        for diag in group:
            if isinstance(diag, dict):
                # setdefault keeps the earliest entry for a given key;
                # dicts preserve insertion order.
                first_by_key.setdefault(_diagnostic_key(diag), diag)
    return list(first_by_key.values())
def _diagnostic_key(d: Dict[str, Any]) -> str:
    """Build a content-equality key for a diagnostic.

    The key joins, with NUL separators: severity (default ``1``), code,
    source, the stripped message, and the range as
    ``line:char-line:char``. Missing range coordinates count as ``0``.
    This follows the structural equality used by claude-code's
    ``areDiagnosticsEqual``; reducing the range to plain coordinates
    keeps the key independent of dict ordering.
    """
    span = d.get("range") or {}
    begin = span.get("start") or {}
    finish = span.get("end") or {}

    raw_code = d.get("code")
    # Stringify non-string codes first so that e.g. integer 0 survives
    # the ``or ""`` below as "0".
    code_text = raw_code if raw_code is None or isinstance(raw_code, str) else str(raw_code)

    coords = "{}:{}-{}:{}".format(
        begin.get("line", 0),
        begin.get("character", 0),
        finish.get("line", 0),
        finish.get("character", 0),
    )
    fields = (
        str(d.get("severity") or 1),
        str(code_text or ""),
        str(d.get("source") or ""),
        str(d.get("message") or "").strip(),
        coords,
    )
    return "\x00".join(fields)
# Names exported by ``from <this module> import *``.
__all__ = [
    "LSPClient",
    "file_uri",
    "uri_to_path",
    "INITIALIZE_TIMEOUT",
    "DIAGNOSTICS_DOCUMENT_WAIT",
    "DIAGNOSTICS_FULL_WAIT",
]

213
agent/lsp/eventlog.py Normal file
View file

@ -0,0 +1,213 @@
"""Structured logging with steady-state silence for the LSP layer.
The LSP layer fires on every write_file/patch. In a busy session
that's hundreds of events. We want users to be able to ``rg`` the
log for "did LSP fire on that edit?" without drowning in noise.
The level model:
- ``DEBUG`` for steady-state events that have no novel signal:
``clean``, ``feature off``, ``extension not mapped``, ``no project
root for already-announced file``, ``server unavailable for
already-announced binary``. These never reach ``agent.log`` at the
default INFO threshold.
- ``INFO`` for state transitions worth surfacing exactly once per
session: ``active for <root>`` the first time a (server_id,
workspace_root) client starts, ``no project root for <path>``
the first time we see that file. Plus every diagnostic event
(those are inherently rare and per-edit, exactly what users grep
for).
- ``WARNING`` for action-required failures: ``server unavailable``
(binary not on PATH) the first time per (server_id, binary),
``no server configured`` once per language. Per-call WARNING for
timeouts and unexpected bridge exceptions.
The dedup uses in-process, module-level sets. Each set grows at most by
the number of distinct (server_id, root) and (server_id, binary)
pairs touched in one Python process — bytes of memory even in an
aggressive monorepo session. A bounded LRU was rejected: evicting an
entry would risk re-firing the WARNING/INFO line we explicitly want
to suppress.
Grep recipe::
tail -f ~/.hermes/logs/agent.log | rg 'lsp\\['
"""
from __future__ import annotations
import logging
import os
import threading
from typing import Tuple
# Dedicated logger name so the documented grep recipe survives a
# ``logging.getLogger(__name__)`` rename of any internal module.
event_log = logging.getLogger("hermes.lint.lsp")
# ---------------------------------------------------------------------------
# Once-per-X dedup sets
# ---------------------------------------------------------------------------
# Held by ``_announce_once`` and ``reset_announce_caches`` while they
# read or modify the sets below.
_announce_lock = threading.Lock()
_announced_active: set = set()  # keys: (server_id, workspace_root)
_announced_unavailable: set = set()  # keys: (server_id, binary_path_or_name)
_announced_no_root: set = set()  # keys: (server_id, file_path)
_announced_no_server: set = set()  # keys: (server_id,)
def _short_path(file_path: str) -> str:
    """Return *file_path* relative to the cwd when it lies inside it.

    Falsy input is returned unchanged. A path whose relative form would
    start by climbing out of the cwd (``..``), or for which no relative
    form exists (``os.path.relpath`` raises ``ValueError``), is
    returned as given, so log lines never carry ``../../..`` chains.
    """
    if file_path:
        try:
            relative = os.path.relpath(file_path)
        except ValueError:
            pass
        else:
            leaves_cwd = relative == ".." or relative.startswith(".." + os.sep)
            if not leaves_cwd:
                return relative
    return file_path
def _emit(server_id: str, level: int, message: str) -> None:
    """Log *message* on ``event_log`` at *level*, prefixed ``lsp[<server_id>]``."""
    event_log.log(
        level,
        "lsp[%s] %s",
        server_id,
        message,
    )
def _announce_once(bucket: set, key: Tuple) -> bool:
    """Mark *key* as announced in *bucket*; return True on first sight.

    The membership test and the insert happen under ``_announce_lock``,
    so two concurrent callers cannot both be told the key is new.
    """
    with _announce_lock:
        is_new = key not in bucket
        if is_new:
            bucket.add(key)
    return is_new
# ---------------------------------------------------------------------------
# Public event helpers — call these from the LSP layer.
# ---------------------------------------------------------------------------
def log_clean(server_id: str, file_path: str) -> None:
    """Record, at DEBUG, that *file_path* produced no diagnostics."""
    shown = _short_path(file_path)
    _emit(server_id, logging.DEBUG, f"clean ({shown})")
def log_disabled(server_id: str, file_path: str, reason: str) -> None:
    """Record, at DEBUG, that LSP was intentionally skipped for a file.

    *reason* says why (feature off, extension unmapped, backend not
    local, ...).
    """
    shown = _short_path(file_path)
    _emit(server_id, logging.DEBUG, f"skipped: {reason} ({shown})")
def log_active(server_id: str, workspace_root: str) -> None:
    """Record that a client is in use for (server_id, workspace_root).

    The first call for a given pair logs ``active for <root>`` at INFO;
    later calls log ``reused client for <root>`` at DEBUG. This lets a
    single grep answer "is LSP actually running?".
    """
    first_time = _announce_once(_announced_active, (server_id, workspace_root))
    if first_time:
        level, text = logging.INFO, f"active for {workspace_root}"
    else:
        level, text = logging.DEBUG, f"reused client for {workspace_root}"
    _emit(server_id, level, text)
def log_diagnostics(server_id: str, file_path: str, count: int) -> None:
    """Record, at INFO on every call, that *count* diagnostics arrived.

    These are the failure signals users grep for, so they are never
    de-duplicated.
    """
    shown = _short_path(file_path)
    _emit(server_id, logging.INFO, f"{count} diags ({shown})")
def log_no_project_root(server_id: str, file_path: str) -> None:
    """Record that *file_path* has no recognised project marker.

    The message is identical on every call; it is logged at INFO the
    first time a (server_id, file_path) pair is seen and at DEBUG
    afterwards.
    """
    first_time = _announce_once(_announced_no_root, (server_id, file_path))
    level = logging.INFO if first_time else logging.DEBUG
    _emit(server_id, level, f"no project root for {_short_path(file_path)}")
def log_server_unavailable(server_id: str, binary_or_pkg: str) -> None:
    """Record that the server binary could not be resolved.

    WARNING (with an install hint) the first time a
    (server_id, binary) pair is seen; DEBUG afterwards, so repeated
    edits do not spam the log.
    """
    if not _announce_once(_announced_unavailable, (server_id, binary_or_pkg)):
        _emit(server_id, logging.DEBUG, f"server still unavailable: {binary_or_pkg}")
        return
    hint = "(install via `hermes lsp install <id>` or set lsp.servers.<id>.command)"
    _emit(
        server_id,
        logging.WARNING,
        f"server unavailable: {binary_or_pkg} not found " + hint,
    )
def log_no_server_configured(server_id: str) -> None:
    """Warn once per *server_id* that no spawn recipe is configured.

    Later calls for the same id emit nothing.
    """
    first_time = _announce_once(_announced_no_server, (server_id,))
    if not first_time:
        return
    _emit(server_id, logging.WARNING, "no server configured")
def log_timeout(server_id: str, file_path: str, kind: str = "diagnostics") -> None:
    """Warn, on every call, that a *kind* request timed out for a file."""
    text = f"{kind} timed out for {_short_path(file_path)}"
    _emit(server_id, logging.WARNING, text)
def log_server_error(server_id: str, file_path: str, exc: BaseException) -> None:
    """Warn that an unexpected exception came out of the LSP layer.

    The message names the file plus the exception's type and text.
    """
    text = f"unexpected error for {_short_path(file_path)}: {type(exc).__name__}: {exc}"
    _emit(server_id, logging.WARNING, text)
def log_spawn_failed(server_id: str, workspace_root: str, exc: BaseException) -> None:
    """Warn that the server failed to spawn or initialize for a root.

    The message names the workspace root plus the exception's type and
    text.
    """
    text = f"spawn/initialize failed for {workspace_root}: {type(exc).__name__}: {exc}"
    _emit(server_id, logging.WARNING, text)
def reset_announce_caches() -> None:
    """Empty every once-per-X dedup set (intended for tests only)."""
    with _announce_lock:
        for bucket in (
            _announced_active,
            _announced_unavailable,
            _announced_no_root,
            _announced_no_server,
        ):
            bucket.clear()
# Names exported by ``from <this module> import *``: the logger plus
# the event helpers the LSP layer is meant to call.
__all__ = [
    "event_log",
    "log_clean",
    "log_disabled",
    "log_active",
    "log_diagnostics",
    "log_no_project_root",
    "log_server_unavailable",
    "log_no_server_configured",
    "log_timeout",
    "log_server_error",
    "log_spawn_failed",
    "reset_announce_caches",
]

376
agent/lsp/install.py Normal file
View file

@ -0,0 +1,376 @@
"""Auto-installation of LSP server binaries.
Tries to install missing servers using whatever package manager is
appropriate. All installs go to a Hermes-owned bin staging dir,
``<HERMES_HOME>/lsp/bin/``, so we don't pollute the user's global
toolchain.
Strategies:
- ``auto`` — attempt to install with the best available package
manager. This is the default.
- ``manual`` — never install; if a binary is missing, the server is
silently skipped and the user is told about it via ``hermes lsp
status``.
- ``off`` — same as ``manual`` for now (kept distinct so we can
evolve behavior later, e.g. logging differently).
The actual installs happen synchronously the first time a server is
needed and concurrent calls to :func:`try_install` for the same
package are deduplicated via a per-package lock.
Failure modes are non-fatal: every install path is wrapped in
try/except and returns ``None`` on failure. The tool layer then
falls back to its in-process syntax checker, exactly as if the user
hadn't enabled LSP at all.
"""
from __future__ import annotations
import logging
import os
import shutil
import subprocess
import sys
import threading
from pathlib import Path
from typing import Any, Dict, Optional
logger = logging.getLogger("agent.lsp.install")
# Package-name → install-recipe registry. Each entry is a dict with a
# ``strategy`` name, the ``pkg`` to install and the ``bin`` executable
# name. When the install completes, we look for the executable in
# ``<HERMES_HOME>/lsp/bin/`` first, then on PATH.
#
# Optional fields:
# - ``extra_pkgs``: list of sibling packages to install alongside
# ``pkg`` in the same node_modules tree. Used when an LSP server
# has a runtime peer dependency that npm doesn't auto-pull (e.g.
# typescript-language-server needs ``typescript``).
INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
    # Python
    "pyright": {"strategy": "npm", "pkg": "pyright", "bin": "pyright-langserver"},
    # JS/TS family
    "typescript-language-server": {
        "strategy": "npm",
        "pkg": "typescript-language-server",
        "bin": "typescript-language-server",
        # typescript-language-server requires the `typescript` SDK
        # (tsserver) to be importable from the same node_modules tree;
        # otherwise initialize() fails with "Could not find a valid
        # TypeScript installation". Install them together.
        "extra_pkgs": ["typescript"],
    },
    "@vue/language-server": {
        "strategy": "npm",
        "pkg": "@vue/language-server",
        "bin": "vue-language-server",
    },
    "svelte-language-server": {
        "strategy": "npm",
        "pkg": "svelte-language-server",
        "bin": "svelteserver",
    },
    "@astrojs/language-server": {
        "strategy": "npm",
        "pkg": "@astrojs/language-server",
        "bin": "astro-ls",
    },
    "yaml-language-server": {
        "strategy": "npm",
        "pkg": "yaml-language-server",
        "bin": "yaml-language-server",
    },
    "bash-language-server": {
        "strategy": "npm",
        "pkg": "bash-language-server",
        "bin": "bash-language-server",
    },
    "intelephense": {"strategy": "npm", "pkg": "intelephense", "bin": "intelephense"},
    "dockerfile-language-server-nodejs": {
        "strategy": "npm",
        "pkg": "dockerfile-language-server-nodejs",
        "bin": "docker-langserver",
    },
    # Go
    "gopls": {"strategy": "go", "pkg": "golang.org/x/tools/gopls@latest", "bin": "gopls"},
    # Rust — too heavy (hundreds of MB to bootstrap). We do NOT
    # auto-install rust-analyzer; users install via rustup.
    "rust-analyzer": {"strategy": "manual", "pkg": "", "bin": "rust-analyzer"},
    # C/C++ — manual (clangd ships with LLVM, very heavy)
    "clangd": {"strategy": "manual", "pkg": "", "bin": "clangd"},
    # Lua — manual (LuaLS is platform-specific binaries from GitHub
    # releases; complex enough that we punt to the user)
    "lua-language-server": {"strategy": "manual", "pkg": "", "bin": "lua-language-server"},
}
# One lock per package name; created on demand by ``_get_lock``.
_install_locks: Dict[str, threading.Lock] = {}
# Per-package outcome cache filled by ``try_install``: binary path, or
# ``None`` when the install attempt produced no binary.
_install_results: Dict[str, Optional[str]] = {}
# Guards creation of entries in ``_install_locks``.
_install_lock_meta = threading.Lock()
def hermes_lsp_bin_dir() -> Path:
    """Return the Hermes-owned bin staging dir for LSP servers.

    The directory is ``<HERMES_HOME>/lsp/bin``, where ``HERMES_HOME``
    comes from the environment and defaults to ``~/.hermes``. It is
    created (with parents) if it does not exist yet.
    """
    # ``or`` rather than an ``is None`` test: an empty HERMES_HOME must
    # fall back to the default too, otherwise ``Path("")`` makes the
    # staging dir the relative ``lsp/bin`` inside the current working
    # directory.
    home = os.environ.get("HERMES_HOME") or os.path.join(os.path.expanduser("~"), ".hermes")
    bin_dir = Path(home) / "lsp" / "bin"
    bin_dir.mkdir(parents=True, exist_ok=True)
    return bin_dir
def _existing_binary(name: str) -> Optional[str]:
    """Locate ``name`` in the staging dir first, then on PATH.

    Returns the path as a string, or ``None`` when neither location has
    it. The staged file must exist and be executable to count.
    """
    staged = hermes_lsp_bin_dir() / name
    if staged.exists() and os.access(staged, os.X_OK):
        return str(staged)
    return shutil.which(name) or None
def _get_lock(pkg: str) -> threading.Lock:
    """Return the lock dedicated to ``pkg``, creating it on first use.

    Lookup and creation happen under ``_install_lock_meta`` so every
    caller gets the same lock object for a given package.
    """
    with _install_lock_meta:
        try:
            return _install_locks[pkg]
        except KeyError:
            created = _install_locks[pkg] = threading.Lock()
            return created
def try_install(pkg: str, strategy: str = "auto") -> Optional[str]:
    """Return the binary path for ``pkg``, installing it if allowed.

    Only ``strategy == "auto"`` may trigger an install. Any other
    value (``"manual"``, ``"off"``) merely probes for an existing
    binary and returns ``None`` when there is none.

    In ``auto`` mode the outcome — a path or ``None`` — is cached per
    package, so a second call returns the same value without
    reinstalling. Concurrent calls for one package are serialized by a
    per-package lock.
    """
    if strategy != "auto":
        # Probe only; fall back to the package name as binary name.
        bin_name = INSTALL_RECIPES.get(pkg, {}).get("bin", pkg)
        return _existing_binary(bin_name)

    try:
        return _install_results[pkg]
    except KeyError:
        pass

    with _get_lock(pkg):
        # Re-check: another thread may have finished while we waited.
        if pkg in _install_results:
            return _install_results[pkg]
        outcome = _do_install(pkg)
        _install_results[pkg] = outcome
        return outcome
def _do_install(pkg: str) -> Optional[str]:
    """Resolve ``pkg`` to a binary path, installing per its recipe.

    Packages without a recipe are only looked up on PATH. With a
    recipe, an already-present binary wins; otherwise the recipe's
    strategy picks the installer (``npm``, ``go`` or ``pip``).
    ``manual`` recipes and unknown strategies yield ``None``.
    """
    recipe = INSTALL_RECIPES.get(pkg)
    if recipe is None:
        # Not in our registry — best-effort: just probe PATH.
        return shutil.which(pkg)

    bin_name = recipe.get("bin", pkg)
    # Already present in the staging dir or on PATH?
    present = _existing_binary(bin_name)
    if present:
        return present

    strategy = recipe.get("strategy", "manual")
    if strategy == "manual":
        logger.debug("[install] %s requires manual install (recipe=%s)", pkg, recipe)
        return None

    target = recipe.get("pkg", pkg)
    if strategy == "npm":
        return _install_npm(target, bin_name, extra_pkgs=recipe.get("extra_pkgs") or [])
    if strategy == "go":
        return _install_go(target, bin_name)
    if strategy == "pip":
        return _install_pip(target, bin_name)

    logger.warning("[install] unknown strategy %r for %s", strategy, pkg)
    return None
def _install_npm(
    pkg: str,
    bin_name: str,
    extra_pkgs: Optional[list] = None,
) -> Optional[str]:
    """Install an npm package under ``<HERMES_HOME>/lsp/``.

    Runs ``npm install --prefix <staging>`` for ``pkg`` plus any
    ``extra_pkgs`` (sibling packages needed in the same
    ``node_modules`` tree, e.g. ``typescript`` for
    typescript-language-server). The resulting executable in
    ``<staging>/node_modules/.bin/`` is then symlinked — or copied when
    symlinking fails — into the ``lsp/bin`` staging dir.

    Returns:
        The path of the link/copy, the path inside ``node_modules`` if
        neither could be created, or ``None`` when npm is missing, the
        install fails, or the executable is not found afterwards.
    """
    npm = shutil.which("npm")
    if npm is None:
        logger.info("[install] cannot install %s: npm not on PATH", pkg)
        return None

    staging = hermes_lsp_bin_dir().parent  # <HERMES_HOME>/lsp/
    targets = [pkg, *(extra_pkgs or [])]
    command = [
        npm,
        "install",
        "--prefix",
        str(staging),
        "--silent",
        "--no-fund",
        "--no-audit",
        *targets,
    ]
    logger.info("[install] npm install --prefix %s %s", staging, " ".join(targets))
    try:
        proc = subprocess.run(
            command,
            check=False,
            capture_output=True,
            text=True,
            timeout=300,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.warning("[install] npm install errored for %s: %s", pkg, e)
        return None
    if proc.returncode != 0:
        logger.warning(
            "[install] npm install failed for %s: %s", pkg, proc.stderr.strip()[:500]
        )
        return None

    # Find the bin
    shim = staging / "node_modules" / ".bin" / bin_name
    candidates = [shim]
    if os.name == "nt":
        # On Windows npm sometimes drops `.cmd` shims
        candidates.append(shim.with_suffix(".cmd"))
    found = next((c for c in candidates if c.exists()), None)
    if found is None:
        logger.warning("[install] npm install for %s succeeded but bin %s not found", pkg, bin_name)
        return None

    # Symlink into our `lsp/bin/` for stable PATH access.
    link = hermes_lsp_bin_dir() / found.name
    if not link.exists():
        try:
            link.symlink_to(found)
        except (OSError, NotImplementedError):
            # Symlinks fail on some Windows setups — copy instead.
            try:
                shutil.copy2(found, link)
            except OSError:
                return str(found)
    return str(link if link.exists() else found)
def _install_go(pkg: str, bin_name: str) -> Optional[str]:
    """Run ``go install`` for ``pkg`` with ``GOBIN`` set to the staging dir.

    Returns the path of the installed executable (``.exe`` suffix on
    Windows), or ``None`` when ``go`` is missing, the install fails, or
    the executable is not found afterwards.
    """
    go = shutil.which("go")
    if go is None:
        logger.info("[install] cannot install %s: go not on PATH", pkg)
        return None

    staging = hermes_lsp_bin_dir()
    # Copy of the current environment with GOBIN redirected.
    env = {**os.environ, "GOBIN": str(staging)}
    logger.info("[install] go install %s (GOBIN=%s)", pkg, staging)
    try:
        proc = subprocess.run(
            [go, "install", pkg],
            check=False,
            capture_output=True,
            text=True,
            timeout=600,
            env=env,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.warning("[install] go install errored for %s: %s", pkg, e)
        return None
    if proc.returncode != 0:
        logger.warning(
            "[install] go install failed for %s: %s", pkg, proc.stderr.strip()[:500]
        )
        return None

    exe = staging / bin_name
    if os.name == "nt":
        exe = exe.with_suffix(".exe")
    if not exe.exists():
        logger.warning("[install] go install for %s succeeded but bin %s not found", pkg, bin_name)
        return None
    return str(exe)
def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
    """Install a Python package into ``<HERMES_HOME>/lsp/python-packages``.

    Uses ``pip install --target`` (run with the current interpreter) so
    the user's site-packages is left alone. The script
    ``python-packages/bin/<bin_name>`` is then symlinked — or copied
    when symlinking fails — into the ``lsp/bin`` staging dir. Only
    packages that ship a console script can be found this way.

    Returns:
        The path of the link/copy, the script path itself if neither
        could be created, or ``None`` when the install fails or the
        script is missing.
    """
    pip_target = hermes_lsp_bin_dir().parent / "python-packages"
    pip_target.mkdir(parents=True, exist_ok=True)
    command = [sys.executable, "-m", "pip", "install", "--target", str(pip_target), "--quiet", pkg]
    logger.info("[install] pip install --target %s %s", pip_target, pkg)
    try:
        proc = subprocess.run(
            command,
            check=False,
            capture_output=True,
            text=True,
            timeout=300,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.warning("[install] pip install errored for %s: %s", pkg, e)
        return None
    if proc.returncode != 0:
        logger.warning(
            "[install] pip install failed for %s: %s", pkg, proc.stderr.strip()[:500]
        )
        return None

    # Look for the script
    script = pip_target / "bin" / bin_name
    if not script.exists():
        return None
    link = hermes_lsp_bin_dir() / bin_name
    if not link.exists():
        try:
            link.symlink_to(script)
        except (OSError, NotImplementedError):
            try:
                shutil.copy2(script, link)
            except OSError:
                return str(script)
    return str(link if link.exists() else script)
def detect_status(pkg: str) -> str:
    """Classify ``pkg`` as ``installed``, ``missing`` or ``manual-only``.

    ``installed`` means its binary is found in the staging dir or on
    PATH. Otherwise a recipe with the ``manual`` strategy yields
    ``manual-only`` and everything else ``missing``. Nothing is spawned
    or installed; the ``hermes lsp status`` CLI uses this for its
    overview.
    """
    recipe = INSTALL_RECIPES.get(pkg)
    bin_name = pkg if not recipe else recipe.get("bin", pkg)
    if _existing_binary(bin_name):
        return "installed"
    manual_only = bool(recipe) and recipe.get("strategy") == "manual"
    return "manual-only" if manual_only else "missing"
# Names exported by ``from <this module> import *``.
__all__ = [
    "INSTALL_RECIPES",
    "try_install",
    "detect_status",
    "hermes_lsp_bin_dir",
]

607
agent/lsp/manager.py Normal file
View file

@ -0,0 +1,607 @@
"""Service-level orchestration for LSP clients.
The :class:`LSPService` is the bridge between the synchronous
file_operations layer and the async :class:`agent.lsp.client.LSPClient`.
Design choices:
- A **single asyncio event loop** runs in a background thread. All
client work happens on that loop. Synchronous callers from
``tools/file_operations.py`` use :meth:`get_diagnostics_sync` to
open + wait + drain in one blocking call.
- One client per ``(server_id, workspace_root)`` key. Lazy spawn:
the first request for a key spawns the client; subsequent requests
re-use it.
- A **broken-set** records ``(server_id, workspace_root)`` pairs that
failed to spawn or initialize. These are never retried for the
life of the service. Mirrors OpenCode's design.
- A **delta baseline** map keeps "diagnostics-as-of-the-last-snapshot"
per file. ``snapshot_baseline()`` is called BEFORE a write; the
next ``get_diagnostics_sync()`` returns only diagnostics that
weren't in the baseline. This is the lift from Claude Code's
``beforeFileEdited`` / ``getNewDiagnostics`` pattern, except wired
to the local LSP layer instead of MCP IDE RPC.
The service is **off by default** — call :meth:`is_active` to check
whether it's actually doing anything. When LSP is disabled in
config, when no git workspace can be detected, or when all configured
servers are missing binaries and auto-install is off, ``is_active``
returns False and the file_operations layer falls through to the
in-process syntax check.
"""
from __future__ import annotations
import asyncio
import logging
import os
import threading
import time
from concurrent.futures import Future as ConcurrentFuture
from typing import Any, Dict, List, Optional, Tuple
from agent.lsp import eventlog
from agent.lsp.client import (
DIAGNOSTICS_DOCUMENT_WAIT,
LSPClient,
file_uri,
)
from agent.lsp.servers import (
ServerContext,
ServerDef,
SpawnSpec,
find_server_for_file,
language_id_for,
)
from agent.lsp.workspace import (
clear_cache,
is_inside_workspace,
resolve_workspace_for_file,
)
# Module logger; debug-level messages from the service go here.
logger = logging.getLogger("agent.lsp.manager")
# Default for ``LSPService(idle_timeout=...)``, in seconds (10 minutes).
# NOTE(review): LSPService stores this value and records last-use times,
# but no reaping code appears in this module — confirm where idle
# servers are actually reaped.
DEFAULT_IDLE_TIMEOUT = 600  # seconds; servers idle for >10min get reaped
class _BackgroundLoop:
    """A daemon thread that owns one asyncio event loop.

    Provides :meth:`run` for synchronous callers — submits a coroutine
    to the loop and blocks until it finishes (or a timeout fires).
    The object can be started again after :meth:`stop`.
    """

    def __init__(self) -> None:
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        # Set by the loop thread once ``self._loop`` is assigned.
        self._ready = threading.Event()

    def start(self) -> None:
        """Start the loop thread (no-op if already started).

        Blocks up to five seconds for the thread to publish its loop.
        """
        if self._thread is not None:
            return
        # Reset the flag so a start() after stop() really waits for the
        # *new* loop instead of returning on the previous run's signal.
        self._ready.clear()
        self._thread = threading.Thread(
            target=self._run_forever,
            name="hermes-lsp-loop",
            daemon=True,
        )
        self._thread.start()
        self._ready.wait(timeout=5.0)

    def _run_forever(self) -> None:
        """Thread target: create the loop, signal readiness, run until stopped."""
        loop = asyncio.new_event_loop()
        self._loop = loop
        asyncio.set_event_loop(loop)
        self._ready.set()
        try:
            loop.run_forever()
        finally:
            try:
                loop.close()
            except Exception:  # noqa: BLE001
                pass

    def run(self, coro, *, timeout: Optional[float] = None) -> Any:
        """Submit a coroutine to the loop and block until done.

        Returns the coroutine's result, or raises its exception.  If
        waiting fails (including on timeout) the submitted future is
        cancelled before the exception is re-raised.

        Raises:
            RuntimeError: the loop has not been started.
        """
        loop = self._loop
        if loop is None:
            # The coroutine will never be scheduled; close it so Python
            # does not emit a "coroutine was never awaited" warning.
            close = getattr(coro, "close", None)
            if callable(close):
                close()
            raise RuntimeError("background loop not started")
        fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, loop)
        try:
            return fut.result(timeout=timeout)
        except Exception:
            fut.cancel()
            raise

    def stop(self) -> None:
        """Ask the loop to stop and wait briefly for the thread to exit."""
        loop = self._loop
        if loop is None:
            return
        try:
            loop.call_soon_threadsafe(loop.stop)
        except RuntimeError:
            # Loop already closed.
            pass
        if self._thread is not None:
            self._thread.join(timeout=2.0)
        self._loop = None
        self._thread = None
class LSPService:
    """The process-wide LSP service.

    Created once via :meth:`create_from_config`; the
    :func:`agent.lsp.get_service` accessor manages the singleton.
    Most callers should use that accessor rather than constructing
    :class:`LSPService` directly.

    Holds one background event loop, the clients keyed by
    ``(server_id, workspace_root)``, the set of keys that failed to
    spawn, and the per-file diagnostics baseline used for delta
    reporting.
    """
# ------------------------------------------------------------------
# construction + factory
# ------------------------------------------------------------------
def __init__(
    self,
    *,
    enabled: bool,
    wait_mode: str,
    wait_timeout: float,
    install_strategy: str,
    binary_overrides: Optional[Dict[str, List[str]]] = None,
    env_overrides: Optional[Dict[str, Dict[str, str]]] = None,
    init_overrides: Optional[Dict[str, Dict[str, Any]]] = None,
    disabled_servers: Optional[List[str]] = None,
    idle_timeout: float = DEFAULT_IDLE_TIMEOUT,
) -> None:
    """Record the configuration and, if enabled, start the background loop.

    ``wait_mode`` values other than ``"document"`` / ``"full"`` fall
    back to ``"document"``.  Missing override mappings become empty
    dicts and ``disabled_servers`` becomes a set.
    """
    if wait_mode not in ("document", "full"):
        wait_mode = "document"

    # --- configuration -------------------------------------------------
    self._enabled = enabled
    self._wait_mode = wait_mode
    self._wait_timeout = wait_timeout
    self._install_strategy = install_strategy
    self._binary_overrides = binary_overrides if binary_overrides else {}
    self._env_overrides = env_overrides if env_overrides else {}
    self._init_overrides = init_overrides if init_overrides else {}
    self._disabled_servers = set(disabled_servers or [])
    self._idle_timeout = idle_timeout

    # --- per-(server_id, workspace_root) state ---------------------------
    self._clients: Dict[Tuple[str, str], LSPClient] = {}
    self._broken: set = set()
    self._spawning: Dict[Tuple[str, str], asyncio.Future] = {}
    self._last_used: Dict[Tuple[str, str], float] = {}
    self._state_lock = threading.Lock()

    # Delta baseline: file path → diagnostics captured right before a
    # write.  ``get_diagnostics_sync`` drops anything found here so the
    # agent only sees errors introduced by the current edit.
    self._delta_baseline: Dict[str, List[Dict[str, Any]]] = {}

    # --- background loop (only spun up when the service is enabled) -----
    self._loop = _BackgroundLoop()
    if self._enabled:
        self._loop.start()
@classmethod
def create_from_config(cls) -> Optional["LSPService"]:
    """Build a service from ``hermes_cli.config`` settings.

    Returns ``None`` if the config can't be loaded.  The service
    itself returns ``is_active()`` False when LSP is disabled.

    Reads the ``lsp`` mapping: ``enabled``, ``wait_mode``,
    ``wait_timeout``, ``install_strategy`` and a ``servers`` mapping
    whose entries may carry ``disabled``, ``command``, ``env`` and
    ``initialization_options``.  Entries of the wrong type are ignored;
    an unparsable ``wait_timeout`` falls back to
    ``DIAGNOSTICS_DOCUMENT_WAIT`` instead of raising.
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
    except Exception as e:  # noqa: BLE001
        logger.debug("LSP config load failed: %s", e)
        return None

    lsp_cfg = (cfg.get("lsp") or {}) if isinstance(cfg, dict) else {}
    if not isinstance(lsp_cfg, dict):
        lsp_cfg = {}

    enabled = bool(lsp_cfg.get("enabled", True))
    wait_mode = lsp_cfg.get("wait_mode", "document")
    raw_timeout = lsp_cfg.get("wait_timeout", DIAGNOSTICS_DOCUMENT_WAIT)
    try:
        wait_timeout = float(raw_timeout)
    except (TypeError, ValueError):
        # e.g. ``wait_timeout: null`` or a non-numeric string in YAML.
        logger.debug("LSP config: invalid wait_timeout %r, using default", raw_timeout)
        wait_timeout = float(DIAGNOSTICS_DOCUMENT_WAIT)
    install_strategy = lsp_cfg.get("install_strategy", "auto")

    servers_cfg = lsp_cfg.get("servers") or {}
    disabled: List[str] = []
    binary_overrides: Dict[str, List[str]] = {}
    env_overrides: Dict[str, Dict[str, str]] = {}
    init_overrides: Dict[str, Dict[str, Any]] = {}
    if isinstance(servers_cfg, dict):
        for name, sub in servers_cfg.items():
            if not isinstance(sub, dict):
                continue
            if sub.get("disabled"):
                disabled.append(name)
            cmd = sub.get("command")
            if isinstance(cmd, list) and cmd:
                # Config loaders may parse bare numbers/booleans; argv
                # entries must be strings, same as env values below.
                binary_overrides[name] = [str(part) for part in cmd]
            env = sub.get("env")
            if isinstance(env, dict):
                env_overrides[name] = {str(k): str(v) for k, v in env.items()}
            init = sub.get("initialization_options")
            if isinstance(init, dict):
                init_overrides[name] = init

    return cls(
        enabled=enabled,
        wait_mode=wait_mode,
        wait_timeout=wait_timeout,
        install_strategy=install_strategy,
        binary_overrides=binary_overrides,
        env_overrides=env_overrides,
        init_overrides=init_overrides,
        disabled_servers=disabled,
    )
# ------------------------------------------------------------------
# public API
# ------------------------------------------------------------------
def is_active(self) -> bool:
    """Return True iff this service should be consulted at all.

    This reflects only the ``enabled`` flag given at construction;
    per-file gating (server match, workspace, broken-set) is done by
    ``enabled_for``.
    """
    return self._enabled
def enabled_for(self, file_path: str) -> bool:
    """Decide whether LSP should run for this specific file.

    Gates, in order, on: the service being enabled, a registered
    server matching the file that is not disabled in config, workspace
    detection gating the file in, and the
    ``(server_id, workspace_root)`` pair not being in the broken-set
    from a previous spawn failure.

    Files in already-broken pairs return False so the file_operations
    layer skips the LSP path entirely — no spawn attempts, no timeout
    cost — until the service is restarted (``hermes lsp restart``) or
    the process exits.
    """
    if not self._enabled:
        return False

    server = find_server_for_file(file_path)
    if server is None:
        return False
    if server.server_id in self._disabled_servers:
        return False

    workspace, gated_in = resolve_workspace_for_file(file_path)
    if not workspace or not gated_in:
        return False

    # Broken-set short-circuit, keyed on the per-server root when it
    # can be computed and on the workspace root otherwise.
    try:
        root = server.resolve_root(file_path, workspace) or workspace
    except Exception:  # noqa: BLE001
        root = workspace
    return (server.server_id, root) not in self._broken
def snapshot_baseline(self, file_path: str) -> None:
    """Record the current diagnostics of ``file_path`` as the delta baseline.

    Meant to be called BEFORE a write so the next
    ``get_diagnostics_sync()`` can filter out pre-existing errors.
    Best-effort — failures are swallowed so a flaky server can't break
    a write.

    Any failure of the blocking call (it is bounded at 8 seconds)
    marks the (server_id, workspace_root) pair as broken, so later
    edits skip it instantly instead of re-paying the timeout, and
    stores an empty baseline for the file.
    """
    if self.enabled_for(file_path):
        try:
            snapshot = self._loop.run(self._snapshot_async(file_path), timeout=8.0)
            self._delta_baseline[os.path.abspath(file_path)] = snapshot or []
        except Exception as err:  # noqa: BLE001
            logger.debug("baseline snapshot failed for %s: %s", file_path, err)
            self._mark_broken_for_file(file_path, err)
            self._delta_baseline[os.path.abspath(file_path)] = []
def get_diagnostics_sync(
    self,
    file_path: str,
    *,
    delta: bool = True,
    timeout: Optional[float] = None,
) -> List[Dict[str, Any]]:
    """Synchronously open ``file_path`` in the right server, wait for
    diagnostics, return them.

    If ``delta`` is True (default), the result is filtered against
    any baseline previously captured via :meth:`snapshot_baseline`.
    Diagnostics present in the baseline are removed so the caller
    only sees errors introduced by the current edit.

    ``timeout`` bounds the blocking wait; when omitted it is the
    configured wait timeout plus two seconds.

    Returns an empty list when LSP is disabled, when no workspace
    can be detected, when no server matches, or when the server
    can't be spawned.  A timeout or error also marks the server as
    broken for this workspace.
    """
    # ``Future.result(timeout=...)`` raises concurrent.futures.TimeoutError,
    # which only became an alias of the builtin (and of
    # asyncio.TimeoutError) in Python 3.11.  Catch all spellings so an
    # outer timeout is logged as a timeout on every supported version.
    from concurrent.futures import TimeoutError as FutureTimeoutError

    if not self.enabled_for(file_path):
        return []

    # Resolve server_id eagerly so we can emit structured logs even
    # when the request errors out below.
    srv = find_server_for_file(file_path)
    server_id = srv.server_id if srv else "?"

    try:
        t = timeout if timeout is not None else self._wait_timeout + 2.0
        diags = self._loop.run(self._open_and_wait_async(file_path), timeout=t) or []
    except (asyncio.TimeoutError, FutureTimeoutError, TimeoutError) as e:
        eventlog.log_timeout(server_id, file_path)
        logger.debug("LSP diagnostics timeout for %s: %s", file_path, e)
        self._mark_broken_for_file(file_path, e)
        return []
    except Exception as e:  # noqa: BLE001
        eventlog.log_server_error(server_id, file_path, e)
        logger.debug("LSP diagnostics fetch failed for %s: %s", file_path, e)
        self._mark_broken_for_file(file_path, e)
        return []

    abs_path = os.path.abspath(file_path)
    if delta:
        baseline = self._delta_baseline.get(abs_path) or []
        if baseline:
            seen = {_diag_key(d) for d in baseline}
            diags = [d for d in diags if _diag_key(d) not in seen]
        # Roll baseline forward — next call returns deltas relative
        # to the just-emitted state, mirroring claude-code's
        # diagnosticTracking.
        try:
            fresh = self._loop.run(self._current_diags_async(file_path), timeout=2.0) or []
        except Exception:  # noqa: BLE001
            fresh = []
        if fresh:
            self._delta_baseline[abs_path] = fresh

    if diags:
        eventlog.log_diagnostics(server_id, file_path, len(diags))
    else:
        eventlog.log_clean(server_id, file_path)
    return diags
def _mark_broken_for_file(self, file_path: str, exc: BaseException) -> None:
    """Put the file's (server_id, workspace_root) pair into the broken-set.

    Invoked by the synchronous entry points when their blocking call
    fails, so that subsequent edits skip the server immediately
    instead of re-entering the spawn path and paying the timeout
    again.

    Besides recording the key, this removes any client registered
    under it and attempts a shutdown bounded to one second, then emits
    one ``log_spawn_failed`` event the first time the key is marked.

    ``exc`` is whatever exception the caller caught — used only for
    logging, never re-raised.
    """
    server = find_server_for_file(file_path)
    if server is None:
        return
    workspace, gated_in = resolve_workspace_for_file(file_path)
    if not workspace or not gated_in:
        return

    try:
        root = server.resolve_root(file_path, workspace) or workspace
    except Exception:  # noqa: BLE001
        root = workspace

    broken_key = (server.server_id, root)
    first_failure = broken_key not in self._broken
    self._broken.add(broken_key)

    # A client may still be registered under this key in a
    # half-working state; detach it and try to shut it down.
    with self._state_lock:
        stale_client = self._clients.pop(broken_key, None)
    if stale_client is not None:
        try:
            # Best effort, capped at one second — we are already on a
            # slow path.
            self._loop.run(stale_client.shutdown(), timeout=1.0)
        except Exception:  # noqa: BLE001
            pass

    if first_failure:
        eventlog.log_spawn_failed(server.server_id, root, exc)
def shutdown(self) -> None:
    """Shut every client down, stop the background loop, clear the workspace cache.

    Does nothing when the service is disabled.  Errors from the client
    shutdown pass are logged at debug level and do not prevent the
    loop from being stopped.
    """
    if self._enabled:
        try:
            self._loop.run(self._shutdown_async(), timeout=10.0)
        except Exception as err:  # noqa: BLE001
            logger.debug("LSP shutdown error: %s", err)
        self._loop.stop()
        clear_cache()
# ------------------------------------------------------------------
# async internals
# ------------------------------------------------------------------
async def _snapshot_async(self, file_path: str) -> List[Dict[str, Any]]:
    """Open ``file_path`` in its server, wait for diagnostics, return a copy.

    Returns ``[]`` when no client is available or when opening /
    waiting fails (the failure is logged at debug level).  On success
    the client's last-used timestamp is refreshed.
    """
    client = await self._get_or_spawn(file_path)
    if client is None:
        return []
    try:
        lang = language_id_for(file_path)
        doc_version = await client.open_file(file_path, language_id=lang)
        await client.wait_for_diagnostics(file_path, doc_version, mode=self._wait_mode)
    except Exception as err:  # noqa: BLE001
        logger.debug("snapshot open/wait failed: %s", err)
        return []
    else:
        self._last_used[(client.server_id, client.workspace_root)] = time.time()
        return [*client.diagnostics_for(file_path)]
async def _open_and_wait_async(self, file_path: str) -> List[Dict[str, Any]]:
    """Open and save ``file_path`` in its server, wait, return the diagnostics.

    Same contract as the snapshot path, except that ``save_file`` is
    called on the client between opening and waiting.  Returns ``[]``
    when no client is available or any of those steps fails.
    """
    client = await self._get_or_spawn(file_path)
    if client is None:
        return []
    try:
        lang = language_id_for(file_path)
        doc_version = await client.open_file(file_path, language_id=lang)
        await client.save_file(file_path)
        await client.wait_for_diagnostics(file_path, doc_version, mode=self._wait_mode)
    except Exception as err:  # noqa: BLE001
        logger.debug("open/wait failed for %s: %s", file_path, err)
        return []
    else:
        self._last_used[(client.server_id, client.workspace_root)] = time.time()
        return [*client.diagnostics_for(file_path)]
async def _current_diags_async(self, file_path: str) -> List[Dict[str, Any]]:
    """Return the diagnostics an already-running client holds for ``file_path``.

    Never spawns a server.  Returns ``[]`` when the file has no
    workspace/server, when its per-server root cannot be resolved, or
    when no client is registered for that root.
    """
    ws, gated = resolve_workspace_for_file(file_path)
    srv = find_server_for_file(file_path)
    if not (ws and gated and srv):
        return []
    # Clients are registered under the *per-server* root (see the key
    # built when spawning), which can be a sub-directory of the
    # workspace root.  Looking up by the workspace root would miss them.
    try:
        per_server_root = srv.resolve_root(file_path, ws)
    except Exception:  # noqa: BLE001
        return []
    if per_server_root is None:
        # Server is gated off for this file; no client can exist for it.
        return []
    with self._state_lock:
        client = self._clients.get((srv.server_id, per_server_root))
    if client is None:
        return []
    return list(client.diagnostics_for(file_path))
async def _get_or_spawn(self, file_path: str) -> Optional[LSPClient]:
    """Return the running client for ``file_path``, spawning it if needed.

    Returns ``None`` when no server matches, the server is disabled,
    the file is outside any workspace, the server is gated off by an
    exclude marker, the key is in the broken-set, the binary is
    unavailable, or the server fails to start.  The last two cases add
    the key to the broken-set.

    Concurrent requests for the same key share one spawn: the first
    caller registers a future in ``_spawning`` and later callers await
    it.  That future is always resolved before this coroutine exits —
    including on cancellation or an unexpected error — so waiters can
    never hang on it.
    """
    srv = find_server_for_file(file_path)
    if srv is None:
        return None
    if srv.server_id in self._disabled_servers:
        eventlog.log_disabled(srv.server_id, file_path, "disabled in config")
        return None
    ws_root, gated = resolve_workspace_for_file(file_path)
    if not (ws_root and gated):
        eventlog.log_no_project_root(srv.server_id, file_path)
        return None
    per_server_root = srv.resolve_root(file_path, ws_root)
    if per_server_root is None:
        eventlog.log_disabled(
            srv.server_id, file_path, "exclude marker hit (server gated off)"
        )
        return None  # exclude marker hit, server gated off

    key = (srv.server_id, per_server_root)
    if key in self._broken:
        return None

    with self._state_lock:
        client = self._clients.get(key)
        if client is not None and client.is_running:
            eventlog.log_active(srv.server_id, per_server_root)
            return client
        spawning = self._spawning.get(key)
    if spawning is not None:
        try:
            # Shield the shared future: if *this* waiter is cancelled
            # (e.g. its caller timed out) the cancellation must not
            # propagate into the future the spawner will resolve.
            return await asyncio.shield(spawning)
        except Exception:  # noqa: BLE001
            return None

    # Begin spawn
    loop = asyncio.get_running_loop()
    spawn_future: asyncio.Future = loop.create_future()

    def _resolve(value: Optional[LSPClient]) -> None:
        # Tolerate a future that was already resolved or cancelled.
        if not spawn_future.done():
            spawn_future.set_result(value)

    with self._state_lock:
        self._spawning[key] = spawn_future
    try:
        ctx = ServerContext(
            workspace_root=per_server_root,
            install_strategy=self._install_strategy,
            binary_overrides=self._binary_overrides,
            env_overrides=self._env_overrides,
            init_overrides=self._init_overrides,
        )
        spec = srv.build_spawn(per_server_root, ctx)
        if spec is None:
            # ``build_spawn`` returns None when the binary can't be
            # located (auto-install disabled, manual-only server,
            # or install attempt failed).  Surface this once via
            # the structured logger so the user can act on it.
            eventlog.log_server_unavailable(srv.server_id, srv.server_id)
            self._broken.add(key)
            _resolve(None)
            return None

        client = LSPClient(
            server_id=srv.server_id,
            workspace_root=spec.workspace_root,
            command=spec.command,
            env=spec.env,
            cwd=spec.cwd,
            initialization_options=spec.initialization_options,
            seed_diagnostics_on_first_push=spec.seed_diagnostics_on_first_push or srv.seed_first_push,
        )
        try:
            await client.start()
        except Exception as e:  # noqa: BLE001
            eventlog.log_spawn_failed(srv.server_id, per_server_root, e)
            self._broken.add(key)
            _resolve(None)
            return None

        with self._state_lock:
            self._clients[key] = client
            self._last_used[key] = time.time()
        eventlog.log_active(srv.server_id, per_server_root)
        _resolve(client)
        return client
    finally:
        with self._state_lock:
            self._spawning.pop(key, None)
        # Reached with the future still pending only on cancellation
        # or an unexpected exception above: release the waiters.
        _resolve(None)
async def _shutdown_async(self) -> None:
    """Forget all clients, broken keys and last-use times, then shut the clients down.

    The registries are emptied under the state lock; the client
    shutdowns run concurrently afterwards and their individual errors
    are collected rather than raised.
    """
    with self._state_lock:
        doomed = [*self._clients.values()]
        for registry in (self._clients, self._broken, self._last_used):
            registry.clear()
    pending = [client.shutdown() for client in doomed]
    await asyncio.gather(*pending, return_exceptions=True)
# ------------------------------------------------------------------
# status / introspection (used by ``hermes lsp status``)
# ------------------------------------------------------------------
def get_status(self) -> Dict[str, Any]:
    """Return a point-in-time description of the service for ``hermes lsp status``.

    The result carries the configuration values, one entry per
    registered client (server id, workspace root, state, running
    flag), the broken keys, and the disabled server ids in sorted
    order.
    """
    client_rows = []
    with self._state_lock:
        for (server_id, workspace_root), client in self._clients.items():
            client_rows.append(
                {
                    "server_id": server_id,
                    "workspace_root": workspace_root,
                    "state": client.state,
                    "running": client.is_running,
                }
            )
        broken_keys = list(self._broken)

    status: Dict[str, Any] = {}
    status["enabled"] = self._enabled
    status["wait_mode"] = self._wait_mode
    status["wait_timeout"] = self._wait_timeout
    status["install_strategy"] = self._install_strategy
    status["clients"] = client_rows
    status["broken"] = broken_keys
    status["disabled_servers"] = sorted(self._disabled_servers)
    return status
def _diag_key(d: Dict[str, Any]) -> str:
    """Build the content-equality key used for delta filtering.

    The key joins severity (default 1), code, source, the stripped
    message and the ``startLine:startChar-endLine:endChar`` range with
    NUL separators.  Mirrors :func:`agent.lsp.client._diagnostic_key`.
    """
    span = d.get("range") or {}
    begin = span.get("start") or {}
    finish = span.get("end") or {}

    code = d.get("code")
    if not (code is None or isinstance(code, str)):
        code = str(code)

    begin_pos = f"{begin.get('line', 0)}:{begin.get('character', 0)}"
    finish_pos = f"{finish.get('line', 0)}:{finish.get('character', 0)}"
    fields = (
        str(d.get("severity") or 1),
        str(code or ""),
        str(d.get("source") or ""),
        str(d.get("message") or "").strip(),
        f"{begin_pos}-{finish_pos}",
    )
    return "\x00".join(fields)
# Only the service class is public; the loop wrapper and key helper are internal.
__all__ = ["LSPService"]

196
agent/lsp/protocol.py Normal file
View file

@ -0,0 +1,196 @@
"""Minimal LSP JSON-RPC 2.0 framer over async streams.
LSP wire format:
Content-Length: <bytes>\\r\\n
\\r\\n
<utf-8 JSON body>
The body is a JSON-RPC 2.0 envelope: request, response, or notification.
This module replaces what ``vscode-jsonrpc/node`` would do in a
TypeScript implementation. We keep it deliberately small — just the
framer + envelope helpers — so :class:`agent.lsp.client.LSPClient` can
focus on protocol semantics.
"""
from __future__ import annotations
import asyncio
import json
import logging
from typing import Any, Optional, Tuple
logger = logging.getLogger("agent.lsp.protocol")
# LSP error codes we care about. Full list in
# https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#errorCodes
# LSP ``ContentModified``: the document changed while the request was running.
ERROR_CONTENT_MODIFIED = -32801
# LSP ``RequestCancelled``: the request was cancelled.
ERROR_REQUEST_CANCELLED = -32800
# JSON-RPC ``MethodNotFound``: the peer does not implement the method.
ERROR_METHOD_NOT_FOUND = -32601
class LSPProtocolError(Exception):
    """Raised when the wire protocol is violated.

    Distinct from :class:`LSPRequestError`, which represents a server
    returning a JSON-RPC error response that is protocol-conformant.
    This exception means the framing or envelope itself is broken
    (bad headers, missing or implausible Content-Length, truncated or
    undecodable body).
    """
class LSPRequestError(Exception):
    """An LSP request came back with a JSON-RPC error response.

    Attributes ``code``, ``message`` and ``data`` hold the fields of
    the error object; ``data`` is ``None`` when not supplied.
    """

    def __init__(self, code: int, message: str, data: Any = None) -> None:
        self.code, self.message, self.data = code, message, data
        summary = f"LSP error {code}: {message}"
        super().__init__(summary)
def encode_message(obj: dict) -> bytes:
    """Serialize a JSON-RPC envelope into a Content-Length framed message.

    The body is compact UTF-8 JSON (no spaces after separators,
    non-ASCII characters kept as-is) — matches what ``vscode-jsonrpc``
    emits.  The header counts the encoded bytes, not characters.
    """
    payload = json.dumps(obj, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
    return b"Content-Length: %d\r\n\r\n" % len(payload) + payload
async def read_message(reader: asyncio.StreamReader) -> Optional[dict]:
    """Read one Content-Length framed JSON-RPC message from the stream.

    Returns ``None`` on clean EOF (server closed stdout cleanly between
    messages — typical shutdown).  Raises :class:`LSPProtocolError` on
    malformed framing: EOF inside the header block, a header line or
    header block that is too long, non-ASCII or malformed headers, a
    missing / non-integer / out-of-range Content-Length, a truncated
    body, or a body that is not valid UTF-8 JSON.

    The reader is advanced to just past the JSON body on success.
    """
    headers: dict = {}
    header_bytes = 0
    while True:
        try:
            line = await reader.readuntil(b"\r\n")
        except asyncio.IncompleteReadError as e:
            # EOF while reading headers. If we hadn't started a header
            # block, treat as clean EOF; otherwise the framing is bad.
            if not e.partial and not headers:
                return None
            raise LSPProtocolError(
                f"unexpected EOF while reading LSP headers (partial={e.partial!r})"
            ) from e
        except asyncio.LimitOverrunError as e:
            # A single "line" outgrew the StreamReader buffer limit
            # without a CRLF.  That is a framing violation too; without
            # this branch the raw asyncio error would escape.
            raise LSPProtocolError(
                "LSP header line exceeded the stream buffer limit without CRLF"
            ) from e
        # Defensive cap against a server streaming headers without ever
        # emitting CRLF-CRLF. Caps total header bytes at 8 KiB — a
        # well-behaved server fits in well under 200 bytes.
        header_bytes += len(line)
        if header_bytes > 8192:
            raise LSPProtocolError(
                "LSP header block exceeded 8 KiB without terminator"
            )
        line = line[:-2]  # strip CRLF
        if not line:
            break  # blank line ends header block
        try:
            key, _, value = line.decode("ascii").partition(":")
        except UnicodeDecodeError as e:
            raise LSPProtocolError(f"non-ASCII LSP header: {line!r}") from e
        if not key:
            raise LSPProtocolError(f"malformed LSP header line: {line!r}")
        # Header names are matched case-insensitively.
        headers[key.strip().lower()] = value.strip()

    cl = headers.get("content-length")
    if cl is None:
        raise LSPProtocolError(f"LSP message missing Content-Length: {headers!r}")
    try:
        n = int(cl)
    except ValueError as e:
        raise LSPProtocolError(f"non-integer Content-Length: {cl!r}") from e
    if n < 0 or n > 64 * 1024 * 1024:  # 64 MiB sanity cap
        raise LSPProtocolError(f"unreasonable Content-Length: {n}")

    try:
        body = await reader.readexactly(n)
    except asyncio.IncompleteReadError as e:
        raise LSPProtocolError(
            f"truncated LSP body: expected {n} bytes, got {len(e.partial)}"
        ) from e

    try:
        return json.loads(body.decode("utf-8"))
    except json.JSONDecodeError as e:
        raise LSPProtocolError(f"invalid JSON in LSP body: {e}") from e
    except UnicodeDecodeError as e:
        raise LSPProtocolError(f"non-UTF-8 LSP body: {e}") from e
def make_request(req_id: int, method: str, params: Any) -> dict:
    """Return a JSON-RPC 2.0 request envelope; ``params`` is omitted when None."""
    envelope: dict = {"jsonrpc": "2.0", "id": req_id, "method": method}
    if params is None:
        return envelope
    envelope["params"] = params
    return envelope
def make_notification(method: str, params: Any) -> dict:
    """Return a JSON-RPC 2.0 notification envelope (no ``id``); ``params`` is omitted when None."""
    envelope: dict = {"jsonrpc": "2.0", "method": method}
    if params is None:
        return envelope
    envelope["params"] = params
    return envelope
def make_response(req_id: Any, result: Any) -> dict:
    """Return a JSON-RPC 2.0 success response envelope for ``req_id``."""
    envelope: dict = {"jsonrpc": "2.0"}
    envelope["id"] = req_id
    envelope["result"] = result
    return envelope
def make_error_response(req_id: Any, code: int, message: str, data: Any = None) -> dict:
    """Return a JSON-RPC 2.0 error response envelope.

    The error object always has ``code`` and ``message``; ``data`` is
    added only when it is not None.
    """
    error_obj: dict = {"code": code, "message": message}
    if data is not None:
        error_obj["data"] = data
    envelope: dict = {"jsonrpc": "2.0"}
    envelope["id"] = req_id
    envelope["error"] = error_obj
    return envelope
def classify_message(msg: dict) -> Tuple[str, Any]:
    """Classify a decoded envelope as ``(kind, key)``.

    ``kind`` is one of ``request``, ``response``, ``notification`` or
    ``invalid``.  ``key`` is the id for requests and responses, the
    method name for notifications, and ``None`` for invalid messages.
    Anything that is not a dict with ``jsonrpc == "2.0"`` is invalid.
    """
    if not isinstance(msg, dict) or msg.get("jsonrpc") != "2.0":
        return "invalid", None

    if "method" in msg:
        # A method with an id is a request; without one, a notification.
        if "id" in msg:
            return "request", msg["id"]
        return "notification", msg["method"]

    if "id" in msg and ("result" in msg or "error" in msg):
        return "response", msg["id"]
    return "invalid", None
# Public names of this module: error codes, exceptions, framer, envelope helpers.
__all__ = [
    "ERROR_CONTENT_MODIFIED",
    "ERROR_REQUEST_CANCELLED",
    "ERROR_METHOD_NOT_FOUND",
    "LSPProtocolError",
    "LSPRequestError",
    "encode_message",
    "read_message",
    "make_request",
    "make_notification",
    "make_response",
    "make_error_response",
    "classify_message",
]

78
agent/lsp/reporter.py Normal file
View file

@ -0,0 +1,78 @@
"""Format LSP diagnostics for inclusion in tool output.
The model sees a compact, severity-filtered, line-bounded summary of
diagnostics introduced by the latest edit. Format matches what
OpenCode's ``lsp/diagnostic.ts`` and Claude Code's
``formatDiagnosticsSummary`` produce — ``<diagnostics>`` blocks with
1-indexed line/column, capped at ``MAX_PER_FILE`` errors.
"""
from __future__ import annotations
from typing import Any, Dict, List
# Severity-1 only by default — warnings/info/hints would flood the
# agent. Lift this in config under ``lsp.severities`` if needed.
# Maps the numeric LSP severity to the label printed in each report line.
SEVERITY_NAMES = {1: "ERROR", 2: "WARN", 3: "INFO", 4: "HINT"}
DEFAULT_SEVERITIES = frozenset({1}) # ERROR only
# Default cap on diagnostics listed per file by ``report_for_file``.
MAX_PER_FILE = 20
# Default character limit applied by ``truncate``.
MAX_TOTAL_CHARS = 4000
def format_diagnostic(d: Dict[str, Any]) -> str:
    """Render one diagnostic as ``SEV [line:col] message [code] (source)``.

    Line and column are converted from the 0-based LSP position to
    1-based.  A missing severity is treated as 1 and unknown severities
    are labelled ``ERROR``.  The code and source parts are left out
    when absent or empty.
    """
    label = SEVERITY_NAMES.get(d.get("severity") or 1, "ERROR")
    start = (d.get("range") or {}).get("start") or {}
    line_no = int(start.get("line", 0)) + 1
    col_no = int(start.get("character", 0)) + 1
    text = str(d.get("message") or "").rstrip()

    pieces = [f"{label} [{line_no}:{col_no}] {text}"]
    code = d.get("code")
    if code not in (None, ""):
        pieces.append(f" [{code}]")
    source = d.get("source")
    if source:
        pieces.append(f" ({source})")
    return "".join(pieces)
def report_for_file(
    file_path: str,
    diagnostics: List[Dict[str, Any]],
    *,
    severities: frozenset = DEFAULT_SEVERITIES,
    max_per_file: int = MAX_PER_FILE,
) -> str:
    """Render the diagnostics of one file as a ``<diagnostics file=...>`` block.

    Only diagnostics whose severity (default 1 when missing) is in
    ``severities`` are kept, and at most ``max_per_file`` of them are
    listed; the rest are summarised as ``... and N more``.

    Returns an empty string when nothing passes the severity filter,
    so callers can do ``if block:`` to skip empty cases.
    """
    if not diagnostics:
        return ""

    kept = [diag for diag in diagnostics if (diag.get("severity") or 1) in severities]
    if not kept:
        return ""

    shown = kept[:max_per_file]
    body = "\n".join(format_diagnostic(diag) for diag in shown)
    overflow = len(kept) - len(shown)
    if overflow > 0:
        body = f"{body}\n... and {overflow} more"
    return "\n".join((f"<diagnostics file=\"{file_path}\">", body, "</diagnostics>"))
def truncate(s: str, *, limit: int = MAX_TOTAL_CHARS) -> str:
    """Hard-cap a formatted summary string at ``limit`` characters.

    Strings within the limit are returned unchanged.  Longer strings
    are cut and end with a ``…[truncated]`` marker line; when ``limit``
    is too small to hold the marker the string is simply cut to
    ``limit`` characters, so the result never exceeds ``limit``.
    """
    if len(s) <= limit:
        return s
    marker = "\n…[truncated]"
    if limit < len(marker):
        # No room for the marker: a negative slice end here would keep
        # *more* than ``limit`` characters, so fall back to a plain cut.
        return s[: max(limit, 0)]
    return s[: limit - len(marker)] + marker
# Public names of this module.
# NOTE(review): ``MAX_TOTAL_CHARS`` is not listed here — confirm whether that is intentional.
__all__ = [
    "SEVERITY_NAMES",
    "DEFAULT_SEVERITIES",
    "MAX_PER_FILE",
    "format_diagnostic",
    "report_for_file",
    "truncate",
]

1040
agent/lsp/servers.py Normal file

File diff suppressed because it is too large Load diff

223
agent/lsp/workspace.py Normal file
View file

@ -0,0 +1,223 @@
"""Workspace and project-root resolution for LSP.
Two concerns live here:
1. **Workspace gate** — the upper-level "is this directory a project?"
check. Hermes only runs LSP when the cwd (or the file being edited)
sits inside a git worktree. Files outside any git root never
trigger LSP, even if a server is configured. This keeps Telegram
gateway users whose cwd is their home directory from spawning daemons.
2. **NearestRoot** — the per-server project-root walk. Each language
server cares about a different marker (``pyproject.toml`` for
Python, ``Cargo.toml`` for Rust, ``go.mod`` for Go, etc.) and
wants the directory containing that marker. ``nearest_root()``
walks up from a starting path looking for any of a list of marker
files, optionally bailing if an exclude marker shows up first.
"""
from __future__ import annotations
import logging
import os
from pathlib import Path
from typing import Iterable, Optional, Tuple
logger = logging.getLogger("agent.lsp.workspace")
# Cache: start directory → (worktree_root or None, is_git) so repeated
# calls don't re-stat. Keys are the normalized absolute path string of
# the directory the walk started from; symlinks are NOT resolved (see
# ``normalize_path``), so two symlinked spellings of one directory get
# separate entries. Negative results are cached too.
# NOTE(review): the original comment says this is cleared on shutdown;
# the clearing function is not in view here — confirm.
_workspace_cache: dict = {}
def normalize_path(path: str) -> str:
    """Return ``path`` in a stable form suitable for use as a map key.

    Expands ``~``, makes the path absolute, and collapses ``.``/``..``.
    Symlinks are deliberately NOT resolved — symlink stability matters
    for some LSP servers (rust-analyzer cares about Cargo workspace
    identity) and we want the canonical path the user typed when
    possible.
    """
    expanded = os.path.expanduser(path)
    return os.path.abspath(expanded)
def find_git_worktree(start: str) -> Optional[str]:
    """Locate the nearest ancestor of ``start`` that contains a ``.git`` entry.

    ``start`` may be a file (its directory is used) or a directory.
    Returns the directory containing ``.git``, or ``None`` if no git
    root is found before hitting the filesystem root.

    A ``.git`` *file* (not directory) means we're inside a git
    worktree set up via ``git worktree add`` — both forms count.

    Results, including misses, are memoised per start directory.
    """
    try:
        origin = Path(normalize_path(start))
        if origin.is_file():
            origin = origin.parent
    except (OSError, RuntimeError, ValueError):
        # Pathological input (loop in symlinks, encoding error, etc.) —
        # bail out rather than crash the lint hook.
        return None

    cache_key = str(origin)
    hit = _workspace_cache.get(cache_key)
    if hit is not None:
        return hit[0]

    # Visit the start directory and then each ancestor up to the
    # filesystem root, but never more than 64 levels: the deepest
    # reasonable monorepo is well under that, and the cap bounds the
    # walk for pathological inputs.
    for directory in (origin, *origin.parents)[:64]:
        try:
            has_git = (directory / ".git").exists()
        except OSError:
            # Permission error on a parent dir — bail out cleanly.
            break
        if has_git:
            root = str(directory)
            _workspace_cache[cache_key] = (root, True)
            return root

    _workspace_cache[cache_key] = (None, False)
    return None
def is_inside_workspace(path: str, workspace_root: str) -> bool:
    """Return True iff ``path`` is inside (or equal to) ``workspace_root``.

    Uses absolute paths but does not resolve symlinks — a file accessed
    via a symlink that points outside the workspace still counts as
    outside. This is the conservative interpretation; matches LSP
    behaviour where servers reject didOpen for unrelated files.

    Comparison follows the platform's path-case rules: case-insensitive
    on Windows, exact elsewhere.
    """
    p = normalize_path(path)
    root = normalize_path(workspace_root)
    if p == root:
        return True
    try:
        common = os.path.commonpath([p, root])
    except ValueError:
        # Different drives on Windows.
        return False
    # On Windows ``commonpath`` matches components case-insensitively
    # but returns them in the casing of its *first* argument, so a plain
    # ``common == root`` fails when only the case differs.  ``normcase``
    # is the identity on POSIX, so behaviour there is unchanged.
    return os.path.normcase(common) == os.path.normcase(root)
def nearest_root(
    start: str,
    markers: Iterable[str],
    *,
    excludes: Optional[Iterable[str]] = None,
    ceiling: Optional[str] = None,
) -> Optional[str]:
    """Walk up from ``start`` looking for any of the given marker files.

    Args:
        start: File or directory to begin at; for a file the walk begins at
            its parent directory.
        markers: Names whose presence in a directory identifies that
            directory as the root.
        excludes: Optional names that gate the result off. At every level
            excludes are checked *before* markers, so an exclude found at
            the same level as, or below, the first marker wins
            (e.g. ``deno.json`` seen before ``package.json``).
        ceiling: Optional directory at which to stop. The ceiling itself is
            still probed before giving up.

    Returns:
        The **directory containing** the first matched marker, or ``None``
        when an exclude matched first, no marker was found before the
        ceiling / filesystem root / 64-level cap, or the inputs could not
        be normalised.
    """
    try:
        # Normalisation happens inside the guard (as in
        # ``find_git_worktree``) so pathological input yields ``None``
        # instead of propagating out of the caller.
        start_path = Path(normalize_path(start))
        if start_path.is_file():
            start_path = start_path.parent
        ceiling_path = Path(normalize_path(ceiling)) if ceiling else None
    except (OSError, RuntimeError, ValueError):
        return None

    markers_list = list(markers)
    excludes_list = list(excludes) if excludes else []

    def _present(candidate: Path) -> bool:
        """Return whether ``candidate`` exists, treating OSError as absent."""
        try:
            return candidate.exists()
        except OSError:
            return False

    cur = start_path
    # Defensive cap matching ``find_git_worktree``; the parent-equality
    # stop normally terminates the walk long before this.
    for _ in range(64):
        # Excludes first: a hit gates the server off for this file.
        if any(_present(cur / exc) for exc in excludes_list):
            return None
        if any(_present(cur / marker) for marker in markers_list):
            return str(cur)
        # Stop conditions: reached the ceiling or the filesystem root.
        if ceiling_path is not None and cur == ceiling_path:
            return None
        parent = cur.parent
        if parent == cur:
            return None
        cur = parent
    return None
def resolve_workspace_for_file(
    file_path: str,
    *,
    cwd: Optional[str] = None,
) -> Tuple[Optional[str], bool]:
    """Pick the workspace root that applies to ``file_path``.

    Two anchors are tried in order:

    1. The git worktree found by walking up from ``cwd`` (defaulting to the
       process working directory). It wins whenever ``file_path`` lies
       inside it.
    2. The git worktree found by walking up from ``file_path`` itself, used
       when the cwd is not in a worktree or the file lies outside it.

    Returns:
        ``(workspace_root, gated_in)``. ``gated_in`` is ``True`` exactly
        when a worktree was found by one of the anchors; otherwise the
        result is ``(None, False)``.
    """
    anchor = cwd if cwd else os.getcwd()
    launch_root = find_git_worktree(anchor)
    if launch_root is not None and is_inside_workspace(file_path, launch_root):
        return launch_root, True

    # Secondary anchor: the file's own location (e.g. an unrelated checkout
    # opened from inside another project).
    own_root = find_git_worktree(file_path)
    if own_root is None:
        return None, False
    return own_root, True
def clear_cache() -> None:
    """Drop every memoised entry from ``_workspace_cache``.

    Intended for service shutdown, so that a later re-initialisation
    resolves workspaces afresh rather than reusing results from the
    previous session.
    """
    _workspace_cache.clear()
# Public API of this module: the names exported by ``from <module> import *``.
__all__ = [
    "find_git_worktree",
    "is_inside_workspace",
    "nearest_root",
    "normalize_path",
    "resolve_workspace_for_file",
    "clear_cache",
]

View file

@ -10,7 +10,7 @@ import os
import re
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urlparse
import requests
@ -1330,27 +1330,66 @@ def _resolve_codex_oauth_context_length(
return None
def _resolve_nous_context_length(model: str) -> Optional[int]:
"""Resolve Nous Portal model context length via OpenRouter metadata.
def _resolve_nous_context_length(
model: str,
base_url: str = "",
api_key: str = "",
) -> Tuple[Optional[int], str]:
"""Resolve Nous Portal model context length.
Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses
prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching
with version normalization (dotdash).
Tries the live Nous inference endpoint first (authoritative), then falls
back to OpenRouter metadata with suffix/version matching.
Nous model IDs are bare after prefix-stripping (e.g. 'qwen3.6-plus',
'claude-opus-4-6') while OpenRouter uses prefixed IDs (e.g.
'qwen/qwen3.6-plus', 'anthropic/claude-opus-4.6'). Version
normalization (dot ↔ dash) is applied to handle name drifts.
Returns ``(context_length, source)`` where ``source`` is one of:
- ``"portal"`` — live /v1/models response (authoritative)
- ``"openrouter"`` — OpenRouter cache fallback (non-authoritative;
callers must NOT persist this to the on-disk cache or a single
portal blip will freeze the wrong value in forever)
- ``""`` — could not resolve
"""
metadata = fetch_model_metadata() # OpenRouter cache
# Exact match first
# Portal first — the Nous /models endpoint is authoritative for what our
# infrastructure enforces and may differ from OR (e.g. OR reports 1M for
# qwen3.6-plus; the portal correctly says 262144). Fall back to the OR
# catalog only if the portal doesn't list the model.
if base_url:
portal_ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if portal_ctx is not None:
return portal_ctx, "portal"
metadata = fetch_model_metadata()
def _safe_ctx(or_id: str, entry: dict) -> Optional[int]:
ctx = entry.get("context_length")
if ctx is None:
return None
if ctx <= 32768 and _model_name_suggests_kimi(or_id):
logger.info(
"Rejecting OpenRouter metadata context=%s for %r "
"(Kimi-family underreport, Nous path); falling through to hardcoded defaults",
ctx, or_id,
)
return None
return ctx
if model in metadata:
return metadata[model].get("context_length")
ctx = _safe_ctx(model, metadata[model])
if ctx is not None:
return ctx, "openrouter"
normalized = _normalize_model_version(model).lower()
for or_id, entry in metadata.items():
bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
if bare.lower() == model.lower() or _normalize_model_version(bare).lower() == normalized:
return entry.get("context_length")
ctx = _safe_ctx(or_id, entry)
if ctx is not None:
return ctx, "openrouter"
# Partial prefix match for cases like gemini-3-flash → gemini-3-flash-preview
# Require match to be at a word boundary (followed by -, :, or end of string)
model_lower = model.lower()
for or_id, entry in metadata.items():
bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
@ -1358,9 +1397,11 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
if candidate.startswith(query) and (
len(candidate) == len(query) or candidate[len(query)] in "-:."
):
return entry.get("context_length")
ctx = _safe_ctx(or_id, entry)
if ctx is not None:
return ctx, "openrouter"
return None
return None, ""
def get_model_context_length(
@ -1375,14 +1416,18 @@ def get_model_context_length(
Resolution order:
0. Explicit config override (model.context_length or custom_providers per-model)
1. Persistent cache (previously discovered via probing)
1. Persistent cache (previously discovered via probing). Nous URLs
bypass the cache here so step 5b can always reconcile against
the authoritative portal /v1/models response.
1b. AWS Bedrock static table (must precede custom-endpoint probe)
2. Active endpoint metadata (/models for explicit custom endpoints)
3. Local server query (for local endpoints)
4. Anthropic /v1/models API (API-key users only, not OAuth)
5. Provider-aware lookups (before generic OpenRouter cache):
a. Copilot live /models API
b. Nous suffix-match via OpenRouter cache
b. Nous: live /v1/models probe first (authoritative), then OR
cache fallback with suffix/version normalisation. Only
portal-derived values are persisted to disk.
c. Codex OAuth /models probe
d. GMI /models endpoint
e. Ollama native /api/show probe (any base_url, provider-agnostic)
@ -1437,6 +1482,28 @@ def get_model_context_length(
model, base_url, f"{cached:,}",
)
_invalidate_cached_context_length(model, base_url)
# Invalidate stale 32k cache entries for Kimi-family models.
elif cached <= 32768 and _model_name_suggests_kimi(model):
logger.info(
"Dropping stale Kimi cache entry %s@%s -> %s (OpenRouter underreport); "
"re-resolving via hardcoded defaults",
model, base_url, f"{cached:,}",
)
_invalidate_cached_context_length(model, base_url)
# Nous Portal: the portal /v1/models endpoint is authoritative.
# Bypass the persistent cache so step 5b can always reconcile
# against it — this corrects pre-fix entries seeded from the
# OR catalog (the same OR underreport class that the Kimi/Qwen
# DEFAULT_CONTEXT_LENGTHS overrides exist to mitigate) without
# touching the on-disk file when the portal is unreachable.
# The in-memory 300s endpoint metadata cache makes the per-call
# cost amortise to ~0 within a process.
elif _infer_provider_from_url(base_url) == "nous":
logger.debug(
"Bypassing persistent cache for %s@%s (Nous portal authoritative)",
model, base_url,
)
# Fall through; step 5b reconciles and overwrites if portal responds.
else:
return cached
@ -1528,8 +1595,18 @@ def get_model_context_length(
pass # Fall through to models.dev
if effective_provider == "nous":
ctx = _resolve_nous_context_length(model)
ctx, source = _resolve_nous_context_length(
model, base_url=base_url or "", api_key=api_key or ""
)
if ctx:
# Persist ONLY portal-derived values. Caching an OR-fallback
# value here would freeze in a wrong number on the first portal
# blip / auth glitch and step-1 would short-circuit it forever.
# OR's catalog is community-maintained and is precisely why the
# Kimi/Qwen DEFAULT_CONTEXT_LENGTHS overrides exist — we don't
# want it leaking into the persistent cache for Nous URLs.
if base_url and source == "portal":
save_context_length(model, base_url, ctx)
return ctx
if effective_provider == "openai-codex":
# Codex OAuth enforces lower context limits than the direct OpenAI
@ -1575,14 +1652,6 @@ def get_model_context_length(
if model in metadata:
or_ctx = metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
# Guard against stale OpenRouter metadata for Kimi-family models.
# OpenRouter reports 32768 for moonshotai/kimi-k2.6, but the model
# actually supports 262144 (models.dev + official Kimi docs agree).
# Providers that host their own Kimi endpoints (Ollama Cloud, Kimi
# Coding, Moonshot) would otherwise trip the 64k minimum-context
# guard and reject a perfectly capable model.
# The filter is narrow: only reject exactly 32768 for Kimi-named
# models. If OpenRouter ever updates its data, the stale path
# becomes dead code with no impact.
if or_ctx == 32768 and _model_name_suggests_kimi(model):
logger.info(
"Rejecting OpenRouter metadata context=%s for %r "

View file

@ -370,6 +370,17 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
source_url="https://api-docs.deepseek.com/quick_start/pricing",
pricing_version="deepseek-pricing-2026-03-16",
),
(
"deepseek",
"deepseek-v4-pro",
): PricingEntry(
input_cost_per_million=Decimal("1.74"),
output_cost_per_million=Decimal("3.48"),
cache_read_cost_per_million=Decimal("0.0145"),
source="official_docs_snapshot",
source_url="https://api-docs.deepseek.com/quick_start/pricing",
pricing_version="deepseek-pricing-2026-05-12",
),
# Google Gemini
(
"google",

View file

@ -473,7 +473,7 @@ export default function App() {
>
<div
className={cn(
"flex h-14 shrink-0 items-center justify-between gap-2",
"flex h-14 shrink-0 items-center justify-between gap-2 px-4",
"border-b border-current/20",
)}
>

View file

@ -20,6 +20,7 @@ import {
CardTitle,
} from "@/components/ui/card";
import { Badge } from "@nous-research/ui/ui/components/badge";
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
import { OAuthLoginModal } from "@/components/OAuthLoginModal";
import { useI18n } from "@/i18n";
@ -55,6 +56,8 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
const [loading, setLoading] = useState(true);
const [busyId, setBusyId] = useState<string | null>(null);
const [loginFor, setLoginFor] = useState<OAuthProvider | null>(null);
const [disconnectTarget, setDisconnectTarget] =
useState<OAuthProvider | null>(null);
const { t } = useI18n();
const onErrorRef = useRef(onError);
@ -74,10 +77,8 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
}, [refresh]);
const handleDisconnect = async (provider: OAuthProvider) => {
if (!confirm(`${t.oauth.disconnect} ${provider.name}?`)) {
return;
}
setBusyId(provider.id);
setDisconnectTarget(null);
try {
await api.disconnectOAuthProvider(provider.id);
onSuccess?.(`${provider.name} ${t.oauth.disconnect.toLowerCase()}ed`);
@ -236,7 +237,7 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
<Button
size="sm"
outlined
onClick={() => handleDisconnect(p)}
onClick={() => setDisconnectTarget(p)}
disabled={isBusy}
prefix={isBusy ? <Spinner /> : <LogOut />}
>
@ -266,6 +267,17 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
onError={(msg) => onError?.(msg)}
/>
)}
<ConfirmDialog
open={disconnectTarget !== null}
onCancel={() => setDisconnectTarget(null)}
onConfirm={() => {
if (disconnectTarget) void handleDisconnect(disconnectTarget);
}}
title={`${t.oauth.disconnect} ${disconnectTarget?.name ?? ""}?`}
description={`This will remove the stored OAuth tokens for ${disconnectTarget?.name ?? "this provider"}. You will need to re-authenticate to use it again.`}
destructive
confirmLabel={t.oauth.disconnect}
/>
</Card>
);
}

View file

@ -0,0 +1,61 @@
import { useState } from "react";
import { cn } from "@/lib/utils";
import { Check } from "lucide-react";
// Props for Checkbox: every native <input> attribute except `type`
// (the component always renders type="checkbox"), plus an optional label.
interface CheckboxProps
  extends Omit<React.InputHTMLAttributes<HTMLInputElement>, "type"> {
  // Content rendered next to the box, inside the same clickable <label>.
  label?: React.ReactNode;
}
/**
 * Styled checkbox: a visually hidden native <input type="checkbox"> wrapped
 * in a <label>, with a custom box + check icon drawn next to it.
 *
 * Works in both modes:
 *  - controlled   — pass `checked` (and usually `onChange`); the visual
 *                   state mirrors the prop.
 *  - uncontrolled — omit `checked` (optionally pass `defaultChecked`); the
 *                   component tracks the toggled state itself so the
 *                   custom box stays in sync with the native input.
 *
 * `className` is applied to the custom box, not to the native input.
 * All remaining props are forwarded to the native input.
 */
export function Checkbox({
  className,
  label,
  id,
  checked,
  defaultChecked,
  onChange,
  ...props
}: CheckboxProps) {
  const isControlled = checked !== undefined;

  // Local mirror of the native input's state, used only in uncontrolled
  // mode. Deriving the visual state from `defaultChecked` alone would leave
  // the box frozen at its initial value once the user toggles it.
  const [uncontrolledChecked, setUncontrolledChecked] = useState(
    defaultChecked ?? false,
  );
  const isChecked = isControlled ? checked : uncontrolledChecked;

  const handleUncontrolledChange = (
    event: React.ChangeEvent<HTMLInputElement>,
  ) => {
    setUncontrolledChecked(event.target.checked);
    onChange?.(event);
  };

  return (
    <label
      htmlFor={id}
      className={cn(
        "group flex items-center gap-2.5 cursor-pointer select-none",
        props.disabled && "cursor-not-allowed opacity-50",
      )}
    >
      <span
        className={cn(
          "flex h-4 w-4 shrink-0 items-center justify-center transition-all",
          "border bg-background/40",
          // Focus-visible ring for keyboard accessibility
          "group-has-[:focus-visible]:ring-2 group-has-[:focus-visible]:ring-ring group-has-[:focus-visible]:ring-offset-1",
          isChecked
            ? "border-foreground bg-foreground/20"
            : "border-border group-hover:border-foreground/40",
          className,
        )}
      >
        <Check
          className={cn(
            "h-3 w-3 transition-opacity",
            isChecked
              ? "text-foreground opacity-100"
              : "text-foreground opacity-0",
          )}
        />
      </span>
      <input
        type="checkbox"
        id={id}
        checked={checked}
        defaultChecked={isControlled ? undefined : defaultChecked}
        className="sr-only"
        {...props}
        // In controlled mode the caller's handler is passed through
        // untouched, so a missing handler still behaves as it did before.
        onChange={isControlled ? onChange : handleUncontrolledChange}
      />
      {label && <span className="text-sm">{label}</span>}
    </label>
  );
}

View file

@ -0,0 +1,44 @@
import { useEffect, useRef } from "react";
/**
 * Hook that adds standard modal behaviors while `open` is true:
 * - Escape key calls the latest `onClose`
 * - Body scroll is locked (the previous `overflow` value is restored on close)
 * - Focus is restored to the previously focused element on close
 *
 * `onClose` does not need to be memoised: it is read through a ref, so a
 * new callback identity on each render does not tear down and re-install
 * the modal behaviors (which would otherwise run the cleanup and move focus
 * back to the opener while the modal is still open).
 *
 * Returns a ref to attach to the modal container (for optional future focus trapping).
 */
export function useModalBehavior({
  open,
  onClose,
}: {
  open: boolean;
  onClose: () => void;
}) {
  const containerRef = useRef<HTMLDivElement>(null);

  // Always points at the most recent onClose without being an effect dependency.
  const onCloseRef = useRef(onClose);
  useEffect(() => {
    onCloseRef.current = onClose;
  }, [onClose]);

  useEffect(() => {
    if (!open) return;

    // Remember what had focus before the modal opened so it can be restored.
    const prevActive = document.activeElement as HTMLElement | null;

    const onKey = (e: KeyboardEvent) => {
      if (e.key === "Escape") {
        e.preventDefault();
        onCloseRef.current();
      }
    };
    document.addEventListener("keydown", onKey);

    // Lock body scroll, keeping the prior inline value for restoration.
    const prevOverflow = document.body.style.overflow;
    document.body.style.overflow = "hidden";

    return () => {
      document.removeEventListener("keydown", onKey);
      document.body.style.overflow = prevOverflow;
      prevActive?.focus?.();
    };
  }, [open]);

  return containerRef;
}

View file

@ -75,7 +75,7 @@ export const en: Translations = {
keys: "Keys",
logs: "Logs",
models: "Models",
profiles: "profiles : multi agents",
profiles: "Profiles",
plugins: "Plugins",
sessions: "Sessions",
skills: "Skills",

View file

@ -4,10 +4,12 @@ const BUILTIN: Record<string, keyof Translations["app"]["nav"]> = {
"/chat": "chat",
"/sessions": "sessions",
"/analytics": "analytics",
"/models": "models",
"/logs": "logs",
"/cron": "cron",
"/skills": "skills",
"/plugins": "plugins",
"/profiles": "profiles",
"/config": "config",
"/env": "keys",
"/docs": "documentation",
@ -30,5 +32,10 @@ export function resolvePageTitle(
if (key) {
return t.app.nav[key];
}
// Derive title from pathname: "/profiles" → "Profiles"
const segment = normalized.slice(1);
if (segment) {
return segment.charAt(0).toUpperCase() + segment.slice(1);
}
return t.app.webUi;
}

View file

@ -46,6 +46,7 @@ import { Button } from "@nous-research/ui/ui/components/button";
import { ListItem } from "@nous-research/ui/ui/components/list-item";
import { Spinner } from "@nous-research/ui/ui/components/spinner";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
import { Input } from "@/components/ui/input";
import { Badge } from "@nous-research/ui/ui/components/badge";
import { useI18n } from "@/i18n";
@ -117,7 +118,9 @@ export default function ConfigPage() {
const [yamlText, setYamlText] = useState("");
const [yamlLoading, setYamlLoading] = useState(false);
const [yamlSaving, setYamlSaving] = useState(false);
const [configPath, setConfigPath] = useState<string | null>(null);
const [activeCategory, setActiveCategory] = useState<string>("");
const [confirmReset, setConfirmReset] = useState(false);
const { toast, showToast } = useToast();
const fileInputRef = useRef<HTMLInputElement>(null);
const { t } = useI18n();
@ -175,6 +178,10 @@ export default function ConfigPage() {
.getDefaults()
.then(setDefaults)
.catch(() => {});
api
.getStatus()
.then((resp) => setConfigPath(resp.config_path))
.catch(() => {});
}, []);
// Set active category when categories load
@ -290,11 +297,17 @@ export default function ConfigPage() {
// "reset this tab", not "wipe my entire config.yaml".
const scopedFields = isSearching ? searchMatchedFields : activeFields;
if (scopedFields.length === 0) return;
setConfirmReset(true);
};
const executeReset = () => {
if (!defaults || !config) return;
setConfirmReset(false);
const scopedFields = isSearching ? searchMatchedFields : activeFields;
if (scopedFields.length === 0) return;
const scopeLabel = isSearching
? t.config.searchResults
: prettyCategoryName(activeCategory);
const message = t.config.confirmResetScope.replace("{scope}", scopeLabel);
if (!window.confirm(message)) return;
let next: Record<string, unknown> = config;
for (const [key] of scopedFields) {
next = setNestedValue(next, key, getNestedValue(defaults, key));
@ -408,7 +421,7 @@ export default function ConfigPage() {
<div className="flex items-center gap-2">
<Settings2 className="h-4 w-4 text-muted-foreground" />
<code className="text-xs text-muted-foreground bg-muted/50 px-2 py-0.5">
{t.config.configPath}
{configPath ?? t.config.configPath}
</code>
</div>
<div className="flex items-center gap-1.5">
@ -627,6 +640,22 @@ export default function ConfigPage() {
</div>
)}
<PluginSlot name="config:bottom" />
<ConfirmDialog
open={confirmReset}
onCancel={() => setConfirmReset(false)}
onConfirm={executeReset}
title={t.config.confirmResetScope.replace(
"{scope}",
isSearching
? t.config.searchResults
: prettyCategoryName(activeCategory),
)}
description={`This will reset ${
(isSearching ? searchMatchedFields : activeFields).length
} field(s) to their default values.`}
destructive
confirmLabel={t.config.resetDefaults}
/>
</div>
);
}

View file

@ -1,5 +1,5 @@
import { useCallback, useEffect, useState } from "react";
import { Clock, Pause, Play, Plus, Trash2, Zap } from "lucide-react";
import { useCallback, useEffect, useLayoutEffect, useState } from "react";
import { Clock, Pause, Play, Plus, Trash2, X, Zap } from "lucide-react";
import { Badge } from "@nous-research/ui/ui/components/badge";
import { Button } from "@nous-research/ui/ui/components/button";
import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
@ -10,11 +10,13 @@ import type { CronJob } from "@/lib/api";
import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
import { useToast } from "@/hooks/useToast";
import { useConfirmDelete } from "@/hooks/useConfirmDelete";
import { useModalBehavior } from "@/hooks/useModalBehavior";
import { Toast } from "@/components/Toast";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Card, CardContent } from "@/components/ui/card";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useI18n } from "@/i18n";
import { usePageHeader } from "@/contexts/usePageHeader";
import { PluginSlot } from "@/plugins";
function formatTime(iso?: string | null): string {
@ -80,11 +82,18 @@ export default function CronPage() {
const [loading, setLoading] = useState(true);
const { toast, showToast } = useToast();
const { t } = useI18n();
const { setEnd } = usePageHeader();
// New job form state
// New job modal state
const [createModalOpen, setCreateModalOpen] = useState(false);
const [prompt, setPrompt] = useState("");
const [schedule, setSchedule] = useState("");
const [name, setName] = useState("");
const closeCreateModal = useCallback(() => setCreateModalOpen(false), []);
const createModalRef = useModalBehavior({
open: createModalOpen,
onClose: closeCreateModal,
});
const [deliver, setDeliver] = useState("local");
const [creating, setCreating] = useState(false);
@ -118,6 +127,7 @@ export default function CronPage() {
setSchedule("");
setName("");
setDeliver("local");
setCreateModalOpen(false);
loadJobs();
} catch (e) {
showToast(`${t.config.failedToSave}: ${e}`, "error");
@ -181,6 +191,22 @@ export default function CronPage() {
),
});
// Put "Create" button in page header
useLayoutEffect(() => {
setEnd(
<Button
size="sm"
onClick={() => setCreateModalOpen(true)}
>
<Plus className="h-3 w-3" />
{t.common.create}
</Button>,
);
return () => {
setEnd(null);
};
}, [setEnd, t.common.create, loading]);
if (loading) {
return (
<div className="flex items-center justify-center py-24">
@ -213,86 +239,110 @@ export default function CronPage() {
loading={jobDelete.isDeleting}
/>
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2 text-base">
<Plus className="h-4 w-4" />
{t.cron.newJob}
</CardTitle>
</CardHeader>
<CardContent>
<div className="grid gap-4">
<div className="grid gap-2">
<Label htmlFor="cron-name">{t.cron.nameOptional}</Label>
<Input
id="cron-name"
placeholder={t.cron.namePlaceholder}
value={name}
onChange={(e) => setName(e.target.value)}
/>
</div>
{/* Create job modal */}
{createModalOpen && (
<div
ref={createModalRef}
className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
onClick={(e) => e.target === e.currentTarget && setCreateModalOpen(false)}
role="dialog"
aria-modal="true"
aria-labelledby="create-cron-title"
>
<div className="relative w-full max-w-lg border border-border bg-card shadow-2xl flex flex-col">
<Button
ghost
size="icon"
onClick={() => setCreateModalOpen(false)}
className="absolute right-2 top-2 text-muted-foreground hover:text-foreground"
aria-label="Close"
>
<X />
</Button>
<div className="grid gap-2">
<Label htmlFor="cron-prompt">{t.cron.prompt}</Label>
<textarea
id="cron-prompt"
className="flex min-h-[80px] w-full border border-input bg-transparent px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
placeholder={t.cron.promptPlaceholder}
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
/>
</div>
<header className="p-5 pb-3 border-b border-border">
<h2
id="create-cron-title"
className="font-display text-base tracking-wider uppercase"
>
{t.cron.newJob}
</h2>
</header>
<div className="grid grid-cols-1 sm:grid-cols-3 gap-4">
<div className="p-5 grid gap-4">
<div className="grid gap-2">
<Label htmlFor="cron-schedule">{t.cron.schedule}</Label>
<Label htmlFor="cron-name">{t.cron.nameOptional}</Label>
<Input
id="cron-schedule"
placeholder={t.cron.schedulePlaceholder}
value={schedule}
onChange={(e) => setSchedule(e.target.value)}
id="cron-name"
autoFocus
placeholder={t.cron.namePlaceholder}
value={name}
onChange={(e) => setName(e.target.value)}
/>
</div>
<div className="grid gap-2">
<Label htmlFor="cron-deliver">{t.cron.deliverTo}</Label>
<Select
id="cron-deliver"
value={deliver}
onValueChange={(v) => setDeliver(v)}
>
<SelectOption value="local">
{t.cron.delivery.local}
</SelectOption>
<SelectOption value="telegram">
{t.cron.delivery.telegram}
</SelectOption>
<SelectOption value="discord">
{t.cron.delivery.discord}
</SelectOption>
<SelectOption value="slack">
{t.cron.delivery.slack}
</SelectOption>
<SelectOption value="email">
{t.cron.delivery.email}
</SelectOption>
</Select>
<Label htmlFor="cron-prompt">{t.cron.prompt}</Label>
<textarea
id="cron-prompt"
className="flex min-h-[80px] w-full border border-border bg-background/40 px-3 py-2 text-sm font-courier shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-foreground/30 focus-visible:border-foreground/25"
placeholder={t.cron.promptPlaceholder}
value={prompt}
onChange={(e) => setPrompt(e.target.value)}
/>
</div>
<div className="flex items-end">
<div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
<div className="grid gap-2">
<Label htmlFor="cron-schedule">{t.cron.schedule}</Label>
<Input
id="cron-schedule"
placeholder={t.cron.schedulePlaceholder}
value={schedule}
onChange={(e) => setSchedule(e.target.value)}
/>
</div>
<div className="grid gap-2">
<Label htmlFor="cron-deliver">{t.cron.deliverTo}</Label>
<Select
id="cron-deliver"
value={deliver}
onValueChange={(v) => setDeliver(v)}
>
<SelectOption value="local">
{t.cron.delivery.local}
</SelectOption>
<SelectOption value="telegram">
{t.cron.delivery.telegram}
</SelectOption>
<SelectOption value="discord">
{t.cron.delivery.discord}
</SelectOption>
<SelectOption value="slack">
{t.cron.delivery.slack}
</SelectOption>
<SelectOption value="email">
{t.cron.delivery.email}
</SelectOption>
</Select>
</div>
</div>
<div className="flex justify-end">
<Button
size="sm"
onClick={handleCreate}
disabled={creating}
prefix={<Plus />}
className="w-full"
prefix={creating ? <Spinner /> : <Plus />}
>
{creating ? t.common.creating : t.common.create}
</Button>
</div>
</div>
</div>
</CardContent>
</Card>
</div>
)}
<div className="flex flex-col gap-3">
<H2

View file

@ -1,4 +1,4 @@
import { useCallback, useEffect, useMemo, useState } from "react";
import { useCallback, useEffect, useLayoutEffect, useMemo, useState } from "react";
import {
Eye,
EyeOff,
@ -35,6 +35,7 @@ import { Badge } from "@nous-research/ui/ui/components/badge";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useI18n } from "@/i18n";
import { usePageHeader } from "@/contexts/usePageHeader";
import { PluginSlot } from "@/plugins";
/* ------------------------------------------------------------------ */
@ -132,7 +133,7 @@ function EnvVarRow({
// Compact inline row for unset, non-editing keys (used inside provider groups)
if (compact && !info.is_set && !isEditing) {
return (
<div className="flex items-center justify-between gap-3 py-1.5 opacity-50 hover:opacity-100 transition-opacity">
<div className="flex items-center justify-between gap-3 py-1.5 min-w-0 overflow-hidden opacity-50 hover:opacity-100 transition-opacity">
<div className="flex items-center gap-2 min-w-0">
<span className="font-mono-ui text-[0.7rem] text-muted-foreground">
{varKey}
@ -168,7 +169,7 @@ function EnvVarRow({
// Non-compact unset row
if (!info.is_set && !isEditing) {
return (
<div className="flex items-center justify-between gap-3 border border-border/50 px-4 py-2.5 opacity-60 hover:opacity-100 transition-opacity">
<div className="flex items-center justify-between gap-3 border border-border/50 px-4 py-2.5 min-w-0 overflow-hidden opacity-60 hover:opacity-100 transition-opacity">
<div className="flex items-center gap-3 min-w-0">
<Label className="font-mono-ui text-[0.7rem] text-muted-foreground">
{varKey}
@ -203,7 +204,7 @@ function EnvVarRow({
// Full expanded row for set keys or keys being edited
return (
<div className="grid gap-2 border border-border p-4">
<div className="grid gap-2 border border-border p-4 min-w-0 overflow-hidden">
<div className="flex items-center justify-between gap-2 flex-wrap">
<div className="flex items-center gap-2">
<Label className="font-mono-ui text-[0.7rem]">{varKey}</Label>
@ -493,6 +494,7 @@ export default function EnvPage() {
const [showAdvanced, setShowAdvanced] = useState(true); // Show all providers by default
const { toast, showToast } = useToast();
const { t } = useI18n();
const { setAfterTitle } = usePageHeader();
useEffect(() => {
api
@ -501,6 +503,58 @@ export default function EnvPage() {
.catch(() => {});
}, []);
// Scroll-to sub-nav in the page header
const sections = useMemo(() => {
const items: { id: string; label: string }[] = [
{ id: "section-oauth", label: "OAuth" },
{ id: "section-providers", label: "Providers" },
];
if (vars) {
const categories = ["tool", "messaging", "setting"];
const CATEGORY_LABELS: Record<string, string> = {
tool: "Tools",
messaging: "Messaging",
setting: "Settings",
};
for (const cat of categories) {
const hasEntries = Object.values(vars).some(
(info) => info.category === cat,
);
if (hasEntries) {
items.push({ id: `section-${cat}`, label: CATEGORY_LABELS[cat] ?? cat });
}
}
}
return items;
}, [vars]);
useLayoutEffect(() => {
if (!vars) {
setAfterTitle(null);
return;
}
const scrollTo = (id: string) => {
document.getElementById(id)?.scrollIntoView({ behavior: "smooth", block: "start" });
};
setAfterTitle(
<nav className="flex items-center gap-1" aria-label="Jump to section">
{sections.map((s) => (
<button
key={s.id}
type="button"
onClick={() => scrollTo(s.id)}
className="cursor-pointer px-2 py-0.5 text-[10px] uppercase tracking-wider text-muted-foreground hover:text-foreground border border-border/50 hover:border-foreground/30 transition-colors"
>
{s.label}
</button>
))}
</nav>,
);
return () => {
setAfterTitle(null);
};
}, [vars, sections, setAfterTitle]);
const handleSave = async (key: string) => {
const value = edits[key];
if (!value) return;
@ -701,12 +755,14 @@ export default function EnvPage() {
</Button>
</div>
<OAuthProvidersCard
onError={(msg) => showToast(msg, "error")}
onSuccess={(msg) => showToast(msg, "success")}
/>
<div id="section-oauth">
<OAuthProvidersCard
onError={(msg) => showToast(msg, "error")}
onSuccess={(msg) => showToast(msg, "success")}
/>
</div>
<Card>
<Card id="section-providers">
<CardHeader className="border-b border-border bg-card">
<div className="flex items-center gap-2">
<Zap className="h-5 w-5 text-muted-foreground" />
@ -750,7 +806,7 @@ export default function EnvPage() {
if (totalEntries === 0) return null;
return (
<Card key={category}>
<Card key={category} id={`section-${category}`}>
<CardHeader className="border-b border-border bg-card">
<div className="flex items-center gap-2">
<Icon className="h-5 w-5 text-muted-foreground" />
@ -762,7 +818,7 @@ export default function EnvPage() {
</CardDescription>
</CardHeader>
<CardContent className="grid gap-3 pt-4">
<CardContent className="grid gap-3 pt-4 overflow-hidden">
{setEntries.map(([key, info]) => (
<EnvVarRow
key={key}

View file

@ -9,6 +9,7 @@ import {
Settings2,
Star,
Wrench,
X,
Zap,
} from "lucide-react";
import { api } from "@/lib/api";
@ -25,6 +26,8 @@ import { Spinner } from "@nous-research/ui/ui/components/spinner";
import { Stats } from "@nous-research/ui/ui/components/stats";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Badge } from "@nous-research/ui/ui/components/badge";
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
import { useModalBehavior } from "@/hooks/useModalBehavior";
import { usePageHeader } from "@/contexts/usePageHeader";
import { useI18n } from "@/i18n";
import { PluginSlot } from "@/plugins";
@ -91,27 +94,39 @@ function TokenBar({
if (total === 0) return null;
const segments = [
{ value: cacheRead, color: "bg-blue-400/60", label: "Cache Read" },
{ value: reasoning, color: "bg-purple-400/60", label: "Reasoning" },
{ value: input, color: "bg-[#ffe6cb]/70", label: "Input" },
{ value: output, color: "bg-emerald-500/70", label: "Output" },
{ value: cacheRead, color: "bg-blue-400/60", dotColor: "bg-blue-400", label: "Cache Read" },
{ value: reasoning, color: "bg-purple-400/60", dotColor: "bg-purple-400", label: "Reasoning" },
{ value: input, color: "bg-[#ffe6cb]/70", dotColor: "bg-[#ffe6cb]", label: "Input" },
{ value: output, color: "bg-emerald-500/70", dotColor: "bg-emerald-500", label: "Output" },
].filter((s) => s.value > 0);
return (
<div className="space-y-1">
<div className="flex h-2 w-full overflow-hidden rounded-sm bg-muted/30">
<div className="space-y-1.5">
{/* Stacked bar — segments fill proportionally to their share of total */}
<div className="relative flex min-h-[1.5rem] w-full items-stretch overflow-hidden">
{segments.map((s, i) => (
<div
key={i}
className={`${s.color} transition-all duration-300`}
className={`${s.color} relative flex items-center transition-all duration-300`}
style={{ width: `${(s.value / total) * 100}%` }}
/>
>
{/* Stepped fill pattern overlay */}
<div
className="absolute inset-0 opacity-30"
style={{
backgroundImage:
"repeating-linear-gradient(to right, transparent 0 0.4rem, currentColor 0.4rem calc(0.4rem + 1px))",
}}
/>
</div>
))}
</div>
{/* Legend */}
<div className="flex flex-wrap gap-x-3 gap-y-0.5 text-[10px] text-muted-foreground">
{segments.map((s, i) => (
<span key={i} className="flex items-center gap-1">
<span className={`inline-block h-1.5 w-1.5 rounded-full ${s.color}`} />
<span className={`inline-block h-1.5 w-1.5 rounded-full ${s.dotColor}`} />
{s.label} {formatTokens(s.value)}
</span>
))}
@ -378,7 +393,7 @@ function ModelCard({
</div>
</div>
</CardHeader>
<CardContent className="space-y-3 pt-0">
<CardContent className="space-y-3 pt-3">
<TokenBar
input={entry.input_tokens}
output={entry.output_tokens}
@ -445,6 +460,157 @@ type PickerTarget =
| { kind: "main" }
| { kind: "aux"; task: string };
/**
 * Modal dialog for viewing and overriding per-task auxiliary model assignments.
 *
 * Renders one row per entry in AUX_TASKS showing the current assignment taken
 * from `aux.tasks`, with a "Change" button that opens a ModelPickerDialog for
 * that task, plus a "Reset all to auto" button guarded by a ConfirmDialog.
 *
 * Props:
 * - aux: current auxiliary assignments; when null every task renders as auto.
 * - refreshKey: folded into the picker's React `key`, so a new value remounts
 *   the picker.
 * - onSaved: called after a successful assignment or reset request.
 * - onClose: called by the close button and by a click on the backdrop itself.
 */
function AuxiliaryTasksModal({
aux,
refreshKey,
onSaved,
onClose,
}: {
aux: AuxiliaryModelsResponse | null;
refreshKey: number;
onSaved(): void;
onClose(): void;
}) {
// Which task (if any) currently has the model picker open.
const [picker, setPicker] = useState<PickerTarget | null>(null);
// True while the reset request is in flight; disables the reset button.
const [resetBusy, setResetBusy] = useState(false);
// Controls visibility of the "Reset auxiliary models" confirmation dialog.
const [confirmReset, setConfirmReset] = useState(false);
// NOTE(review): useModalBehavior is defined elsewhere; presumably it wires
// Escape/focus handling onto the returned ref — confirm against the hook.
const modalRef = useModalBehavior({ open: true, onClose });
// Sends the special "__reset__" auxiliary task with empty provider/model,
// then notifies the parent. There is no catch here, so a rejected request
// propagates to the caller (which invokes this with `void`).
const resetAllAux = async () => {
setConfirmReset(false);
setResetBusy(true);
try {
await api.setModelAssignment({
scope: "auxiliary",
task: "__reset__",
provider: "",
model: "",
});
onSaved();
} finally {
// Always clear the busy flag, whether the request succeeded or threw.
setResetBusy(false);
}
};
return (
<div
ref={modalRef}
className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
onClick={(e) => e.target === e.currentTarget && onClose()}
role="dialog"
aria-modal="true"
aria-labelledby="aux-modal-title"
>
<div className="relative w-full max-w-2xl max-h-[80vh] border border-border bg-card shadow-2xl flex flex-col">
<Button
ghost
size="icon"
onClick={onClose}
className="absolute right-2 top-2 text-muted-foreground hover:text-foreground"
aria-label="Close"
>
<X />
</Button>
<header className="p-5 pb-3 border-b border-border">
<div className="flex items-center justify-between gap-3 pr-8">
<h2
id="aux-modal-title"
className="font-display text-base tracking-wider uppercase"
>
Auxiliary Tasks
</h2>
<Button
size="sm"
outlined
onClick={() => setConfirmReset(true)}
disabled={resetBusy}
className="text-[10px] h-6"
prefix={resetBusy ? <Spinner /> : null}
>
Reset all to auto
</Button>
</div>
<p className="text-[10px] text-muted-foreground/80 mt-2">
Auxiliary tasks handle side-jobs like vision, session search, and
compression. <span className="font-mono">auto</span> means
&quot;use the main model&quot;. Override per-task when you want a
cheap/fast model for a specific job.
</p>
</header>
<div className="flex-1 overflow-y-auto p-5 space-y-1">
{AUX_TASKS.map((t) => {
// Look up the saved assignment for this task, if any.
const cur = aux?.tasks.find((a) => a.task === t.key);
// A missing entry, an empty provider, or provider "auto" all render as auto.
const isAuto =
!cur || cur.provider === "auto" || !cur.provider;
return (
<div
key={t.key}
className="flex items-center justify-between gap-3 px-3 py-2 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
>
<div className="min-w-0 flex-1">
<div className="flex items-baseline gap-2">
<span className="text-xs font-medium">{t.label}</span>
<span className="text-[10px] text-muted-foreground/60">
{t.hint}
</span>
</div>
<div className="text-[10px] font-mono text-muted-foreground truncate">
{isAuto
? "auto (use main model)"
: `${cur?.provider} · ${cur?.model || "(provider default)"}`}
</div>
</div>
<Button
size="sm"
outlined
onClick={() => setPicker({ kind: "aux", task: t.key })}
className="text-[10px] h-6"
>
Change
</Button>
</div>
);
})}
</div>
{picker && picker.kind === "aux" && (
<ModelPickerDialog
key={`picker-${refreshKey}`}
loader={api.getModelOptions}
alwaysGlobal
title={`Set Auxiliary: ${
AUX_TASKS.find((t) => t.key === picker.task)?.label ??
picker.task
}`}
onApply={async ({ provider, model }) => {
await api.setModelAssignment({
scope: "auxiliary",
task: picker.task,
provider,
model,
});
onSaved();
}}
onClose={() => setPicker(null)}
/>
)}
<ConfirmDialog
open={confirmReset}
onCancel={() => setConfirmReset(false)}
onConfirm={() => void resetAllAux()}
title="Reset auxiliary models"
description="Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set."
destructive
confirmLabel="Reset all"
loading={resetBusy}
/>
</div>
</div>
);
}
function ModelSettingsPanel({
aux,
refreshKey,
@ -454,9 +620,8 @@ function ModelSettingsPanel({
refreshKey: number;
onSaved(): void;
}) {
const [expanded, setExpanded] = useState(false);
const [auxModalOpen, setAuxModalOpen] = useState(false);
const [picker, setPicker] = useState<PickerTarget | null>(null);
const [resetBusy, setResetBusy] = useState(false);
const mainProv = aux?.main.provider ?? "";
const mainModel = aux?.main.model ?? "";
@ -476,23 +641,10 @@ function ModelSettingsPanel({
onSaved();
};
const resetAllAux = async () => {
if (!window.confirm("Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set.")) {
return;
}
setResetBusy(true);
try {
await api.setModelAssignment({
scope: "auxiliary",
task: "__reset__",
provider: "",
model: "",
});
onSaved();
} finally {
setResetBusy(false);
}
};
// Count how many aux tasks have overrides
const auxOverrideCount = aux?.tasks.filter(
(a) => a.provider && a.provider !== "auto",
).length ?? 0;
return (
<Card>
@ -505,21 +657,10 @@ function ModelSettingsPanel({
applies to new sessions
</span>
</div>
<Button
size="sm"
outlined
onClick={() => setExpanded((v) => !v)}
className="text-xs"
>
{expanded ? "Hide auxiliary" : "Show auxiliary"}
<ChevronDown
className={`h-3 w-3 transition-transform ${expanded ? "rotate-180" : ""}`}
/>
</Button>
</div>
</CardHeader>
<CardContent className="space-y-3 pt-0">
<CardContent className="space-y-3 pt-3">
{/* Main row */}
<div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
<div className="min-w-0 flex-1">
@ -544,85 +685,41 @@ function ModelSettingsPanel({
</Button>
</div>
{/* Auxiliary rows */}
{expanded && (
<div className="space-y-1 border-t border-border/50 pt-3">
<div className="flex items-center justify-between pb-1">
<div className="text-[10px] uppercase tracking-wider text-muted-foreground">
{/* Auxiliary tasks summary + open modal */}
<div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
<div className="min-w-0 flex-1">
<div className="flex items-center gap-2 mb-0.5">
<Cpu className="h-3 w-3 text-muted-foreground" />
<span className="text-xs font-medium uppercase tracking-wider">
Auxiliary tasks
</div>
<Button
size="sm"
outlined
onClick={resetAllAux}
disabled={resetBusy}
className="text-[10px] h-6"
prefix={resetBusy ? <Spinner /> : null}
>
Reset all to auto
</Button>
</span>
</div>
<div className="text-xs font-mono text-muted-foreground truncate">
{auxOverrideCount > 0
? `${auxOverrideCount} override${auxOverrideCount > 1 ? "s" : ""} · ${AUX_TASKS.length - auxOverrideCount} auto`
: `${AUX_TASKS.length} tasks · all auto`}
</div>
<p className="text-[10px] text-muted-foreground/80 pb-2">
Auxiliary tasks handle side-jobs like vision, session search, and
compression. <span className="font-mono">auto</span> means
&quot;use the main model&quot;. Override per-task when you want a
cheap/fast model for a specific job.
</p>
{AUX_TASKS.map((t) => {
const cur = aux?.tasks.find((a) => a.task === t.key);
const isAuto =
!cur || cur.provider === "auto" || !cur.provider;
return (
<div
key={t.key}
className="flex items-center justify-between gap-3 px-3 py-1.5 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
>
<div className="min-w-0 flex-1">
<div className="flex items-baseline gap-2">
<span className="text-xs font-medium">{t.label}</span>
<span className="text-[10px] text-muted-foreground/60">
{t.hint}
</span>
</div>
<div className="text-[10px] font-mono text-muted-foreground truncate">
{isAuto
? "auto (use main model)"
: `${cur?.provider} · ${cur?.model || "(provider default)"}`}
</div>
</div>
<Button
size="sm"
outlined
onClick={() => setPicker({ kind: "aux", task: t.key })}
className="text-[10px] h-6"
>
Change
</Button>
</div>
);
})}
</div>
)}
<Button
size="sm"
outlined
onClick={() => setAuxModalOpen(true)}
className="text-xs"
>
Configure
</Button>
</div>
{picker && (
<ModelPickerDialog
key={`picker-${refreshKey}`}
loader={api.getModelOptions}
alwaysGlobal
title={
picker.kind === "main"
? "Set Main Model"
: `Set Auxiliary: ${
AUX_TASKS.find((t) => t.key === picker.task)?.label ??
picker.task
}`
}
title="Set Main Model"
onApply={async ({ provider, model }) => {
await applyAssignment({
scope: picker.kind === "main" ? "main" : "auxiliary",
task: picker.kind === "main" ? "" : picker.task,
scope: "main",
task: "",
provider,
model,
});
@ -630,6 +727,15 @@ function ModelSettingsPanel({
onClose={() => setPicker(null)}
/>
)}
{auxModalOpen && (
<AuxiliaryTasksModal
aux={aux}
refreshKey={refreshKey}
onSaved={onSaved}
onClose={() => setAuxModalOpen(false)}
/>
)}
</CardContent>
</Card>
);
@ -725,28 +831,14 @@ export default function ModelsPage() {
<div className="flex flex-col gap-6">
<PluginSlot name="models:top" />
<ModelSettingsPanel
aux={aux}
refreshKey={saveKey}
onSaved={onAssigned}
/>
<div className="grid gap-6 lg:grid-cols-2">
<ModelSettingsPanel
aux={aux}
refreshKey={saveKey}
onSaved={onAssigned}
/>
{loading && !data && (
<div className="flex items-center justify-center py-24">
<Spinner className="text-2xl text-primary" />
</div>
)}
{error && (
<Card>
<CardContent className="py-6">
<p className="text-sm text-destructive text-center">{error}</p>
</CardContent>
</Card>
)}
{data && (
<>
{data && (
<Card>
<CardContent className="py-6">
<Stats
@ -781,7 +873,25 @@ export default function ModelsPage() {
/>
</CardContent>
</Card>
)}
</div>
{loading && !data && (
<div className="flex items-center justify-center py-24">
<Spinner className="text-2xl text-primary" />
</div>
)}
{error && (
<Card>
<CardContent className="py-6">
<p className="text-sm text-destructive text-center">{error}</p>
</CardContent>
</Card>
)}
{data && (
<>
{data.models.length > 0 ? (
<div className="grid gap-4 md:grid-cols-2 xl:grid-cols-3">
{data.models.map((m, i) => (

View file

@ -11,6 +11,7 @@ import { Switch } from "@nous-research/ui/ui/components/switch";
import { Spinner } from "@nous-research/ui/ui/components/spinner";
import { CommandBlock } from "@nous-research/ui/ui/components/command-block";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useToast } from "@/hooks/useToast";
@ -393,6 +394,7 @@ function PluginRowCard(props: PluginRowCardProps) {
const tabPath = dm?.tab && !dm.tab.hidden ? dm.tab.override ?? dm.tab.path : null;
const busy = rowBusy === row.name;
const [confirmRemove, setConfirmRemove] = useState(false);
const badgeTone =
row.runtime_status === "enabled"
@ -533,18 +535,7 @@ function PluginRowCard(props: PluginRowCardProps) {
disabled={busy}
ghost
size="sm"
onClick={() => {
const ok =
typeof window !== "undefined"
? window.confirm(t.pluginsPage.removeConfirm)
: false;
if (!ok) return;
void setRuntimeLoading(row.name, async () => {
await api.removeAgentPlugin(row.name);
showToast(`${row.name} removed`, "success");
});
}}
onClick={() => setConfirmRemove(true)}
>
{busy ? <Spinner /> : <Trash2 className="h-3.5 w-3.5" />}
@ -576,6 +567,21 @@ function PluginRowCard(props: PluginRowCardProps) {
) : null}
</CardContent>
<ConfirmDialog
open={confirmRemove}
onCancel={() => setConfirmRemove(false)}
onConfirm={() => {
setConfirmRemove(false);
void setRuntimeLoading(row.name, async () => {
await api.removeAgentPlugin(row.name);
showToast(`${row.name} removed`, "success");
});
}}
title={t.pluginsPage.removeConfirm}
description={`This will remove the "${row.name}" plugin from your agent.`}
destructive
confirmLabel={t.common.delete}
/>
</Card>
);
}

View file

@ -1,18 +1,21 @@
import { useCallback, useEffect, useRef, useState } from "react";
import { ChevronDown, Pencil, Plus, Terminal, Trash2, Users } from "lucide-react";
import { useCallback, useEffect, useLayoutEffect, useRef, useState } from "react";
import { ChevronDown, Pencil, Plus, Terminal, Trash2, Users, X } from "lucide-react";
import { H2 } from "@/components/NouiTypography";
import { api } from "@/lib/api";
import type { ProfileInfo } from "@/lib/api";
import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
import { useToast } from "@/hooks/useToast";
import { useConfirmDelete } from "@/hooks/useConfirmDelete";
import { useModalBehavior } from "@/hooks/useModalBehavior";
import { Toast } from "@/components/Toast";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Card, CardContent } from "@/components/ui/card";
import { Badge } from "@nous-research/ui/ui/components/badge";
import { Button } from "@nous-research/ui/ui/components/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Checkbox } from "@/components/ui/checkbox";
import { useI18n } from "@/i18n";
import { usePageHeader } from "@/contexts/usePageHeader";
// Mirrors hermes_cli/profiles.py::_PROFILE_ID_RE so we can reject obviously
// invalid names (uppercase, spaces, …) before round-tripping a doomed POST.
@ -23,11 +26,18 @@ export default function ProfilesPage() {
const [loading, setLoading] = useState(true);
const { toast, showToast } = useToast();
const { t } = useI18n();
const { setEnd } = usePageHeader();
// Create form
// Create modal
const [createModalOpen, setCreateModalOpen] = useState(false);
const [newName, setNewName] = useState("");
const [cloneFromDefault, setCloneFromDefault] = useState(true);
const [creating, setCreating] = useState(false);
const closeCreateModal = useCallback(() => setCreateModalOpen(false), []);
const createModalRef = useModalBehavior({
open: createModalOpen,
onClose: closeCreateModal,
});
// Inline rename state
const [renamingFrom, setRenamingFrom] = useState<string | null>(null);
@ -68,6 +78,7 @@ export default function ProfilesPage() {
await api.createProfile({ name, clone_from_default: cloneFromDefault });
showToast(`${t.profiles.created}: ${name}`, "success");
setNewName("");
setCreateModalOpen(false);
load();
} catch (e) {
showToast(`${t.status.error}: ${e}`, "error");
@ -170,6 +181,22 @@ export default function ProfilesPage() {
const pendingName = profileDelete.pendingId;
// Put "Create" button in page header
useLayoutEffect(() => {
setEnd(
<Button
size="sm"
onClick={() => setCreateModalOpen(true)}
>
<Plus className="h-3 w-3" />
{t.common.create}
</Button>,
);
return () => {
setEnd(null);
};
}, [setEnd, t.common.create, loading]);
if (loading) {
return (
<div className="flex items-center justify-center py-24">
@ -198,51 +225,75 @@ export default function ProfilesPage() {
loading={profileDelete.isDeleting}
/>
{/* Create new profile */}
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2 text-base">
<Plus className="h-4 w-4" />
{t.profiles.newProfile}
</CardTitle>
</CardHeader>
<CardContent>
<div className="grid gap-4">
<div className="grid gap-2">
<Label htmlFor="profile-name">{t.profiles.name}</Label>
<Input
id="profile-name"
placeholder={t.profiles.namePlaceholder}
value={newName}
onChange={(e) => setNewName(e.target.value)}
aria-invalid={
newName.trim() !== "" &&
!PROFILE_NAME_RE.test(newName.trim())
}
/>
<p className="text-xs text-muted-foreground">
{t.profiles.nameRule}
</p>
</div>
{/* Create profile modal */}
{createModalOpen && (
<div
ref={createModalRef}
className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
onClick={(e) => e.target === e.currentTarget && setCreateModalOpen(false)}
role="dialog"
aria-modal="true"
aria-labelledby="create-profile-title"
>
<div className="relative w-full max-w-md border border-border bg-card shadow-2xl flex flex-col">
<Button
ghost
size="icon"
onClick={() => setCreateModalOpen(false)}
className="absolute right-2 top-2 text-muted-foreground hover:text-foreground"
aria-label="Close"
>
<X />
</Button>
<label className="flex items-center gap-2 text-sm cursor-pointer">
<input
type="checkbox"
<header className="p-5 pb-3 border-b border-border">
<h2
id="create-profile-title"
className="font-display text-base tracking-wider uppercase"
>
{t.profiles.newProfile}
</h2>
</header>
<div className="p-5 grid gap-4">
<div className="grid gap-2">
<Label htmlFor="profile-name">{t.profiles.name}</Label>
<Input
id="profile-name"
autoFocus
placeholder={t.profiles.namePlaceholder}
value={newName}
onChange={(e) => setNewName(e.target.value)}
onKeyDown={(e) => {
if (e.key === "Enter") handleCreate();
}}
aria-invalid={
newName.trim() !== "" &&
!PROFILE_NAME_RE.test(newName.trim())
}
/>
<p className="text-xs text-muted-foreground">
{t.profiles.nameRule}
</p>
</div>
<Checkbox
id="clone-from-default"
checked={cloneFromDefault}
onChange={(e) => setCloneFromDefault(e.target.checked)}
label={t.profiles.cloneFromDefault}
/>
{t.profiles.cloneFromDefault}
</label>
<div>
<Button onClick={handleCreate} disabled={creating}>
<Plus className="h-3 w-3" />
{creating ? t.common.creating : t.common.create}
</Button>
<div className="flex justify-end">
<Button size="sm" onClick={handleCreate} disabled={creating}>
<Plus className="h-3 w-3" />
{creating ? t.common.creating : t.common.create}
</Button>
</div>
</div>
</div>
</CardContent>
</Card>
</div>
)}
{/* List */}
<div className="flex flex-col gap-3">

45
cli.py
View file

@ -3669,7 +3669,7 @@ class HermesCLI:
if self.show_timestamps:
label = f"{label} {datetime.now().strftime('%H:%M')}"
w = shutil.get_terminal_size().columns
fill = w - 2 - len(label)
fill = w - 2 - HermesCLI._status_bar_display_width(label)
_cprint(f"\n{_ACCENT}╭─{label}{'' * max(fill - 1, 0)}{_RST}")
self._stream_buf += text
@ -4214,12 +4214,34 @@ class HermesCLI:
ChatConsole().print(f"[bold red]Failed to initialize agent: {e}[/]")
return False
def _show_security_advisories(self):
"""Show a startup banner if any unacked security advisories match.
Renders a single bold-red box on stderr (so piped stdout remains
clean) listing the worst hit and pointing at ``hermes doctor``.
Banner-cache rate-limits this to once per 24h per advisory; full
remediation lives behind ``hermes doctor`` so the banner stays
small.
"""
try:
from hermes_cli.security_advisories import (
detect_compromised,
startup_banner,
)
hits = detect_compromised()
banner = startup_banner(hits)
if banner:
# Print to stderr — keeps stdout clean for piped automation,
# and Rich's banner rendering already wrote to stdout above.
print(banner, file=sys.stderr, flush=True)
except Exception:
# Never let the security banner block startup. Failures are
# logged at DEBUG by the advisory module.
pass
def show_banner(self):
"""Display the welcome banner in Claude Code style."""
self.console.clear()
# Get context length for display before branching so it remains
# available to the low-context warning logic in compact mode too.
ctx_len = None
if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'):
ctx_len = self.agent.context_compressor.context_length
@ -8783,6 +8805,9 @@ class HermesCLI:
elif parts[i] == "--source" and i + 1 < len(parts):
source = parts[i + 1]
i += 2
elif parts[i].isdigit():
days = int(parts[i])
i += 1
else:
i += 1
@ -10368,7 +10393,7 @@ class HermesCLI:
label = " ⚕ Hermes "
if self.show_timestamps:
label = f"{label}{datetime.now().strftime('%H:%M')} "
fill = w - 2 - len(label)
fill = w - 2 - HermesCLI._status_bar_display_width(label)
_cprint(f"\n{_ACCENT}╭─{label}{'' * max(fill - 1, 0)}{_RST}")
_cprint(f"{_STREAM_PAD}{sentence.rstrip()}")
@ -11016,10 +11041,9 @@ class HermesCLI:
pass
self.show_banner()
# One-line Honcho session indicator (TTY-only, not captured by agent).
# Only show when the user explicitly configured Honcho for Hermes
# (not auto-enabled from a stray HONCHO_API_KEY env var).
# Surface any active supply-chain security advisories right after the
# welcome banner. Quiet/single-query paths call this themselves.
self._show_security_advisories()
# If resuming a session, load history and display it immediately
# so the user has context before typing their first message.
if self._resumed:
@ -13528,6 +13552,9 @@ def main(
_query_label = query or ("[image attached]" if single_query_images else "")
if _query_label:
cli.console.print(f"[bold blue]Query:[/] {_query_label}")
# Surface security advisories before the agent runs — short
# banner, doesn't depend on the welcome banner being shown.
cli._show_security_advisories()
cli.chat(query, images=single_query_images or None)
cli._print_exit_summary()
return

View file

@ -111,6 +111,7 @@ _HOME_TARGET_ENV_VARS = {
"weixin": "WEIXIN_HOME_CHANNEL",
"bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
"qqbot": "QQBOT_HOME_CHANNEL",
"whatsapp": "WHATSAPP_HOME_CHANNEL",
}
# Legacy env var names kept for back-compat. Each entry is the current

View file

@ -2,7 +2,7 @@
Hermes Gateway - Multi-platform messaging integration.
This module provides a unified gateway for connecting the Hermes agent
to various messaging platforms (Telegram, Discord, WhatsApp) with:
to various messaging platforms (Telegram, Discord, WhatsApp, Weixin, and more) with:
- Session management (persistent conversations with reset policies)
- Dynamic context injection (agent knows where messages come from)
- Delivery routing (cron job outputs to appropriate channels)

View file

@ -2,7 +2,7 @@
Gateway configuration management.
Handles loading and validating configuration for:
- Connected platforms (Telegram, Discord, WhatsApp)
- Connected platforms (Telegram, Discord, WhatsApp, Weixin, and more)
- Home channels for each platform
- Session reset policies
- Delivery preferences

View file

@ -1168,6 +1168,9 @@ class APIServerAdapter(BasePlatformAdapter):
agent_ref=agent_ref,
gateway_session_key=gateway_session_key,
))
# Ensure SSE drain loops can terminate without relying on polling
# agent_task.done(), which can race with queue timeout checks.
agent_task.add_done_callback(lambda _fut: _stream_q.put(None))
return await self._write_sse_chat_completion(
request, completion_id, model_name, created, _stream_q,
@ -2197,6 +2200,9 @@ class APIServerAdapter(BasePlatformAdapter):
agent_ref=agent_ref,
gateway_session_key=gateway_session_key,
))
# Ensure SSE drain loops can terminate without relying on polling
# agent_task.done(), which can race with queue timeout checks.
agent_task.add_done_callback(lambda _fut: _stream_q.put(None))
response_id = f"resp_{uuid.uuid4().hex[:28]}"
model_name = body.get("model", self._model_name)

View file

@ -1,7 +1,7 @@
"""
Base platform adapter interface.
All platform adapters (Telegram, Discord, WhatsApp) inherit from this
All platform adapters (Telegram, Discord, WhatsApp, Weixin, and more) inherit from this
and implement the required methods.
"""
@ -1743,6 +1743,55 @@ class BasePlatformAdapter(ABC):
"""
return SendResult(success=False, error="Not supported")
async def send_clarify(
    self,
    chat_id: str,
    question: str,
    choices: Optional[list],
    clarify_id: str,
    session_key: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Send a clarify prompt to the user (plain-text fallback).

    Two render modes:

    * **Multiple choice** (``choices`` is a non-empty list) — adapters that
      override this should render inline buttons (one per choice plus a
      final "Other" / free-text option). Button callbacks MUST resolve via
      ``tools.clarify_gateway.resolve_gateway_clarify(clarify_id, response)``
      with the chosen string. Picking the "Other" button calls
      ``mark_awaiting_text(clarify_id)`` so the next message in the session
      is captured as the response.
    * **Open-ended** (``choices`` is None or empty) — the question is sent
      as a plain text message; the next user message in the session is
      expected to be captured by the gateway's text-intercept (see
      ``GatewayRunner._maybe_intercept_clarify_text``).

    This default implementation renders multiple-choice prompts as a
    numbered text list followed by a reply hint, and delegates delivery to
    ``self.send``. ``clarify_id`` and ``session_key`` are not used here;
    they exist for adapters with native button UIs that override this
    method.

    Args:
        chat_id: Destination chat, forwarded to ``self.send``.
        question: Prompt text shown to the user.
        choices: Optional list of answer options.
        clarify_id: Identifier of the pending clarify request (unused here).
        session_key: Session the clarify belongs to (unused here).
        metadata: Optional metadata forwarded to ``self.send``.

    Returns:
        Whatever ``self.send`` returns for the rendered message.
    """
    prompt = f"{question}"
    if choices:
        numbered = [
            f" {position}. {option}"
            for position, option in enumerate(choices, start=1)
        ]
        prompt = "\n".join(
            [
                prompt,
                "",
                *numbered,
                "",
                "Reply with the number, the option text, or your own answer.",
            ]
        )

    return await self.send(
        chat_id=chat_id,
        content=prompt,
        metadata=metadata,
    )
async def send_private_notice(
self,
chat_id: str,
@ -2831,6 +2880,58 @@ class BasePlatformAdapter(ABC):
logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
return
# Clarify text-capture bypass: if the agent is blocked on a
# clarify_tool call awaiting a free-form text response (open-
# ended clarify, or user picked "Other"), the next non-command
# message in this session MUST reach the runner so the
# clarify-intercept can resolve it and unblock the agent.
#
# Without this bypass: the message gets queued in
# _pending_messages AND triggers an interrupt, killing the
# agent run mid-clarify and discarding the user's answer.
# Same shape as the /approve deadlock fix (PR #4926) — both
# cases are "agent thread blocked on Event.wait, message must
# reach the resolver before being treated as a new turn."
if not cmd:
try:
from tools import clarify_gateway as _clarify_mod
_has_text_clarify = (
_clarify_mod.get_pending_for_session(session_key) is not None
)
except Exception:
_has_text_clarify = False
if _has_text_clarify:
logger.debug(
"[%s] Routing message to clarify text-intercept for %s",
self.name, session_key,
)
try:
_thread_meta = _thread_metadata_for_source(
event.source, _reply_anchor_for_event(event)
)
response = await self._message_handler(event)
_text, _eph_ttl = self._unwrap_ephemeral(response)
if _text:
_r = await self._send_with_retry(
chat_id=event.source.chat_id,
content=_text,
reply_to=_reply_anchor_for_event(event),
metadata=_thread_meta,
)
if _eph_ttl > 0 and _r.success and _r.message_id:
self._schedule_ephemeral_delete(
chat_id=event.source.chat_id,
message_id=_r.message_id,
ttl_seconds=_eph_ttl,
)
except Exception as e:
logger.error(
"[%s] Clarify text-intercept dispatch failed: %s",
self.name, e, exc_info=True,
)
return
if self._busy_session_handler is not None:
try:
if await self._busy_session_handler(event, session_key):

View file

@ -86,8 +86,32 @@ def _clean_discord_id(entry: str) -> str:
def check_discord_requirements() -> bool:
    """Check if Discord dependencies are available.

    Lazy-installs discord.py via ``tools.lazy_deps.ensure("platform.discord")``
    on first call if not present. After a successful install, re-binds the
    module globals (``discord``, ``DiscordMessage``, ``Intents``,
    ``commands``) so ``DISCORD_AVAILABLE`` becomes True.

    Returns:
        True when the Discord SDK is already available or could be installed
        and imported; False when the lazy install or the import fails.
    """
    # The ``global`` declaration must precede any use of these names in the
    # function body; an early ``return DISCORD_AVAILABLE`` above it would be
    # a SyntaxError and would also bypass the lazy-install path entirely.
    global DISCORD_AVAILABLE, discord, DiscordMessage, Intents, commands
    if DISCORD_AVAILABLE:
        return True

    try:
        from tools.lazy_deps import ensure as _lazy_ensure
        _lazy_ensure("platform.discord", prompt=False)
    except Exception:
        # Best effort: a missing lazy_deps module or a failed install simply
        # means the platform is unavailable.
        return False

    try:
        import discord as _discord
        from discord import Message as _DM, Intents as _Intents
        from discord.ext import commands as _commands
    except ImportError:
        return False

    # Rebind module-level names only after every import succeeded, so a
    # partial failure never leaves the globals half-initialised.
    discord = _discord
    DiscordMessage = _DM
    Intents = _Intents
    commands = _commands
    DISCORD_AVAILABLE = True
    return True
def _build_allowed_mentions():

View file

@ -103,8 +103,58 @@ _TELEGRAM_IMAGE_EXT_TO_MIME = {
def check_telegram_requirements() -> bool:
    """Check if Telegram dependencies are available.

    If python-telegram-bot is missing, attempts to lazy-install it via
    ``tools.lazy_deps.ensure("platform.telegram")``. After a successful
    install, re-imports the SDK, rebinds the module-level SDK names and
    flips ``TELEGRAM_AVAILABLE`` to True.

    Returns:
        True when the Telegram SDK is already available or could be
        installed and imported; False when the lazy install or the import
        fails.
    """
    # The ``global`` declarations must precede any use of these names in the
    # function body; an early ``return TELEGRAM_AVAILABLE`` above them would
    # be a SyntaxError and would also bypass the lazy-install path entirely.
    global TELEGRAM_AVAILABLE, Update, Bot, Message, InlineKeyboardButton
    global InlineKeyboardMarkup, LinkPreviewOptions, Application
    global CommandHandler, CallbackQueryHandler, TelegramMessageHandler
    global ContextTypes, filters, ParseMode, ChatType, HTTPXRequest
    if TELEGRAM_AVAILABLE:
        return True

    try:
        from tools.lazy_deps import ensure as _lazy_ensure
        _lazy_ensure("platform.telegram", prompt=False)
    except Exception:
        # Best effort: a missing lazy_deps module or a failed install simply
        # means the platform is unavailable.
        return False

    try:
        from telegram import Update as _Update, Bot as _Bot, Message as _Message
        from telegram import InlineKeyboardButton as _IKB, InlineKeyboardMarkup as _IKM
        try:
            from telegram import LinkPreviewOptions as _LPO
        except ImportError:
            # LinkPreviewOptions is treated as optional; fall back to None
            # when the installed SDK does not export it.
            _LPO = None
        from telegram.ext import (
            Application as _App, CommandHandler as _CH,
            CallbackQueryHandler as _CQH,
            MessageHandler as _MH,
            ContextTypes as _CT, filters as _filters,
        )
        from telegram.constants import ParseMode as _PM, ChatType as _CtT
        from telegram.request import HTTPXRequest as _HR
    except ImportError:
        return False

    # Rebind module-level names only after every import succeeded, so a
    # partial failure never leaves the globals half-initialised.
    Update = _Update
    Bot = _Bot
    Message = _Message
    InlineKeyboardButton = _IKB
    InlineKeyboardMarkup = _IKM
    LinkPreviewOptions = _LPO
    Application = _App
    CommandHandler = _CH
    CallbackQueryHandler = _CQH
    TelegramMessageHandler = _MH
    ContextTypes = _CT
    filters = _filters
    ParseMode = _PM
    ChatType = _CtT
    HTTPXRequest = _HR
    TELEGRAM_AVAILABLE = True
    return True
# Matches every character that MarkdownV2 requires to be backslash-escaped
@ -377,6 +427,9 @@ class TelegramAdapter(BasePlatformAdapter):
# Slash-confirm button state: confirm_id → session_key (for /reload-mcp
# and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
self._slash_confirm_state: Dict[str, str] = {}
# Clarify button state: clarify_id → session_key (for the clarify tool's
# multiple-choice prompts; see GatewayRunner clarify_callback wiring).
self._clarify_state: Dict[str, str] = {}
# Notification mode for message sends.
# "important" — only final responses, approvals, and slash confirmations
# trigger notifications; tool progress, streaming, status
@ -2165,6 +2218,80 @@ class TelegramAdapter(BasePlatformAdapter):
logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
return SendResult(success=False, error=str(e))
async def send_clarify(
    self,
    chat_id: str,
    question: str,
    choices: Optional[list],
    clarify_id: str,
    session_key: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Send a clarify question, attaching inline buttons when choices exist.

    With a non-empty ``choices`` list the message carries one numbered
    button per option followed by an "Other (type answer)" button; each
    button's callback data is ``cl:<clarify_id>:<index>`` (or ``:other``).
    With no choices the question is sent as a plain HTML message without
    a keyboard, and the answer is expected to arrive as an ordinary
    message in the session.

    On success the ``clarify_id -> session_key`` mapping is recorded in
    ``self._clarify_state`` and the sent message id is returned.  Any
    exception is logged as a warning and reported via
    ``SendResult(success=False, ...)`` instead of propagating.
    """
    if not self._bot:
        return SendResult(success=False, error="Not connected")

    def _button_label(option) -> str:
        # Button labels are capped (~64 chars in practice): keep at most
        # 60 characters, marking truncation with an ellipsis.
        caption = str(option)
        return caption if len(caption) <= 60 else caption[:57] + "..."

    try:
        body = f"{_html.escape(question)}"
        thread_id = self._metadata_thread_id(metadata)
        send_kwargs: Dict[str, Any] = {
            "chat_id": int(chat_id),
            "text": body,
            "parse_mode": ParseMode.HTML,
            **self._link_preview_kwargs(),
        }

        if choices:
            # Telegram caps callback_data at 64 bytes, so the payload is
            # kept to the short "cl:<id>:<idx>" form.
            keyboard = [
                [
                    InlineKeyboardButton(
                        f"{position + 1}. {_button_label(option)}",
                        callback_data=f"cl:{clarify_id}:{position}",
                    )
                ]
                for position, option in enumerate(choices)
            ]
            keyboard.append([
                InlineKeyboardButton(
                    "✏️ Other (type answer)",
                    callback_data=f"cl:{clarify_id}:other",
                )
            ])
            send_kwargs["reply_markup"] = InlineKeyboardMarkup(keyboard)

        reply_target = self._reply_to_message_id_for_send(None, metadata)
        send_kwargs["reply_to_message_id"] = reply_target
        thread_kwargs = self._thread_kwargs_for_send(
            chat_id,
            thread_id,
            metadata,
            reply_to_message_id=reply_target,
        )
        send_kwargs.update(thread_kwargs)

        sent = await self._send_message_with_thread_fallback(**send_kwargs)
        # Record ownership only after the prompt actually went out.
        self._clarify_state[clarify_id] = session_key
        return SendResult(success=True, message_id=str(sent.message_id))
    except Exception as e:
        logger.warning("[%s] send_clarify failed: %s", self.name, e)
        return SendResult(success=False, error=str(e))
async def send_model_picker(
self,
chat_id: str,
@ -2650,6 +2777,111 @@ class TelegramAdapter(BasePlatformAdapter):
logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
return
# --- Clarify callbacks (cl:clarify_id:idx | cl:clarify_id:other) ---
if data.startswith("cl:"):
parts = data.split(":", 2)
if len(parts) == 3:
clarify_id = parts[1]
choice_token = parts[2]
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(
caller_id,
chat_id=query_chat_id,
chat_type=str(query_chat_type) if query_chat_type is not None else None,
thread_id=str(query_thread_id) if query_thread_id is not None else None,
user_name=query_user_name,
):
await query.answer(text="⛔ You are not authorized to answer this prompt.")
return
session_key = self._clarify_state.get(clarify_id)
if not session_key:
await query.answer(text="This prompt has already been resolved.")
return
user_display = getattr(query.from_user, "first_name", "User")
if choice_token == "other":
# Flip into text-capture mode and tell the user to type
# their answer. The gateway's text-intercept will pick
# up the next message in this session and resolve the
# clarify. Do NOT pop _clarify_state yet — we still
# need it if the user is slow to respond and the entry
# is cleared by something else.
try:
from tools.clarify_gateway import mark_awaiting_text
mark_awaiting_text(clarify_id)
except Exception as exc:
logger.warning("[%s] mark_awaiting_text failed: %s", self.name, exc)
await query.answer(text="✏️ Type your answer in the chat.")
try:
await query.edit_message_text(
text=f"{query.message.text or ''}\n\n<i>Awaiting typed response from {_html.escape(user_display)}…</i>",
parse_mode=ParseMode.HTML,
reply_markup=None,
)
except Exception:
pass
return
# Numeric choice → resolve immediately with the chosen text
try:
idx = int(choice_token)
except (ValueError, TypeError):
await query.answer(text="Invalid choice.")
return
# Look up the choice text from the entry registered in the
# clarify primitive. Fall back to the index if the entry
# has been cleaned up (race with timeout / session reset).
resolved_text: Optional[str] = None
try:
from tools.clarify_gateway import _entries as _clarify_entries # type: ignore
entry = _clarify_entries.get(clarify_id)
if entry and entry.choices and 0 <= idx < len(entry.choices):
resolved_text = entry.choices[idx]
except Exception:
resolved_text = None
if resolved_text is None:
# Race: entry vanished. Echo the index as a number so
# the agent at least sees an intentional response
# rather than nothing.
resolved_text = f"choice {idx + 1}"
# Pop state and resolve
self._clarify_state.pop(clarify_id, None)
try:
from tools.clarify_gateway import resolve_gateway_clarify
resolved = resolve_gateway_clarify(clarify_id, resolved_text)
except Exception as exc:
logger.error("[%s] resolve_gateway_clarify failed: %s", self.name, exc)
resolved = False
await query.answer(text=f"{resolved_text[:60]}")
try:
await query.edit_message_text(
text=f"{_html.escape(query.message.text or '')}\n\n<b>{_html.escape(user_display)}:</b> {_html.escape(resolved_text)}",
parse_mode=ParseMode.HTML,
reply_markup=None,
)
except Exception:
pass
if resolved:
logger.info(
"Telegram clarify button resolved (id=%s, choice=%r, user=%s)",
clarify_id, resolved_text, user_display,
)
else:
logger.warning(
"Telegram clarify button: resolve_gateway_clarify returned False (id=%s)",
clarify_id,
)
return
# --- Update prompt callbacks ---
if not data.startswith("update_prompt:"):
return
@ -4529,6 +4761,27 @@ class TelegramAdapter(BasePlatformAdapter):
logger.debug("[%s] set_message_reaction failed (%s): %s", self.name, emoji, e)
return False
async def _clear_reactions(self, chat_id: str, message_id: str) -> bool:
"""Clear all reactions from a Telegram message.
Calling ``set_message_reaction`` with ``reaction=None`` (or an empty
sequence) is the documented Bot API way to remove all bot-set
reactions on a message equivalent to Bot API 10.0's
``deleteMessageReaction`` but supported in PTB 22.6 already.
"""
if not self._bot:
return False
try:
await self._bot.set_message_reaction(
chat_id=int(chat_id),
message_id=int(message_id),
reaction=None,
)
return True
except Exception as e:
logger.debug("[%s] clear reactions failed: %s", self.name, e)
return False
async def on_processing_start(self, event: MessageEvent) -> None:
"""Add an in-progress reaction when message processing begins."""
if not self._reactions_enabled():
@ -4543,12 +4796,23 @@ class TelegramAdapter(BasePlatformAdapter):
Unlike Discord (additive reactions), Telegram's set_message_reaction
replaces all existing reactions in one call, so no separate remove step
is needed.
On CANCELLED outcomes (e.g. the user runs ``/stop``, or a session is
interrupted mid-flight), we explicitly clear the 👀 in-progress
reaction so it doesn't linger on the user's message indefinitely.
Without this clear, the only way to remove the 👀 was to wait for
another agent run to swap it to 👍/👎, which never happens if the
cancellation was the last activity in the chat.
"""
if not self._reactions_enabled():
return
chat_id = getattr(event.source, "chat_id", None)
message_id = getattr(event, "message_id", None)
if chat_id and message_id and outcome != ProcessingOutcome.CANCELLED:
if not (chat_id and message_id):
return
if outcome == ProcessingOutcome.CANCELLED:
await self._clear_reactions(chat_id, message_id)
else:
await self._set_reaction(
chat_id,
message_id,

View file

@ -3275,6 +3275,30 @@ class GatewayRunner:
write_runtime_status(gateway_state="starting", exit_reason=None)
except Exception:
pass
# Log any active supply-chain security advisories. Operators see this
# in gateway.log and `hermes status` surfaces it; we do NOT block
# startup or surface it inline to user messages, since the gateway
# operator is the one who can act on it (uninstall the package,
# rotate credentials). See hermes_cli/security_advisories.py.
try:
from hermes_cli.security_advisories import (
detect_compromised,
gateway_log_message,
)
_adv_hits = detect_compromised()
_adv_msg = gateway_log_message(_adv_hits)
if _adv_msg:
logger.warning("%s", _adv_msg)
logger.warning(
"Run `hermes doctor` on the gateway host for full "
"remediation steps."
)
except Exception:
logger.debug(
"security advisory check failed at gateway startup",
exc_info=True,
)
# Warn if no user allowlists are configured and open access is not opted in
_builtin_allowed_vars = (
@ -5804,6 +5828,37 @@ class GatewayRunner:
)
_update_prompts.pop(_quick_key, None)
# Intercept messages that are responses to a pending clarify
# request that is awaiting free-form text (either an open-ended
# clarify with no choices, or one where the user picked the
# "Other" button). The first non-empty user message in the
# session resolves the clarify and unblocks the agent thread —
# we do NOT route it to the agent as a new turn.
try:
from tools import clarify_gateway as _clarify_mod
_pending_clarify = _clarify_mod.get_pending_for_session(_quick_key)
except Exception:
_pending_clarify = None
if _pending_clarify is not None:
_raw_clarify_reply = (event.text or "").strip()
# Skip slash commands — the user clearly wanted to issue a
# command, not answer the clarify. Leave the clarify pending
# so the user can retry; if it times out, the agent unblocks
# with an empty response.
if _raw_clarify_reply and not _raw_clarify_reply.startswith("/"):
_resolved = _clarify_mod.resolve_gateway_clarify(
_pending_clarify.clarify_id, _raw_clarify_reply,
)
if _resolved:
logger.info(
"Gateway intercepted clarify text response (session=%s, id=%s)",
_quick_key, _pending_clarify.clarify_id,
)
# Acknowledge with empty string so adapters that emit
# the agent's response don't double-post. The agent
# itself will produce the next user-facing message.
return ""
# Intercept messages that are responses to a pending /reload-mcp
# (or future) slash-confirm prompt. Recognized confirm replies are
# /approve, /always, /cancel (plus short aliases). Anything else
@ -14933,6 +14988,76 @@ class GatewayRunner:
if _pdc is not None:
_pdc[session_key] = _release_bg_review_messages
# ------------------------------------------------------------------
# Clarify callback: present a clarify prompt and block on a response.
#
# Runs on the agent's worker thread (see clarify_tool's synchronous
# callback contract). Bridges sync→async by scheduling the
# adapter's send_clarify on the gateway event loop, then blocks on
# the clarify primitive's threading.Event with a configurable
# timeout. Returns the user's response string, or a sentinel
# explaining that no response arrived (so the agent can adapt
# rather than hang forever).
# ------------------------------------------------------------------
def _clarify_callback_sync(question: str, choices) -> str:
    """Deliver a clarify prompt through the gateway and block for the reply.

    Registers a pending clarify entry, schedules the status adapter's
    ``send_clarify`` coroutine on the gateway loop, then waits on the
    clarify module for a response.

    Returns the user's response text; ``""`` when no status adapter is
    available; or a bracketed sentinel string when the prompt could not
    be delivered or no non-empty response arrived before the configured
    timeout.
    """
    from tools import clarify_gateway as _clarify_mod
    import uuid as _uuid

    if not _status_adapter:
        return ""

    owner_session = session_key or ""
    clarify_id = _uuid.uuid4().hex[:10]
    _clarify_mod.register(
        clarify_id=clarify_id,
        session_key=owner_session,
        question=question,
        choices=list(choices) if choices else None,
    )

    # Pause typing — like approval, we don't want a "thinking..." status
    # to obscure the prompt or block the user from typing an "Other"
    # response on platforms that disable input while typing is active
    # (Slack Assistant API).
    try:
        _status_adapter.pause_typing_for_chat(_status_chat_id)
    except Exception:
        pass

    delivered = False
    try:
        send_coro = _status_adapter.send_clarify(
            chat_id=_status_chat_id,
            question=question,
            choices=list(choices) if choices else None,
            clarify_id=clarify_id,
            session_key=owner_session,
            metadata=_status_thread_metadata,
        )
        pending_send = asyncio.run_coroutine_threadsafe(send_coro, _loop_for_step)
        send_result = pending_send.result(timeout=15)
        delivered = bool(getattr(send_result, "success", False))
    except Exception as exc:
        logger.warning("Clarify send failed: %s", exc)

    if not delivered:
        # The prompt never reached the user: drop the pending entry and
        # hand back a sentinel so the agent can fall back to a default
        # instead of hanging.
        _clarify_mod.clear_session(owner_session)
        return "[clarify prompt could not be delivered]"

    wait_seconds = _clarify_mod.get_clarify_timeout()
    answer = _clarify_mod.wait_for_response(clarify_id, timeout=float(wait_seconds))
    if answer is None or answer == "":
        # Timeout or session-boundary cancellation.
        return f"[user did not respond within {int(wait_seconds / 60)}m]"
    return answer
agent.clarify_callback = _clarify_callback_sync
# Store agent reference for interrupt support
agent_holder[0] = agent
# Capture the full tool definitions for transcript logging
@ -15204,6 +15329,14 @@ class GatewayRunner:
result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id)
finally:
unregister_gateway_notify(_approval_session_key)
# Cancel any pending clarify entries so blocked agent
# threads don't hang past the end of the run (interrupt,
# completion, gateway shutdown). Idempotent.
try:
from tools.clarify_gateway import clear_session as _clear_clarify_session
_clear_clarify_session(_approval_session_key)
except Exception:
pass
reset_current_session_key(_approval_session_token)
result_holder[0] = result

View file

@ -124,16 +124,33 @@ def get_process_start_time(pid: int) -> Optional[int]:
def _read_process_cmdline(pid: int) -> Optional[str]:
"""Return the process command line as a space-separated string."""
"""Return the process command line as a space-separated string.
On Linux, reads /proc/<pid>/cmdline directly. On macOS and other
platforms without /proc, falls back to ``ps -p <pid> -o command=``.
"""
cmdline_path = Path(f"/proc/{pid}/cmdline")
try:
raw = cmdline_path.read_bytes()
except (FileNotFoundError, PermissionError, OSError):
return None
pass
else:
if raw:
return raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()
if not raw:
return None
return raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()
try:
result = subprocess.run(
["ps", "-p", str(pid), "-o", "command="],
capture_output=True,
text=True,
timeout=5,
)
if result.returncode == 0 and result.stdout.strip():
return result.stdout.strip()
except (OSError, subprocess.TimeoutExpired):
pass
return None
def _looks_like_gateway_process(pid: int) -> bool:
@ -594,6 +611,22 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
and current_start != existing.get("start_time")
):
stale = True
# When start_time comparison is unavailable (macOS / Windows
# have no /proc, so both sides are None), fall back to
# checking the live process command line. When cmdline is
# also unreadable (Windows has no ps), consult the lock
# record's own argv — the gateway writes it at startup and
# it's the only identity signal on platforms without ps.
# Both oracles must indicate "not a gateway" to mark stale.
if (
not stale
and existing.get("start_time") is None
and current_start is None
and not _looks_like_gateway_process(existing_pid)
):
live_cmdline = _read_process_cmdline(existing_pid)
if live_cmdline is not None or not _record_looks_like_gateway(existing):
stale = True
# Check if process is stopped (Ctrl+Z / SIGTSTP) — stopped
# processes still appear alive to _pid_exists but are not
# actually running. Treat them as stale so --replace works.

View file

@ -4046,6 +4046,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
return get_qwen_auth_status()
if target == "google-gemini-cli":
return get_gemini_oauth_auth_status()
if target == "minimax-oauth":
return get_minimax_oauth_auth_status()
if target == "copilot-acp":
return get_external_process_provider_status(target)
# API-key providers
@ -4757,6 +4759,20 @@ def _minimax_request_user_code(
return payload
def _minimax_expired_in_looks_like_unix_ms(expired_in: int, *, now_ms: int) -> bool:
"""True if ``expired_in`` is plausibly a unix-ms absolute time (vs TTL seconds)."""
return int(expired_in) > (now_ms // 2)
def _minimax_resolve_token_expiry_unix(expired_in: int, *, now: datetime) -> float:
    """Convert MiniMax's ``expired_in`` into an expiry time in unix seconds.

    ``expired_in`` may be either an absolute epoch-millisecond timestamp
    or a time-to-live in seconds.  Absolute values are divided by 1000;
    TTL values are added to ``now``, with a minimum TTL of one second.
    """
    value = int(expired_in)
    now_seconds = now.timestamp()
    is_absolute = _minimax_expired_in_looks_like_unix_ms(
        value, now_ms=int(now_seconds * 1000),
    )
    if is_absolute:
        return value / 1000.0
    return now_seconds + max(1, value)
def _minimax_poll_token(
client: httpx.Client, *, portal_base_url: str, client_id: str,
user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
@ -4765,12 +4781,11 @@ def _minimax_poll_token(
# Defensive parsing: if it's small enough to be a duration, treat as seconds.
import time as _time
now_ms = int(_time.time() * 1000)
if expired_in > now_ms // 2:
# Looks like a unix-ms timestamp.
deadline = expired_in / 1000.0
raw = int(expired_in)
if _minimax_expired_in_looks_like_unix_ms(raw, now_ms=now_ms):
deadline = raw / 1000.0
else:
# Treat as duration in seconds from now.
deadline = _time.time() + max(1, expired_in)
deadline = _time.time() + max(1, raw)
interval = max(2.0, (interval_ms or 2000) / 1000.0)
while _time.time() < deadline:
@ -4884,8 +4899,10 @@ def _minimax_oauth_login(
)
now = datetime.now(timezone.utc)
expires_in_s = int(token_data["expired_in"])
expires_at = now.timestamp() + expires_in_s
expires_at_unix = _minimax_resolve_token_expiry_unix(
int(token_data["expired_in"]), now=now,
)
expires_in_s = max(0, int(expires_at_unix - now.timestamp()))
auth_state = {
"provider": "minimax-oauth",
@ -4899,7 +4916,7 @@ def _minimax_oauth_login(
"refresh_token": token_data["refresh_token"],
"resource_url": token_data.get("resource_url"),
"obtained_at": now.isoformat(),
"expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
"expires_at": datetime.fromtimestamp(expires_at_unix, tz=timezone.utc).isoformat(),
"expires_in": expires_in_s,
}
@ -4960,14 +4977,16 @@ def _refresh_minimax_oauth_state(
relogin_required=True,
)
now_dt = datetime.now(timezone.utc)
expires_in_s = int(payload["expired_in"])
expires_at_unix = _minimax_resolve_token_expiry_unix(
int(payload["expired_in"]), now=now_dt,
)
expires_in_s = max(0, int(expires_at_unix - now_dt.timestamp()))
new_state = dict(state)
new_state.update({
"access_token": payload["access_token"],
"refresh_token": payload.get("refresh_token", state["refresh_token"]),
"obtained_at": now_dt.isoformat(),
"expires_at": datetime.fromtimestamp(now_dt.timestamp() + expires_in_s,
tz=timezone.utc).isoformat(),
"expires_at": datetime.fromtimestamp(expires_at_unix, tz=timezone.utc).isoformat(),
"expires_in": expires_in_s,
})
_minimax_save_auth_state(new_state)
@ -5252,6 +5271,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
get_curated_nous_model_ids, get_pricing_for_provider,
check_nous_free_tier, partition_nous_models_by_tier,
union_with_portal_free_recommendations,
union_with_portal_paid_recommendations,
)
model_ids = get_curated_nous_model_ids()
@ -5260,19 +5280,27 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
if model_ids:
pricing = get_pricing_for_provider("nous")
free_tier = check_nous_free_tier()
_portal_for_recs = auth_state.get("portal_base_url", "")
if free_tier:
# The Portal's freeRecommendedModels endpoint is the
# source of truth for what's free *right now*. Augment
# the curated list with anything new the Portal flags
# as free so users on older Hermes builds still see
# newly-launched free models without a CLI release.
_portal_for_recs = auth_state.get("portal_base_url", "")
model_ids, pricing = union_with_portal_free_recommendations(
model_ids, pricing, _portal_for_recs,
)
model_ids, unavailable_models = partition_nous_models_by_tier(
model_ids, pricing, free_tier=True,
)
else:
# Paid-tier mirror: pull paidRecommendedModels so newly
# launched paid models surface in the picker even if
# the in-repo curated list and docs-hosted manifest
# haven't caught up yet.
model_ids, pricing = union_with_portal_paid_recommendations(
model_ids, pricing, _portal_for_recs,
)
_portal = auth_state.get("portal_base_url", "")
if model_ids:
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")

View file

@ -375,10 +375,12 @@ def auth_add_command(args) -> None:
return
if provider == "minimax-oauth":
from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
creds = resolve_minimax_oauth_runtime_credentials()
creds = auth_mod._minimax_oauth_login(
open_browser=not getattr(args, "no_browser", False),
timeout_seconds=getattr(args, "timeout", None) or 15.0,
)
label = (getattr(args, "label", None) or "").strip() or label_from_token(
creds["api_key"],
creds["access_token"],
_oauth_default_label(provider, len(pool.entries()) + 1),
)
entry = PooledCredential(
@ -388,8 +390,9 @@ def auth_add_command(args) -> None:
auth_type=AUTH_TYPE_OAUTH,
priority=0,
source=f"{SOURCE_MANUAL}:minimax_oauth",
access_token=creds["api_key"],
base_url=creds.get("base_url"),
access_token=creds["access_token"],
refresh_token=creds.get("refresh_token"),
base_url=creds.get("inference_base_url"),
)
pool.add_entry(entry)
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')

View file

@ -468,20 +468,23 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
Telegram command names cannot contain hyphens, so they are replaced with
underscores. Aliases are skipped -- Telegram shows one menu entry per
canonical command. Commands that require arguments are skipped because
selecting a Telegram BotCommand sends only ``/command`` and would execute
an incomplete command.
canonical command.
Plugin-registered slash commands are included so plugins get native
autocomplete in Telegram without touching core code.
Built-in commands that require arguments (e.g. /queue, /steer, /background)
are **included** because their handlers return usage text when selected
without a payload, making them discoverable via autocomplete.
Plugin-registered slash commands that require arguments are **excluded**
because plugins may not provide a no-arg usage fallback.
"""
overrides = _resolve_config_gates()
result: list[tuple[str, str]] = []
for cmd in COMMAND_REGISTRY:
if not _is_gateway_available(cmd, overrides):
continue
if _requires_argument(cmd.args_hint):
continue
# Built-in arg-taking commands are included — their handlers show
# usage text when invoked without arguments, and hiding them from
# the menu hurts discoverability (issue #24312).
tg_name = _sanitize_telegram_name(cmd.name)
if tg_name:
result.append((tg_name, cmd.description))
@ -1359,9 +1362,9 @@ class SlashCommandCompleter(Completer):
try:
proc = subprocess.run(
cmd, capture_output=True, text=True, timeout=2,
cwd=cwd,
cwd=cwd, encoding="utf-8", errors="replace",
)
if proc.returncode == 0 and proc.stdout.strip():
if proc.returncode == 0 and proc.stdout and proc.stdout.strip():
raw = proc.stdout.strip().split("\n")
# Store relative paths
for p in raw[:5000]:

View file

@ -477,6 +477,12 @@ DEFAULT_CONFIG = {
# threshold before escalating to a full timeout. The warning fires
# once per run and does not interrupt the agent. 0 = disable warning.
"gateway_timeout_warning": 900,
# Maximum time (seconds) the gateway will block an agent waiting for
# a clarify-tool response from the user. Hit this and the agent
# unblocks with "[user did not respond within Xm]" so it can adapt
# rather than pinning the running-agent guard forever. CLI clarify
# blocks indefinitely (input() is synchronous) and ignores this.
"clarify_timeout": 600,
# Periodic "still working" notification interval (seconds).
# Sends a status message every N seconds so the user knows the
# agent hasn't died during long tasks. 0 = disable notifications.
@ -628,6 +634,12 @@ DEFAULT_CONFIG = {
# so the server maps it to a persistent Firefox profile automatically.
# When false (default), each session gets a random userId (ephemeral).
"managed_persistence": False,
# Optional externally managed Camofox identity. Useful when another
# app owns the visible browser and Hermes should operate in it.
"user_id": "",
"session_key": "",
# Rehydrate tab_id from Camofox before creating a new tab.
"adopt_existing_tab": False,
},
},
@ -917,6 +929,14 @@ DEFAULT_CONFIG = {
"persistent_output": True,
"persistent_output_max_lines": 200,
"inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage)
# File-mutation verifier footer. When true (default), the agent
# appends a one-line advisory to its final response whenever a
# write_file / patch call failed during the turn and was never
# superseded by a successful write to the same path. This catches
# the "batch of parallel patches, half fail, model claims success"
# class of over-claim that otherwise forces users to run
# `git status` to verify edits landed. Set false to suppress.
"file_mutation_verifier": True,
"show_cost": False, # Show $ cost in the status bar (off by default)
"skin": "default",
# UI language for static user-facing messages (approval prompts, a
@ -1338,6 +1358,21 @@ DEFAULT_CONFIG = {
"domains": [],
"shared_files": [],
},
# Acknowledged supply-chain security advisories. Each entry is the
# ID of an advisory the user has read and acted on (uninstalled the
# compromised package, rotated credentials). Acked advisories no
# longer trigger the startup banner. Add via `hermes doctor --ack
# <id>`; remove by editing the list directly. See
# ``hermes_cli/security_advisories.py`` for the catalog.
"acked_advisories": [],
# Allow Hermes to lazy-install opt-in backend packages from PyPI
# the first time the user enables a backend that needs them
# (e.g. installing ``elevenlabs`` when the user picks ElevenLabs as
# their TTS provider). Set to false to require explicit
# ``pip install`` for everything beyond the base set — appropriate
# for restricted networks, audited environments, or air-gapped
# systems where any runtime install is unacceptable.
"allow_lazy_installs": True,
},
"cron": {
@ -1476,6 +1511,53 @@ DEFAULT_CONFIG = {
"backup_keep": 5,
},
# Language Server Protocol — semantic diagnostics from real
# language servers (pyright, gopls, rust-analyzer, etc.) wired
# into the post-write lint check used by ``write_file`` and
# ``patch``.
#
# LSP is gated on git-workspace detection: when the agent's
# cwd (or the file being edited) is inside a git worktree, LSP
# runs against that workspace. When neither is in a git repo,
# LSP stays dormant and the in-process syntax check is the only
# tier — handy for Telegram/Discord chats where the cwd is the
# user's home directory.
"lsp": {
# Master toggle. Setting this to false disables the entire
# subsystem — no servers spawn, no background event loop, no
# cost.
"enabled": True,
# Diagnostic-wait mode for the post-write check.
# ``"document"`` waits up to ``wait_timeout`` seconds for the
# current file's diagnostics; ``"full"`` additionally requests
# workspace-wide diagnostics (slower).
"wait_mode": "document",
"wait_timeout": 5.0,
# How to handle missing server binaries.
# ``"auto"`` — try to install via npm/go/pip into
# ``<HERMES_HOME>/lsp/bin/`` on first use.
# ``"manual"`` — only use binaries already on PATH.
# ``"off"`` — alias for ``manual``.
"install_strategy": "auto",
# Per-server overrides. Each key is a server_id from the
# registry (``pyright``, ``typescript``, ``gopls``,
# ``rust-analyzer``, etc.) and accepts:
# disabled: true
# — skip this server even when its extensions match
# command: ["full/path/to/server", "--stdio"]
# — pin a custom binary path; bypasses auto-install
# env: {"KEY": "value"}
# — extra env vars passed to the spawned process
# initialization_options: {...}
# — merged into the LSP ``initializationOptions``
# Empty by default; the registry defaults work for typical
# setups.
"servers": {},
},
# Config schema version - bump this when adding new required fields
"_config_version": 23,
}

View file

@ -287,7 +287,8 @@ def _build_apikey_providers_list() -> list:
(_pp.models_url or (_pp.base_url.rstrip("/") + "/models"))
if _pp.base_url else None
)
_static.append((_label, _key_vars, _models_url, _base_var, True))
_hc = getattr(_pp, "supports_health_check", True)
_static.append((_label, _key_vars, _models_url, _base_var, _hc))
except Exception:
pass
return _static
@ -296,19 +297,101 @@ def _build_apikey_providers_list() -> list:
def run_doctor(args):
"""Run diagnostic checks."""
should_fix = getattr(args, 'fix', False)
ack_target = getattr(args, 'ack', None)
# Doctor runs from the interactive CLI, so CLI-gated tool availability
# checks (like cronjob management) should see the same context as `hermes`.
os.environ.setdefault("HERMES_INTERACTIVE", "1")
# Handle `hermes doctor --ack <id>` as a fast path. Persist the ack and
# return without running the rest of the diagnostics — the user has
# already seen the advisory and just wants to silence it.
if ack_target:
from hermes_cli.security_advisories import (
ADVISORIES,
ack_advisory,
)
valid_ids = {a.id for a in ADVISORIES}
if ack_target not in valid_ids:
print(color(
f"Unknown advisory ID: {ack_target!r}. Known IDs: "
f"{', '.join(sorted(valid_ids)) or '(none)'}",
Colors.RED,
))
sys.exit(2)
if ack_advisory(ack_target):
print(color(
f" ✓ Acknowledged advisory {ack_target}. "
f"It will no longer trigger startup banners.",
Colors.GREEN,
))
else:
print(color(
f" ✗ Failed to persist ack for {ack_target}. "
f"Check ~/.hermes/config.yaml is writable.",
Colors.RED,
))
sys.exit(1)
return
issues = []
manual_issues = [] # issues that can't be auto-fixed
fixed_count = 0
print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
print(color("│ 🩺 Hermes Doctor │", Colors.CYAN))
print(color("└─────────────────────────────────────────────────────────┘", Colors.CYAN))
# =========================================================================
# Check: Security advisories (RUNS FIRST — these are the most urgent)
# =========================================================================
print()
print(color("◆ Security Advisories", Colors.CYAN, Colors.BOLD))
try:
from hermes_cli.security_advisories import (
detect_compromised,
filter_unacked,
full_remediation_text,
get_acked_ids,
)
all_hits = detect_compromised()
fresh_hits = filter_unacked(all_hits)
if fresh_hits:
for hit in fresh_hits:
check_fail(
f"{hit.advisory.title}",
f"({hit.package}=={hit.installed_version})",
)
# Print the full remediation block, indented under the
# check_fail header so it reads as a single section.
for line in full_remediation_text(hit):
if line:
print(f" {color(line, Colors.YELLOW)}")
else:
print()
# Funnel into the action list so the summary block surfaces it
# for users who scroll past the section.
manual_issues.append(
f"Resolve security advisory {hit.advisory.id}: "
f"uninstall {hit.package}=={hit.installed_version} and "
f"rotate credentials, then run "
f"`hermes doctor --ack {hit.advisory.id}`."
)
# Acked-but-still-installed: show as informational so the user
# knows the package is still on disk after the ack.
acked_ids = get_acked_ids()
for h in all_hits:
if h.advisory.id in acked_ids:
check_warn(
f"{h.package}=={h.installed_version} still installed "
f"(advisory {h.advisory.id} acknowledged)",
)
else:
check_ok("No active security advisories")
except Exception as e:
# Never let a bug in the advisory check block the rest of doctor.
check_warn(f"Security advisory check failed: {e}")
# =========================================================================
# Check: Python version

View file

@ -2164,7 +2164,7 @@ Environment="PATH={sane_path}"
Environment="VIRTUAL_ENV={venv_dir}"
Environment="HERMES_HOME={hermes_home}"
Restart=always
RestartSec=60
RestartSec=5
RestartMaxDelaySec=300
RestartSteps=5
RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
@ -2199,7 +2199,7 @@ Environment="PATH={sane_path}"
Environment="VIRTUAL_ENV={venv_dir}"
Environment="HERMES_HOME={hermes_home}"
Restart=always
RestartSec=60
RestartSec=5
RestartMaxDelaySec=300
RestartSteps=5
RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
@ -3658,6 +3658,15 @@ def _all_platforms() -> list[dict]:
``hermes setup gateway`` without needing the gateway to be running.
Built-ins keep their dict shape; plugin entries are adapted to the same
shape with ``_registry_entry`` holding the source.
Platform-specific gating: some platforms can't be configured on
every host. Currently:
- Matrix is hidden on Windows. The [matrix] extra pulls
``mautrix[encryption]`` -> ``python-olm``, which has no Windows
wheel and needs ``make`` + libolm to build from sdist. There's
no native Windows path that works, so we don't offer it in the
picker. Users who want Matrix on Windows can run hermes under
WSL.
"""
# Populate the registry so plugin platforms are visible. Idempotent.
# Bundled platform plugins (``kind: platform``) auto-load unconditionally,
@ -3671,6 +3680,11 @@ def _all_platforms() -> list[dict]:
logger.debug("plugin discovery failed during platform enumeration: %s", e)
platforms = [dict(p) for p in _PLATFORMS]
# Drop platforms that can't function on this host. See docstring.
if sys.platform == "win32":
platforms = [p for p in platforms if p.get("key") != "matrix"]
by_key = {p["key"]: p for p in platforms}
try:

View file

@ -2607,6 +2607,7 @@ def _model_flow_nous(config, current_model="", args=None):
check_nous_free_tier,
partition_nous_models_by_tier,
union_with_portal_free_recommendations,
union_with_portal_paid_recommendations,
)
model_ids = get_curated_nous_model_ids()
@ -2662,6 +2663,10 @@ def _model_flow_nous(config, current_model="", args=None):
# with the Portal's freeRecommendedModels list so newly-launched free
# models show up even if this CLI build's hardcoded curated list and
# docs-hosted manifest haven't caught up yet.
#
# For paid users: mirror the same idea with paidRecommendedModels so
# newly-launched paid models surface in the picker too — independent
# of CLI release cadence.
unavailable_models: list[str] = []
if free_tier:
model_ids, pricing = union_with_portal_free_recommendations(
@ -2670,6 +2675,10 @@ def _model_flow_nous(config, current_model="", args=None):
model_ids, unavailable_models = partition_nous_models_by_tier(
model_ids, pricing, free_tier=True
)
else:
model_ids, pricing = union_with_portal_paid_recommendations(
model_ids, pricing, _nous_portal_url,
)
if not model_ids and not unavailable_models:
print("No models available for Nous Portal after filtering.")
@ -9412,7 +9421,7 @@ def main():
gateway_parser = subparsers.add_parser(
"gateway",
help="Messaging gateway management",
description="Manage the messaging gateway (Telegram, Discord, WhatsApp)",
description="Manage the messaging gateway (Telegram, Discord, WhatsApp, Weixin, and more)",
)
gateway_subparsers = gateway_parser.add_subparsers(dest="gateway_command")
@ -9555,6 +9564,17 @@ def main():
gateway_parser.set_defaults(func=cmd_gateway)
# =========================================================================
# lsp command
# =========================================================================
try:
from agent.lsp.cli import register_subparser as _lsp_register
_lsp_register(subparsers)
except Exception as _lsp_err: # noqa: BLE001
# LSP is optional infrastructure — never let a registration
# failure break the CLI overall.
logger.debug("LSP CLI registration failed: %s", _lsp_err)
# =========================================================================
# setup command
# =========================================================================
@ -10117,6 +10137,16 @@ def main():
doctor_parser.add_argument(
"--fix", action="store_true", help="Attempt to fix issues automatically"
)
doctor_parser.add_argument(
"--ack",
metavar="ADVISORY_ID",
default=None,
help=(
"Acknowledge a security advisory by ID and exit. After ack, the "
"advisory will no longer trigger startup banners. Run `hermes "
"doctor` first to see active advisories and their IDs."
),
)
doctor_parser.set_defaults(func=cmd_doctor)
# =========================================================================

View file

@ -621,6 +621,71 @@ def union_with_portal_free_recommendations(
return (augmented_ids, augmented_pricing)
def union_with_portal_paid_recommendations(
    curated_ids: list[str],
    pricing: dict[str, dict[str, str]],
    portal_base_url: str = "",
    *,
    force_refresh: bool = False,
) -> tuple[list[str], dict[str, dict[str, str]]]:
    """Augment the curated list with the Portal's ``paidRecommendedModels``.

    Mirror of :func:`union_with_portal_free_recommendations` for paid-tier
    users. The recommendations payload is obtained through
    :func:`fetch_nous_recommended_models`; its ``paidRecommendedModels``
    list lets newly-launched paid models surface in the picker even when
    this build's curated list does not contain them yet.

    Args:
        curated_ids: model IDs already selected for the picker.
        pricing: pricing table keyed by model ID.
        portal_base_url: forwarded to ``fetch_nous_recommended_models``.
        force_refresh: forwarded to ``fetch_nous_recommended_models``.

    Returns:
        An ``(model_ids, pricing)`` pair of fresh copies where:

        * Portal paid recommendations missing from ``curated_ids`` are
          *prepended* (in Portal order, each at most once) so the picker
          shows them first.
        * ``pricing`` is a shallow copy of the input. Pricing entries are
          deliberately NOT synthesized for paid models: a blank price
          column is preferable to fabricated numbers.

    Failures (fetch error, unexpected payload shape, missing or empty
    field) are silent and degrade to returning copies of the inputs, so a
    Portal-side hiccup never blocks the picker.
    """
    try:
        payload = fetch_nous_recommended_models(
            portal_base_url, force_refresh=force_refresh
        )
    except Exception:
        return (list(curated_ids), dict(pricing))

    paid_block = payload.get("paidRecommendedModels") if isinstance(payload, dict) else None
    if not isinstance(paid_block, list) or not paid_block:
        return (list(curated_ids), dict(pricing))

    # Track every ID already placed in the result. Updating ``seen`` as we
    # go is what prevents a model listed twice by the Portal from being
    # shown twice in the picker.
    seen = set(curated_ids)
    new_ones: list[str] = []
    for entry in paid_block:
        name = _extract_model_name(entry)
        if name and name not in seen:
            seen.add(name)
            new_ones.append(name)

    # Portal-blessed picks go first; the curated order is preserved after.
    return (new_ones + list(curated_ids), dict(pricing))
# ---------------------------------------------------------------------------
# TTL cache for free-tier detection — avoids repeated API calls within a
# session while still picking up upgrades quickly.

View file

@ -205,6 +205,14 @@ def _resolve_runtime_from_pool_entry(
elif provider == "google-gemini-cli":
api_mode = "chat_completions"
base_url = base_url or "cloudcode-pa://google"
elif provider == "minimax-oauth":
# MiniMax OAuth tokens are valid only against the Anthropic Messages
# compatible endpoint. Do not honor stale model.api_mode values from a
# prior OpenAI-compatible provider, or the client will hit
# /chat/completions under /anthropic and receive a bare nginx 404.
api_mode = "anthropic_messages"
pconfig = PROVIDER_REGISTRY.get(provider)
base_url = base_url or (pconfig.inference_base_url if pconfig else "")
elif provider == "anthropic":
api_mode = "anthropic_messages"
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()

View file

@ -0,0 +1,451 @@
"""
Security advisory checker for Hermes Agent.
Detects known-compromised Python packages installed in the active venv
(supply-chain attacks like the Mini Shai-Hulud worm of May 2026 that
poisoned ``mistralai 2.4.6`` on PyPI) and surfaces remediation guidance to
the user.
Design goals:
- **Cheap.** A single ``importlib.metadata.version()`` call per advisory
package. Safe to run on every CLI startup.
- **Loud when it matters, silent otherwise.** If no compromised package is
installed, the user sees nothing.
- **Acknowledgeable.** Once the user has read and acted on an advisory they
can dismiss it via ``hermes doctor --ack <id>``; the ack is persisted to
``config.security.acked_advisories`` and survives restart.
- **Extensible.** Adding a new advisory is one entry in ``ADVISORIES``;
adding a new compromised version is a one-line edit. No code changes
needed when the next worm hits.
The check is invoked from three places:
1. ``hermes doctor`` (and ``hermes doctor --ack <id>``)
2. CLI startup banner (one short line, then full guidance via
``hermes doctor``)
3. Gateway startup (logged to gateway.log; first interactive message gets
a one-line operator banner)
This module is intentionally dependency-free beyond the stdlib so it can
run in environments where the rest of Hermes failed to import.
"""
from __future__ import annotations
import logging
import os
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable, Optional
logger = logging.getLogger(__name__)
# =============================================================================
# Advisory catalog
#
# Each advisory is a community-facing security warning about one or more
# specific package versions that are known to be compromised. To add a new
# advisory:
#
# 1. Append a new ``Advisory`` to ``ADVISORIES`` below
# 2. Set ``compromised`` to a tuple of ``(pkg_name, frozenset_of_versions)``
# — version strings must match what ``importlib.metadata.version()``
# returns. Use an empty frozenset to flag *any installed version*
# (rare; only when the maintainer namespace itself is compromised).
# 3. Write 2-4 short ``remediation`` lines a non-expert can copy/paste.
#
# Do NOT remove old advisories. Once an advisory ships, leave it in place so
# users running an older release with the compromised package still get
# warned. (``Advisory`` currently has no ``superseded_by`` field; add one if
# an advisory ever needs to be marked as superseded.)
# =============================================================================
@dataclass(frozen=True)
class Advisory:
    """One security advisory entry (immutable).

    Attributes:
        id: stable identifier used for acks (e.g. ``shai-hulud-2026-05``).
            Lowercase-hyphen, never reused.
        title: one-line headline shown in banners.
        summary: 1-3 sentence description of what was compromised and how.
        url: reference URL (Socket advisory, GitHub advisory, PyPI page).
        compromised: tuple of ``(package_name, frozenset_of_versions)``
            pairs. Empty frozenset means "any version of this package is
            considered suspect" — use sparingly.
        remediation: ordered tuple of steps the user should take. First step
            should be the uninstall command; subsequent steps the credential
            audit / rotation guidance.
        published: ISO date string for sort order. Defaults to ``""``.
        severity: one of ``low`` / ``medium`` / ``high`` / ``critical``.
            Defaults to ``high``.
    """

    id: str
    title: str
    summary: str
    url: str
    compromised: tuple[tuple[str, frozenset[str]], ...]
    remediation: tuple[str, ...]
    published: str = ""
    severity: str = "high"  # low / medium / high / critical
# Catalog of every known advisory. ``detect_compromised`` uses this tuple as
# its default input, so appending an entry here is all that is needed for a
# new advisory to be checked.
ADVISORIES: tuple[Advisory, ...] = (
    Advisory(
        id="shai-hulud-2026-05",
        title="Mini Shai-Hulud worm — mistralai 2.4.6 compromised on PyPI",
        summary=(
            "PyPI quarantined the mistralai package on 2026-05-12 after a "
            "malicious 2.4.6 release. The worm steals credentials from "
            "environment variables and credential files (~/.npmrc, ~/.pypirc, "
            "~/.aws/credentials, GitHub PATs, cloud SDK tokens) and exfils "
            "them to a hardcoded webhook. If you ran any Python process that "
            "imported mistralai 2.4.6 — including hermes when configured "
            "with provider=mistral for TTS or STT — assume those credentials "
            "are exposed."
        ),
        url="https://socket.dev/blog/mini-shai-hulud-worm-pypi",
        # Only the single malicious release is flagged, not the whole package.
        compromised=(
            ("mistralai", frozenset({"2.4.6"})),
        ),
        # Rendered as a numbered list by ``full_remediation_text``.
        remediation=(
            "Run: pip uninstall -y mistralai (or: uv pip uninstall mistralai)",
            "Rotate API keys in ~/.hermes/.env (OpenRouter, Anthropic, OpenAI, "
            "Nous, GitHub, AWS, Google, Mistral, etc.).",
            "Audit ~/.npmrc, ~/.pypirc, ~/.aws/credentials, ~/.config/gh/hosts.yml, "
            "and any other credential files for tokens that may have been read.",
            "Check GitHub for unexpected new SSH keys, deploy keys, or webhook "
            "additions on repos you have admin on.",
            "After cleanup: hermes doctor --ack shai-hulud-2026-05 to dismiss "
            "this warning.",
        ),
        published="2026-05-12",
        severity="critical",
    ),
)
# =============================================================================
# Detection
# =============================================================================
@dataclass(frozen=True)
class AdvisoryHit:
    """One package-version match against an advisory (immutable).

    Attributes:
        advisory: the ``Advisory`` whose ``compromised`` entry matched.
        package: name of the installed package that matched.
        installed_version: version string reported for that package.
    """

    advisory: Advisory
    package: str
    installed_version: str
def _installed_version(pkg_name: str) -> Optional[str]:
"""Return the installed version of ``pkg_name``, or None if not installed.
Uses ``importlib.metadata`` so we don't depend on pip being importable
inside the active venv (uv-created venvs may lack pip).
"""
try:
from importlib.metadata import PackageNotFoundError, version
except ImportError: # py<3.8 — Hermes requires 3.10+ but defensive.
return None
try:
return version(pkg_name)
except PackageNotFoundError:
return None
except Exception:
# Some metadata corruption modes raise ValueError or OSError. Don't
# let advisory checking crash the CLI startup path.
logger.debug("importlib.metadata.version(%s) raised", pkg_name, exc_info=True)
return None
def detect_compromised(
    advisories: Iterable[Advisory] = ADVISORIES,
) -> list[AdvisoryHit]:
    """Collect an ``AdvisoryHit`` for every flagged package found installed.

    Every ``(package, versions)`` pair of every advisory is compared with
    the version reported by ``_installed_version``. A pair matches when the
    package is installed and either its version is listed in ``versions``
    or ``versions`` is empty (the "any version is suspect" wildcard).
    """
    matches: list[AdvisoryHit] = []
    for entry in advisories:
        for name, flagged in entry.compromised:
            current = _installed_version(name)
            if current is None:
                # Package absent: nothing to report for this pair.
                continue
            version_is_bad = (current in flagged) if flagged else True
            if version_is_bad:
                matches.append(
                    AdvisoryHit(
                        advisory=entry,
                        package=name,
                        installed_version=current,
                    )
                )
    return matches
# =============================================================================
# Acknowledgement persistence
#
# Acks live under ``security.acked_advisories`` in config.yaml as a list of
# advisory IDs. The list is the only state — no per-host data, no
# timestamps, no fingerprints. Users sharing a config.yaml across machines
# (rare but possible) get the same dismissal everywhere, which is the
# correct behavior for a global advisory.
# =============================================================================
def get_acked_ids() -> set[str]:
    """Return the set of advisory IDs the user has dismissed.

    Reads the ``security.acked_advisories`` list from the loaded config and
    normalizes each entry to a stripped, non-empty string.

    Returns:
        The acked IDs, or an empty set when the config cannot be loaded or
        does not have the expected shape. A broken config must never block
        startup; the advisory simply keeps firing until the config is
        repaired.
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
    except Exception:
        logger.debug("Could not load config for advisory acks", exc_info=True)
        return set()

    try:
        sec = cfg.get("security") or {}
        raw = sec.get("acked_advisories") or []
    except AttributeError:
        # ``cfg`` or its ``security`` value is not a mapping (for example
        # ``security: true``). Treat it like any other unreadable config
        # instead of letting the error escape into the startup path.
        logger.debug("Malformed security section in config", exc_info=True)
        return set()

    if not isinstance(raw, list):
        return set()
    normalized = (str(item).strip() for item in raw)
    return {item for item in normalized if item}
def ack_advisory(advisory_id: str) -> bool:
    """Persist an ack for ``advisory_id``.

    The ID is appended to ``security.acked_advisories`` in the config (if
    not already present) and the config is saved.

    Args:
        advisory_id: ID to acknowledge; surrounding whitespace is ignored.

    Returns:
        ``True`` when the config was saved, ``False`` when the ID is blank
        or the config could not be imported, loaded or saved.
    """
    advisory_id = advisory_id.strip()
    if not advisory_id:
        return False
    try:
        from hermes_cli.config import load_config, save_config
    except Exception:
        logger.warning("Could not import config module to persist ack")
        return False
    try:
        cfg = load_config()
        sec = cfg.get("security")
        if sec is None:
            # Covers both a missing key and ``security:`` with a null value.
            # ``setdefault`` alone would hand back the stored ``None`` in the
            # latter case and the ack would silently fail.
            sec = {}
            cfg["security"] = sec
        existing = sec.get("acked_advisories")
        if not isinstance(existing, list):
            existing = []
        if advisory_id not in existing:
            existing.append(advisory_id)
        sec["acked_advisories"] = existing
        save_config(cfg)
        return True
    except Exception:
        logger.exception("Failed to persist advisory ack for %s", advisory_id)
        return False
def filter_unacked(hits: list[AdvisoryHit]) -> list[AdvisoryHit]:
    """Drop hits whose advisory ID the user has already acknowledged.

    The ack set is only fetched when there is at least one hit to filter.
    """
    if hits:
        dismissed = get_acked_ids()
        return [hit for hit in hits if hit.advisory.id not in dismissed]
    return []
# =============================================================================
# Rendering helpers
# =============================================================================
def _term_supports_color() -> bool:
if os.environ.get("NO_COLOR"):
return False
if not sys.stdout.isatty():
return False
return True
def short_banner_lines(hits: list[AdvisoryHit]) -> list[str]:
    """Return a short startup banner for ``hits``.

    The banner names the most severe hit explicitly so the user knows what
    is wrong without running doctor. Severity is ranked
    ``low < medium < high < critical``; unknown values rank lowest and ties
    keep the caller's order.

    Returns:
        An empty list when there are no hits, three lines for a single hit,
        and four lines (an extra "N additional ..." line) when more than
        one hit is active. The caller is responsible for color/styling.
    """
    if not hits:
        return []

    severity_rank = {"low": 0, "medium": 1, "high": 2, "critical": 3}

    def _rank(hit: AdvisoryHit) -> int:
        level = str(getattr(hit.advisory, "severity", "")).lower()
        return severity_rank.get(level, -1)

    # max() returns the first maximal element, so equally severe hits keep
    # their original relative order.
    primary = max(hits, key=_rank)
    lines = [
        f"SECURITY ADVISORY [{primary.advisory.id}]: {primary.advisory.title}",
        f" Detected: {primary.package}=={primary.installed_version}",
        " Run 'hermes doctor' for remediation steps.",
    ]
    extra = len(hits) - 1
    if extra:
        noun = "advisory" if extra == 1 else "advisories"
        lines.insert(1, f" ({extra} additional {noun} also active.)")
    return lines
def full_remediation_text(hit: AdvisoryHit) -> list[str]:
    """Build the full advisory write-up for one hit, one string per line.

    The block consists of a header (title, metadata, detected package,
    reference URL), the advisory summary, and the remediation steps as a
    numbered list starting at 1. Empty strings mark blank separator lines.
    """
    adv = hit.advisory
    header = [
        f"=== {adv.title} ===",
        f"ID: {adv.id} Severity: {adv.severity} Published: {adv.published}",
        f"Detected: {hit.package}=={hit.installed_version}",
        f"Reference: {adv.url}",
        "",
        adv.summary,
        "",
        "Remediation:",
    ]
    numbered_steps = [
        f" {number}. {text}"
        for number, text in enumerate(adv.remediation, start=1)
    ]
    return header + numbered_steps
# =============================================================================
# Startup-banner gating
#
# We do NOT want to hammer the user with the banner on every command. Once
# they've seen it inside a 24h window we cache that fact in
# ``~/.hermes/cache/advisory_banner_seen`` (a single line per advisory ID:
# ``<id> <unix_epoch_seconds>``, the float value of ``time.time()``).
#
# Acked advisories never re-banner. Cached-but-not-acked advisories
# re-banner after 24h so the user doesn't fully forget.
# =============================================================================
_BANNER_CACHE_FILE = "advisory_banner_seen"
_BANNER_REPEAT_HOURS = 24
def _banner_cache_path() -> Optional[Path]:
    """Return the banner cache file path, creating its directory if needed.

    The file lives in the ``cache`` subdirectory of the Hermes home reported
    by ``hermes_constants.get_hermes_home``. Any failure (import, directory
    creation, ...) yields ``None`` so callers can skip caching.
    """
    try:
        from hermes_constants import get_hermes_home
        home = Path(get_hermes_home())
        target_dir = home.joinpath("cache")
        target_dir.mkdir(parents=True, exist_ok=True)
        result: Optional[Path] = target_dir.joinpath(_BANNER_CACHE_FILE)
    except Exception:
        result = None
    return result
def _read_banner_cache() -> dict[str, float]:
    """Load the banner cache as a ``{advisory_id: timestamp}`` mapping.

    Each usable line holds an advisory ID and a float timestamp separated
    by whitespace; malformed lines are skipped. A missing file, or any
    error while reading it, yields an empty mapping.
    """
    cache_file = _banner_cache_path()
    if cache_file is None or not cache_file.exists():
        return {}

    stamps: dict[str, float] = {}
    try:
        content = cache_file.read_text(encoding="utf-8")
        for raw_line in content.splitlines():
            fields = raw_line.strip().split(None, 1)
            if len(fields) != 2:
                # Blank line or a line without a timestamp column.
                continue
            key, stamp_text = fields
            try:
                stamps[key] = float(stamp_text)
            except ValueError:
                continue
    except Exception:
        # A read failure discards anything parsed so far.
        return {}
    return stamps
def _write_banner_cache(seen: dict[str, float]) -> None:
    """Overwrite the banner cache file with the contents of ``seen``.

    One ``<id> <timestamp>`` line is written per entry, followed by a
    trailing newline. Nothing happens when no cache path is available, and
    write errors are only logged at debug level.
    """
    target = _banner_cache_path()
    if target is not None:
        try:
            body = "\n".join(f"{key} {stamp}" for key, stamp in seen.items())
            target.write_text(body + "\n", encoding="utf-8")
        except Exception:
            logger.debug("Could not write advisory banner cache", exc_info=True)
def hits_due_for_banner(
    hits: list[AdvisoryHit],
    *,
    repeat_hours: int = _BANNER_REPEAT_HOURS,
) -> list[AdvisoryHit]:
    """Select the hits whose startup banner should be shown now.

    A hit is due when its advisory is not acked and its last recorded
    banner time is older than ``repeat_hours`` (or was never recorded).

    Side effect: every due hit is stamped with the current time in the
    banner cache, which is written back only when something was due.
    Callers are expected to render the returned hits.
    """
    import time

    pending = filter_unacked(hits)
    if not pending:
        return []

    current = time.time()
    stamps = _read_banner_cache()
    threshold = current - (repeat_hours * 3600)

    selected: list[AdvisoryHit] = []
    # Stamping happens inside the loop on purpose: a later hit sharing the
    # same advisory ID then sees the fresh timestamp.
    for candidate in pending:
        key = candidate.advisory.id
        previous = stamps.get(key, 0.0)
        if previous < threshold:
            selected.append(candidate)
            stamps[key] = current

    if selected:
        _write_banner_cache(stamps)
    return selected
# =============================================================================
# Public entry points used by doctor / CLI / gateway
# =============================================================================
def render_doctor_section(hits: list[AdvisoryHit]) -> tuple[bool, list[str]]:
    """Produce the security-advisory section for ``hermes doctor``.

    Returns:
        ``(has_problems, lines)``. With no unacked hits the result is
        ``False`` plus a single "all clear" line; otherwise ``True`` plus
        the full remediation text of each unacked hit, separated by blank
        lines. Printing and coloring are left to the caller.
    """
    active = filter_unacked(hits)
    if not active:
        return False, ["No active security advisories. ✓"]

    rendered: list[str] = []
    for position, hit in enumerate(active):
        if position > 0:
            rendered.append("")
        rendered += full_remediation_text(hit)
    return True, rendered
def startup_banner(hits: list[AdvisoryHit]) -> Optional[str]:
    """Build the startup banner text, or ``None`` when nothing is due.

    Which hits are due is decided by ``hits_due_for_banner``, which also
    stamps the banner cache as a side effect. The text is wrapped in bold
    red ANSI codes when the terminal supports color.
    """
    due = hits_due_for_banner(hits)
    if not due:
        return None

    banner_lines = short_banner_lines(due)
    use_color = _term_supports_color()
    text = "\n".join(banner_lines)
    if not use_color:
        return text
    return "\x1b[1;31m" + text + "\x1b[0m"
def gateway_log_message(hits: list[AdvisoryHit]) -> Optional[str]:
    """Summarize unacked hits as a single log line for gateway operators.

    Returns ``None`` when every hit is acked (or there are none). A single
    hit is described in detail; several hits are listed by advisory ID with
    a pointer to ``hermes doctor``.
    """
    active = filter_unacked(hits)
    if not active:
        return None

    if len(active) > 1:
        id_list = ", ".join(hit.advisory.id for hit in active)
        return (
            f"{len(active)} security advisories active "
            f"(IDs: {id_list}). "
            "Run `hermes doctor` on the gateway host for details."
        )

    only = active[0]
    adv = only.advisory
    return (
        f"Security advisory [{adv.id}] active: "
        f"{only.package}=={only.installed_version} matches {adv.title}. "
        f"See {adv.url}"
    )

View file

@ -205,15 +205,9 @@ TOOL_CATEGORIES = {
],
"tts_provider": "elevenlabs",
},
{
"name": "Mistral (Voxtral TTS)",
"badge": "paid",
"tag": "Multilingual, native Opus",
"env_vars": [
{"key": "MISTRAL_API_KEY", "prompt": "Mistral API key", "url": "https://console.mistral.ai/"},
],
"tts_provider": "mistral",
},
# Mistral (Voxtral TTS) temporarily hidden — `mistralai` PyPI
# package is currently quarantined (malicious 2.4.6 release on
# 2026-05-12). Restore this entry once PyPI un-quarantines.
{
"name": "Google Gemini TTS",
"badge": "preview",

View file

@ -59,10 +59,22 @@ try:
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
except ImportError:
raise SystemExit(
"Web UI requires fastapi and uvicorn.\n"
f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
)
# First try lazy-installing the dashboard extras. Only the user actually
# running `hermes dashboard` needs fastapi+uvicorn; lazy install keeps
# them out of every other install path. After install, re-import.
try:
from tools.lazy_deps import ensure as _lazy_ensure
_lazy_ensure("tool.dashboard", prompt=False)
from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
except Exception:
raise SystemExit(
"Web UI requires fastapi and uvicorn.\n"
f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
)
WEB_DIST = Path(os.environ["HERMES_WEB_DIST"]) if "HERMES_WEB_DIST" in os.environ else Path(__file__).parent / "web_dist"
_log = logging.getLogger(__name__)
@ -280,7 +292,9 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
"stt.provider": {
"type": "select",
"description": "Speech-to-text provider",
"options": ["local", "groq", "openai", "mistral", "xai", "elevenlabs"],
# "mistral" temporarily removed — mistralai PyPI package quarantined
# (malicious 2.4.6 release on 2026-05-12). Restore once available.
"options": ["local", "groq", "openai", "xai", "elevenlabs"],
},
"stt.elevenlabs.model_id": {
"type": "select",
@ -2808,6 +2822,7 @@ def _minimax_poller(session_id: str) -> None:
"""
from hermes_cli.auth import (
_minimax_poll_token,
_minimax_resolve_token_expiry_unix,
_minimax_save_auth_state,
MINIMAX_OAUTH_GLOBAL_INFERENCE,
MINIMAX_OAUTH_SCOPE,
@ -2845,8 +2860,10 @@ def _minimax_poller(session_id: str) -> None:
# dashboard path; cn-region operators can still use the CLI
# flow which supports `--region cn`.
now = datetime.now(timezone.utc)
expires_in_s = int(token_data["expired_in"])
expires_at_ts = now.timestamp() + expires_in_s
expires_at_ts = _minimax_resolve_token_expiry_unix(
int(token_data["expired_in"]), now=now,
)
expires_in_s = max(0, int(expires_at_ts - now.timestamp()))
auth_state = {
"provider": "minimax-oauth",
"region": sess.get("region", "global"),
@ -4802,6 +4819,9 @@ def _get_dashboard_plugins(force_rescan: bool = False) -> list:
global _dashboard_plugins_cache
if _dashboard_plugins_cache is None or force_rescan:
_dashboard_plugins_cache = _discover_dashboard_plugins()
elif _dashboard_plugins_cache:
if any(not Path(p["_dir"]).is_dir() for p in _dashboard_plugins_cache):
_dashboard_plugins_cache = _discover_dashboard_plugins()
return _dashboard_plugins_cache
@ -5213,11 +5233,33 @@ def start_server(
if open_browser:
import webbrowser
def _open():
time.sleep(1.0)
webbrowser.open(f"http://{host}:{port}")
# On headless Linux (no DISPLAY or WAYLAND_DISPLAY) some registered
# browsers are TUI programs (links, lynx, www-browser) that try to
# take over the terminal. That can send SIGHUP to the server process
# and cause an immediate exit even though uvicorn bound successfully.
# Skip the auto-open attempt on headless systems and let the user
# open the URL manually. macOS and Windows are always considered
# display-capable.
_has_display = (
sys.platform != "linux"
or bool(os.environ.get("DISPLAY"))
or bool(os.environ.get("WAYLAND_DISPLAY"))
)
threading.Thread(target=_open, daemon=True).start()
if _has_display:
def _open():
try:
time.sleep(1.0)
webbrowser.open(f"http://{host}:{port}")
except Exception:
pass
threading.Thread(target=_open, daemon=True).start()
else:
_log.debug(
"Skipping browser-open: no DISPLAY or WAYLAND_DISPLAY detected "
"(headless Linux). Pass --no-open to suppress this detection."
)
print(f" Hermes Web UI → http://{host}:{port}")
uvicorn.run(app, host=host, port=port, log_level="warning")

View file

@ -0,0 +1,14 @@
{
"name": "example",
"label": "Example",
"description": "Example dashboard plugin — used by test suite for auth coverage",
"icon": "Sparkles",
"version": "1.0.0",
"tab": {
"path": "/example",
"position": "after:skills"
},
"slots": [],
"entry": "dist/index.js",
"api": "plugin_api.py"
}

View file

@ -0,0 +1,17 @@
"""Example dashboard plugin — backend API routes.
Mounted at /api/plugins/example/ by the dashboard plugin system.
This minimal plugin exists so the test suite has a stable, side-effect-free
GET endpoint to verify that plugin API routes work with auth.
"""
from fastapi import APIRouter
router = APIRouter()
@router.get("/hello")
async def hello():
    """Return a fixed greeting payload identifying the example plugin."""
    greeting = {
        "message": "Hello from the example plugin!",
        "plugin": "example",
        "version": "1.0.0",
    }
    return greeting

View file

@ -875,6 +875,13 @@ class HindsightMemoryProvider(MemoryProvider):
"Hindsight local runtime is unavailable"
+ (f": {reason}" if reason else "")
)
try:
from tools.lazy_deps import ensure as _lazy_ensure
_lazy_ensure("memory.hindsight", prompt=False)
except ImportError:
pass
except Exception as _e:
raise ImportError(str(_e))
from hindsight import HindsightEmbedded
HindsightEmbedded.__del__ = lambda self: None
llm_provider = self._config.get("llm_provider", "")

View file

@ -687,12 +687,28 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
"For local instances, set HONCHO_BASE_URL instead."
)
# Lazy-install the honcho SDK on demand. ensure() honors
# security.allow_lazy_installs (default true). On failure we surface
# the original ImportError-shape message so existing callers still get
# the "go run hermes honcho setup" hint they used to.
try:
from tools.lazy_deps import FeatureUnavailable, ensure as _lazy_ensure
_lazy_ensure("memory.honcho", prompt=False)
except ImportError:
# lazy_deps module missing — fall through to the raw import below.
pass
except Exception:
# FeatureUnavailable or unexpected error. Don't crash here; let the
# actual import attempt produce the canonical error message.
pass
try:
from honcho import Honcho
except ImportError:
raise ImportError(
"honcho-ai is required for Honcho integration. "
"Install it with: pip install honcho-ai"
"Install it with: pip install honcho-ai "
"(or run `hermes honcho setup` to configure)."
)
# Allow config.yaml honcho.base_url to override the SDK's environment

View file

@ -8,6 +8,7 @@ xiaomi = ProviderProfile(
aliases=("mimo", "xiaomi-mimo"),
env_vars=("XIAOMI_API_KEY",),
base_url="https://api.xiaomimimo.com/v1",
supports_health_check=False, # /v1/models returns 401 even with valid key
)
register_provider(xiaomi)

View file

@ -40,6 +40,7 @@ class ProviderProfile:
base_url: str = ""
models_url: str = "" # explicit models endpoint; falls back to {base_url}/models
auth_type: str = "api_key" # api_key|oauth_device_code|oauth_external|copilot|aws_sdk
supports_health_check: bool = True # False → doctor skips /models probe for this provider
# ── Model catalog ─────────────────────────────────────────
# fallback_models: curated list shown in /model picker when live fetch fails.

View file

@ -11,44 +11,55 @@ requires-python = ">=3.11"
authors = [{ name = "Nous Research" }]
license = { text = "MIT" }
dependencies = [
# Core — pinned to known-good ranges to limit supply chain attack surface
"openai>=2.21.0,<3",
"anthropic>=0.39.0,<1",
"python-dotenv>=1.2.1,<2",
"fire>=0.7.1,<1",
"httpx[socks]>=0.28.1,<1",
"rich>=14.3.3,<15",
"tenacity>=9.1.4,<10",
"pyyaml>=6.0.2,<7",
"ruamel.yaml>=0.18.16,<0.19",
"requests>=2.33.0,<3", # CVE-2026-25645
"jinja2>=3.1.5,<4",
"pydantic>=2.12.5,<3",
# Core — every direct dep is exact-pinned to ==X.Y.Z (no ranges).
# Rationale: ranges let a freshly published (possibly malicious) release of
# a dependency reach users at any time without a code review on our side.
# Exact pins mean the
# only way a new package version reaches a user is via an intentional
# update on our end (bump the pin in this file, regenerate uv.lock).
# This was tightened on 2026-05-12 in response to the Mini Shai-Hulud
# worm hitting mistralai 2.4.6 on PyPI; if that release had been
# captured by `mistralai>=2.3.0,<3` rather than an exact pin, every
# install in the hours before the quarantine would have pulled it.
#
# When updating: bump the version below AND regenerate uv.lock with
# `uv lock` so the transitive resolution stays consistent. Don't
# introduce ranges back without a written justification.
#
# Scope rule: only packages used by EVERY hermes session belong here.
# Anything that's provider-specific (`anthropic`, `firecrawl-py`,
# `exa-py`, `fal-client`, `edge-tts`, `parallel-web`) belongs in an
# extra and gets lazy-installed via `tools/lazy_deps.py` when the
# user picks that backend. Smaller `dependencies` = smaller blast
# radius for the next supply-chain attack.
"openai==2.24.0",
"python-dotenv==1.2.1",
"fire==0.7.1",
"httpx[socks]==0.28.1",
"rich==14.3.3",
"tenacity==9.1.4",
"pyyaml==6.0.3",
"ruamel.yaml==0.18.17",
"requests==2.33.0", # CVE-2026-25645
"jinja2==3.1.6",
"pydantic==2.12.5",
# Interactive CLI (prompt_toolkit is used directly by cli.py)
"prompt_toolkit>=3.0.52,<4",
# Tools
"exa-py>=2.9.0,<3",
"firecrawl-py>=4.16.0,<5",
"parallel-web>=0.4.2,<1",
"fal-client>=0.13.1,<1",
"prompt_toolkit==3.0.52",
# Cron scheduler (built-in feature — scheduled cron/interval jobs use croniter).
"croniter>=6.0.0,<7",
# Text-to-speech (Edge TTS is free, no API key needed)
"edge-tts>=7.2.7,<8",
"croniter==6.0.0",
# Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
"PyJWT[crypto]>=2.12.0,<3", # CVE-2026-32597
"PyJWT[crypto]==2.12.1", # CVE-2026-32597
# Windows has no IANA tzdata shipped with the OS, so Python's ``zoneinfo``
# (PEP 615) raises ``ZoneInfoNotFoundError`` for every non-UTC timezone
# out of the box. ``tzdata`` ships the Olson database as a data package
# Python resolves automatically. No-op on Linux/macOS (which have
# /usr/share/zoneinfo). Credits: PR #13182 (@sprmn24).
"tzdata>=2023.3; sys_platform == 'win32'",
"tzdata==2025.3; sys_platform == 'win32'",
# Cross-platform process / PID management. `psutil` is the canonical
# answer for "is this PID alive" and process-tree walking across Linux,
# macOS and Windows. It replaces POSIX-only idioms like `os.kill(pid, 0)`
# (which is a silent killer on Windows — see CONTRIBUTING.md) and
# `os.killpg` (which doesn't exist on Windows).
"psutil>=5.9.0,<8",
"psutil==7.2.2",
"fastapi>=0.104.0,<1",
"uvicorn[standard]>=0.24.0,<1",
"ptyprocess>=0.7.0,<1; sys_platform != 'win32'",
@ -56,45 +67,78 @@ dependencies = [
]
[project.optional-dependencies]
modal = ["modal>=1.0.0,<2"]
daytona = ["daytona>=0.148.0,<1"]
vercel = ["vercel>=0.5.7,<0.6.0"]
hindsight = ["hindsight-client>=0.4.22"]
dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "pytest-split>=0.9,<1", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
# Native Anthropic provider — only needed when provider=anthropic (not via
# OpenRouter or other aggregators).
anthropic = ["anthropic==0.86.0"]
# Web search backends — each only loaded when the user picks it as their
# search provider (configured via `hermes tools` or config.yaml).
exa = ["exa-py==2.10.2"]
firecrawl = ["firecrawl-py==4.17.0"]
parallel-web = ["parallel-web==0.4.2"]
# Image generation backends
fal = ["fal-client==0.13.1"]
# Edge TTS — default TTS provider but still optional (users can pick
# ElevenLabs / OpenAI / MiniMax instead).
edge-tts = ["edge-tts==7.2.7"]
modal = ["modal==1.3.4"]
daytona = ["daytona==0.155.0"]
vercel = ["vercel==0.5.7"]
hindsight = ["hindsight-client==0.6.1"]
dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-xdist==3.8.0", "pytest-split==0.11.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
cron = [] # croniter is now a core dependency; this extra kept for back-compat
slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29", "aiohttp-socks>=0.10,<1"]
cli = ["simple-term-menu>=1.0,<2"]
tts-premium = ["elevenlabs>=1.0,<2"]
slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1"]
matrix = ["mautrix[encryption]==0.21.0", "Markdown==3.10.2", "aiosqlite==0.22.1", "asyncpg==0.31.0", "aiohttp-socks==0.11.0"]
cli = ["simple-term-menu==1.6.6"]
tts-premium = ["elevenlabs==1.59.0"]
voice = [
# Local STT pulls in wheel-only transitive deps (ctranslate2, onnxruntime),
# so keep it out of the base install for source-build packagers like Homebrew.
"faster-whisper>=1.0.0,<2",
"sounddevice>=0.4.6,<1",
"numpy>=1.24.0,<3",
"faster-whisper==1.2.1",
"sounddevice==0.5.5",
"numpy==2.4.3",
]
pty = [
  # Kept as a no-op back-compat alias — `ptyprocess` and `pywinpty` are now
  # in the main `dependencies` list (with the same platform markers), so
  # any existing `pip install hermes-agent[pty]` invocations resolve cleanly
  # without pulling in extra packages.
  #
  # NOTE(review): merge-conflict markers were committed here. Resolved to the
  # HEAD side because `ptyprocess` is already declared in core
  # `dependencies`; confirm `pywinpty` is declared there as well.
]
honcho = ["honcho-ai>=2.0.1,<3"]
mcp = ["mcp>=1.2.0,<2"]
homeassistant = ["aiohttp>=3.9.0,<4"]
sms = ["aiohttp>=3.9.0,<4"]
honcho = ["honcho-ai==2.0.1"]
mcp = ["mcp==1.26.0"]
homeassistant = ["aiohttp==3.13.3"]
sms = ["aiohttp==3.13.3"]
# Computer use — macOS background desktop control via cua-driver (MCP stdio).
# The cua-driver binary itself is installed via `hermes tools` post-setup
# (curl install script); this extra just pins the MCP client used to talk
# to it, which is already provided by the `mcp` extra.
computer-use = ["mcp>=1.2.0,<2"]
acp = ["agent-client-protocol>=0.9.0,<1.0"]
mistral = ["mistralai>=2.3.0,<3"]
bedrock = ["boto3>=1.35.0,<2"]
computer-use = ["mcp==1.26.0"]
acp = ["agent-client-protocol==0.9.0"]
# mistral: extra REMOVED 2026-05-12 — `mistralai` PyPI project quarantined
# after malicious 2.4.6 release (Mini Shai-Hulud worm). Every version of
# `mistralai` returns 404 on PyPI right now, so any pin we'd write is
# unresolvable, which breaks `uv lock --check` in CI.
#
# To restore once PyPI un-quarantines:
# 1. Verify the new release is clean (read the changelog, check Socket
# advisory page, confirm no malicious code review findings).
# 2. Add back: mistral = ["mistralai==<verified-version>"]
# 3. Re-enable Mistral in:
# - tools/lazy_deps.py (LAZY_DEPS["tts.mistral"], LAZY_DEPS["stt.mistral"])
# - hermes_cli/tools_config.py (un-hide from provider picker)
# - hermes_cli/web_server.py (re-add to dashboard STT options)
# - tools/transcription_tools.py / tools/tts_tool.py (drop disabled stubs)
# 4. Run `uv lock` to regenerate transitives.
# 5. Optionally re-add to [all] only after a few days of clean operation.
bedrock = ["boto3==1.42.89"]
termux = [
# Baseline Android / Termux path for reliable fresh installs.
"python-telegram-bot[webhooks]>=22.6,<23",
"python-telegram-bot[webhooks]==22.6",
"hermes-agent[cron]",
"hermes-agent[cli]",
"hermes-agent[pty]",
@ -103,80 +147,75 @@ termux = [
"hermes-agent[acp]",
]
termux-all = [
# Best-effort "install all" profile for Termux: include broad extras that
# are known to resolve on Android, while intentionally excluding extras that
# currently hard-fail from missing/broken Android wheels/toolchains.
#
# Excluded for now:
# - matrix (mautrix[encryption] -> python-olm build failures on Termux)
# - voice (faster-whisper chain requires ctranslate2/av builds not packaged)
# Best-effort "install all" profile for Termux. Same policy as [all]:
# only includes extras that aren't covered by `tools/lazy_deps.py`.
# Backends like telegram/slack/dingtalk/feishu/honcho lazy-install at
# first use, so they're no longer eager-installed here.
"hermes-agent[termux]",
"hermes-agent[messaging]",
"hermes-agent[slack]",
"hermes-agent[tts-premium]",
"hermes-agent[dingtalk]",
"hermes-agent[feishu]",
"hermes-agent[google]",
"hermes-agent[mistral]",
"hermes-agent[bedrock]",
"hermes-agent[homeassistant]",
"hermes-agent[sms]",
"hermes-agent[web]",
]
dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
dingtalk = ["dingtalk-stream==0.24.3", "alibabacloud-dingtalk==2.2.42", "qrcode==7.4.2"]
feishu = ["lark-oapi==1.5.3", "qrcode==7.4.2"]
google = [
# Required by the google-workspace skill (Gmail, Calendar, Drive, Contacts,
# Sheets, Docs). Declared here so packagers (Nix, Homebrew) ship them with
# the [all] extra and users don't hit runtime `pip install` paths that fail
# in environments without pip (e.g. Nix-managed Python).
"google-api-python-client>=2.100,<3",
"google-auth-oauthlib>=1.0,<2",
"google-auth-httplib2>=0.2,<1",
"google-api-python-client==2.194.0",
"google-auth-oauthlib==1.3.1",
"google-auth-httplib2==0.3.1",
]
youtube = [
# Required by skills/media/youtube-content and
# optional-skills/productivity/memento-flashcards (youtube_quiz.py).
# Without this declaration uv sync omits the package and both skills fail
# at first invocation with ModuleNotFoundError (issue #22243).
"youtube-transcript-api>=1.2.0",
"youtube-transcript-api==1.2.4",
]
# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean.
web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0"]
rl = [
"atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
"tinker @ git+https://github.com/thinking-machines-lab/tinker.git@30517b667f18a3dfb7ef33fb56cf686d5820ba2b",
"fastapi>=0.104.0,<1",
"uvicorn[standard]>=0.24.0,<1",
"wandb>=0.15.0,<1",
"fastapi==0.133.1",
"uvicorn[standard]==0.41.0",
"wandb==0.25.1",
]
yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"]
all = [
"hermes-agent[modal]",
"hermes-agent[daytona]",
"hermes-agent[vercel]",
"hermes-agent[messaging]",
# matrix: python-olm (required by matrix-nio[e2e]) is upstream-broken on
# modern macOS (archived libolm, C++ errors with Clang 21+). On Linux the
# [matrix] extra's own marker pulls in the [e2e] variant automatically.
"hermes-agent[matrix]; sys_platform == 'linux'",
# Policy (2026-05-12): `[all]` includes only extras that genuinely
# CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every
# session can use, things needed before the agent loop is alive
# (terminal/CLI), and skill deps that packagers (Nix, AUR, Homebrew)
# need in the wheel. Anything an opt-in backend (provider, search,
# TTS, image, memory, messaging platform, terminal sandbox) needs
# MUST live exclusively in `LAZY_DEPS` and resolve at first use —
# otherwise one quarantined PyPI release breaks every fresh install.
#
# Removed from [all] on 2026-05-12 (covered by lazy-install):
# anthropic, exa, firecrawl, parallel-web, fal, edge-tts,
# modal, daytona, vercel, messaging (telegram/discord/slack),
# matrix, slack, honcho, voice (faster-whisper),
# dingtalk, feishu, bedrock, tts-premium (elevenlabs)
#
# Why: the matrix extra in particular pulls `mautrix[encryption]`
# which depends on `python-olm`. python-olm has Linux-only wheels and
# no native build path on Windows or modern macOS. With matrix in
# [all], `uv sync --locked` on Windows tried to build it from sdist
# and failed on `make`. Lazy-install routes that build to first use,
# where the user is expected to have a toolchain available.
"hermes-agent[cron]",
"hermes-agent[cli]",
"hermes-agent[dev]",
"hermes-agent[tts-premium]",
"hermes-agent[slack]",
"hermes-agent[pty]",
"hermes-agent[honcho]",
"hermes-agent[mcp]",
"hermes-agent[homeassistant]",
"hermes-agent[sms]",
"hermes-agent[acp]",
"hermes-agent[voice]",
"hermes-agent[dingtalk]",
"hermes-agent[feishu]",
"hermes-agent[google]",
"hermes-agent[mistral]",
"hermes-agent[bedrock]",
"hermes-agent[web]",
"hermes-agent[youtube]",
]

View file

@ -347,6 +347,10 @@ _PARALLEL_SAFE_TOOLS = frozenset({
# File tools can run concurrently when they target independent paths.
_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
# Tools that mutate files on disk. Used by the per-turn verifier that
# surfaces silently-failed file edits so the model can't over-claim success.
_FILE_MUTATING_TOOLS = frozenset({"write_file", "patch"})
# Maximum number of concurrent worker threads for parallel tool execution.
_MAX_TOOL_WORKERS = 8
@ -524,6 +528,68 @@ def _append_subdir_hint_to_multimodal(value: Dict[str, Any], hint: str) -> None:
value["text_summary"] = value["text_summary"] + hint
def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List[str]:
    """List the file paths that a file-mutating tool call intends to change.

    ``write_file`` and ``patch`` in replace mode name a single target in
    ``args["path"]``.  ``patch`` in V4A patch mode may touch several files,
    so the patch body is scanned for ``*** Update File:`` /
    ``*** Add File:`` / ``*** Delete File:`` headers and every named file is
    returned in order of appearance.

    Returns an empty list for tools outside ``_FILE_MUTATING_TOOLS``, for an
    unrecognised patch mode, and when the relevant argument is missing.
    """
    if tool_name not in _FILE_MUTATING_TOOLS:
        return []

    def _single_path() -> List[str]:
        # Shared by write_file and replace-mode patch: one optional target.
        target = args.get("path")
        return [str(target)] if target else []

    if tool_name == "write_file":
        return _single_path()

    # Everything below handles tool_name == "patch".
    patch_mode = args.get("mode") or "replace"
    if patch_mode == "replace":
        return _single_path()
    if patch_mode != "patch":
        return []

    patch_body = args.get("patch") or ""
    if not (isinstance(patch_body, str) and patch_body):
        return []

    import re as _re

    header_re = _re.compile(
        r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$',
        _re.MULTILINE,
    )
    candidates = (found.group(1).strip() for found in header_re.finditer(patch_body))
    return [name for name in candidates if name]
def _extract_error_preview(result: Any, max_len: int = 180) -> str:
"""Pull a one-line error summary out of a tool result for footer display."""
text = _multimodal_text_summary(result) if result is not None else ""
if not isinstance(text, str):
try:
text = str(text)
except Exception:
return ""
# Try to parse JSON and pull the ``error`` field — tool handlers return
# ``{"success": false, "error": "..."}``; raw string wins if parse fails.
stripped = text.strip()
if stripped.startswith("{"):
try:
import json as _json
data = _json.loads(stripped)
if isinstance(data, dict) and isinstance(data.get("error"), str):
text = data["error"]
except Exception:
pass
# Collapse whitespace, trim to max_len.
text = " ".join(text.split())
if len(text) > max_len:
text = text[: max_len - 1] + ""
return text
def _trajectory_normalize_msg(msg: Dict[str, Any]) -> Dict[str, Any]:
"""Strip image blobs from a message for trajectory saving.
@ -5346,6 +5412,103 @@ class AIAgent:
self._pending_steer = None
return text
def _record_file_mutation_result(
    self,
    tool_name: str,
    args: Dict[str, Any],
    result: Any,
    is_error: bool,
) -> None:
    """Feed one ``write_file`` / ``patch`` outcome into the turn-end verifier.

    A failed call adds a ``{"tool", "error_preview"}`` entry for each
    targeted path that does not already have one.  A successful call
    clears any entry previously stored for its paths, since the model
    recovered within the same turn.

    Does nothing for tools outside ``_FILE_MUTATING_TOOLS``, when the
    per-turn ``_turn_failed_file_mutations`` dict is absent or ``None``
    (e.g. a tool dispatched outside ``run_conversation``), or when no
    target path can be extracted from ``args``.
    """
    if tool_name not in _FILE_MUTATING_TOOLS:
        return
    failures = getattr(self, "_turn_failed_file_mutations", None)
    if failures is None:
        return
    paths = _extract_file_mutation_targets(tool_name, args)
    if not paths:
        return

    if not is_error:
        # Success supersedes whatever failure was recorded earlier.
        for target in paths:
            failures.pop(target, None)
        return

    summary = _extract_error_preview(result)
    for target in paths:
        # First failure wins: a later failure with a different message
        # must not silently replace the original one.
        if target in failures:
            continue
        failures[target] = {
            "tool": tool_name,
            "error_preview": summary,
        }
def _file_mutation_verifier_enabled(self) -> bool:
"""Check whether the per-turn file-mutation verifier footer is on.
Config path: ``display.file_mutation_verifier`` (bool, default True).
``HERMES_FILE_MUTATION_VERIFIER`` env var overrides config. Exposed
as a method so tests can patch a single seam without reaching into
the private ``_turn_failed_file_mutations`` state dict.
"""
try:
import os as _os
env = _os.environ.get("HERMES_FILE_MUTATION_VERIFIER")
if env is not None:
return env.strip().lower() not in ("0", "false", "no", "off")
# Read from the persisted config.yaml so gateway and CLI share
# the same setting. Import lazily to avoid a startup-time cycle.
try:
from hermes_cli.config import load_config as _load_config
_cfg = _load_config() or {}
except Exception:
_cfg = {}
_display = _cfg.get("display") if isinstance(_cfg, dict) else None
if isinstance(_display, dict) and "file_mutation_verifier" in _display:
return bool(_display.get("file_mutation_verifier"))
except Exception:
pass
return True # safe default: verifier on
@staticmethod
def _format_file_mutation_failure_footer(failed: Dict[str, Dict[str, Any]]) -> str:
"""Render the per-turn failed-mutation dict as a user-facing footer.
Displays up to 10 paths with their first error preview, then a
count of any additional failures. Returns an empty string when
the dict is empty so callers can concatenate unconditionally.
"""
if not failed:
return ""
lines = [
"⚠️ File-mutation verifier: "
f"{len(failed)} file(s) were NOT modified this turn despite any "
"wording above that may suggest otherwise. Run `git status` or "
"`read_file` to confirm."
]
shown = 0
for path, info in failed.items():
if shown >= 10:
break
preview = (info.get("error_preview") or "").strip()
tool = info.get("tool") or "patch"
if preview:
lines.append(f"{path} — [{tool}] {preview}")
else:
lines.append(f"{path} — [{tool}] failed")
shown += 1
remaining = len(failed) - shown
if remaining > 0:
lines.append(f" • … and {remaining} more")
return "\n".join(lines)
def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
"""Append any pending /steer text to the last tool result in this turn.
@ -10872,6 +11035,17 @@ class AIAgent:
result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
# Track file-mutation outcome for the turn-end verifier.
# `blocked` calls never actually ran — don't let a guardrail
# block count as either a failure or a success.
if not blocked:
try:
self._record_file_mutation_result(
function_name, function_args, function_result, is_error,
)
except Exception as _ver_err:
logging.debug("file-mutation verifier record failed: %s", _ver_err)
if not blocked and self.tool_progress_callback:
try:
self.tool_progress_callback(
@ -11298,6 +11472,18 @@ class AIAgent:
else:
logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len)
# Track file-mutation outcome for the turn-end verifier. See
# the concurrent path for the rationale; both paths must feed
# the same state so the footer reflects every tool call in the
# turn, not just the parallel ones.
if not _execution_blocked:
try:
self._record_file_mutation_result(
function_name, function_args, function_result, _is_error_result,
)
except Exception as _ver_err:
logging.debug("file-mutation verifier record failed: %s", _ver_err)
if not _execution_blocked and self.tool_progress_callback:
try:
self.tool_progress_callback(
@ -11995,6 +12181,14 @@ class AIAgent:
truncated_response_prefix = ""
compression_attempts = 0
_turn_exit_reason = "unknown" # Diagnostic: why the loop ended
# Per-turn file-mutation verifier state. Keyed by the target path as
# given in the tool-call arguments (it is not resolved or normalised);
# each failed ``write_file`` / ``patch`` call records the error
# preview. Later successful writes to the same path remove the
# entry (the model recovered). At end-of-turn, any entries still
# present are surfaced in an advisory footer so the model cannot
# over-claim success while the file is actually unchanged on disk.
self._turn_failed_file_mutations: Dict[str, Dict[str, Any]] = {}
# Record the execution thread so interrupt()/clear_interrupt() can
# scope the tool-level interrupt signal to THIS agent's thread only.
@ -15310,6 +15504,31 @@ class AIAgent:
else:
logger.info(_diag_msg, *_diag_args)
# File-mutation verifier footer.
# If one or more ``write_file`` / ``patch`` calls failed during this
# turn and were never superseded by a successful write to the same
# path, append an advisory footer to the assistant response. This
# catches the specific case — reported by Ben Eng (#15524-adjacent)
# — where a model issues a batch of parallel patches, half of them
# fail with "Could not find old_string", and the model summarises
# the turn claiming every file was edited. The user then has to
# manually run ``git status`` to catch the lie. With this footer
# the truth is surfaced on every turn, so over-claiming is
# structurally impossible past the model.
#
# Gate: only applied when a real text response exists for this
# turn and the user didn't interrupt. Empty/interrupted turns
# already have other surface text that shouldn't be augmented.
if final_response and not interrupted:
try:
_failed = getattr(self, "_turn_failed_file_mutations", None) or {}
if _failed and self._file_mutation_verifier_enabled():
footer = self._format_file_mutation_failure_footer(_failed)
if footer:
final_response = final_response.rstrip() + "\n\n" + footer
except Exception as _ver_err:
logger.debug("file-mutation verifier footer failed: %s", _ver_err)
# Plugin hook: transform_llm_output
# Fired once per turn after the tool-calling loop completes.
# Plugins can transform the LLM's output text before it's returned.

View file

@ -793,30 +793,100 @@ function Install-Dependencies {
# Tell uv to install into our venv (no activation needed)
$env:VIRTUAL_ENV = "$InstallDir\venv"
}
# Install main package. Tiered fallback so a single flaky git+https dep
# (atroposlib / tinker in the [rl] extra) doesn't silently drop
# dashboard/MCP/cron/messaging extras. Each tier's stdout/stderr is
# Hash-verified install (Tier 0) — when uv.lock is present, prefer
# `uv sync --locked`. The lockfile records SHA256 hashes for every
# transitive dependency, so a compromised transitive (different hash
# than what we shipped) is REJECTED by the resolver. This is the
# *only* path that protects against the "direct dep is fine, but the
# dep's dep got worm-poisoned overnight" failure mode. The
# `uv pip install` tiers below re-resolve transitives fresh from PyPI
# without any hash verification — they exist to keep installs working
# when the lockfile is stale, missing, or out-of-sync with the
# current extras spec, NOT because they're equivalent in posture.
if (Test-Path "uv.lock") {
Write-Info "Trying tier: hash-verified (uv.lock) ..."
# Critical flag choice: `--extra all`, NOT `--all-extras`.
# --all-extras = every [project.optional-dependencies] key,
# bypassing the curated [all] extra. On Windows
# that means [matrix] -> python-olm (no wheel,
# needs `make` to build from sdist) and the
# install fails.
# --extra all = just the [all] extra's contents (curated).
& $UvCmd sync --extra all --locked
if ($LASTEXITCODE -eq 0) {
Write-Success "Main package installed (hash-verified via uv.lock)"
$script:InstalledTier = "hash-verified (uv.lock)"
# Skip the rest of the tiered cascade — we already have a
# complete, hash-verified install.
$skipPipFallback = $true
} else {
Write-Warn "uv.lock sync failed (lockfile may be stale), falling back to PyPI resolve..."
$skipPipFallback = $false
}
} else {
Write-Info "uv.lock not found — falling back to PyPI resolve (no hash verification)"
$skipPipFallback = $false
}
# Install main package. Tiered fallback so a single flaky transitive
# doesn't silently drop everything. Each tier's stdout/stderr is
# preserved — no Out-Null swallowing — so the user can see what failed.
#
# Tier 1: [all] — everything, including RL git+https deps (best case).
# Tier 2: [core-extras] synthesised locally — all PyPI-only extras we
# ship (web, mcp, cron, cli, voice, messaging, slack, dev, acp,
# pty, homeassistant, sms, tts-premium, honcho, google, mistral,
# bedrock, dingtalk, feishu, modal, daytona, vercel). Drops [rl]
# and [matrix] (linux-only) which are the usual failure culprits.
# Tier 3: [web,mcp,cron,cli,messaging,dev] — the minimum we strongly
# believe a user expects `hermes dashboard` / slash commands /
# cron / messaging platforms to work out of the box.
# Tier 4: bare `.` — last-resort so at least the core CLI launches.
# Tier 1: [all] — the curated extra in pyproject.toml.
# Tier 2: [all] minus the currently-broken extras list ($brokenExtras).
# Edit $brokenExtras below when something on PyPI breaks; this
# lets users keep the rest of [all] when one transitive is
# unavailable. The list of [all]'s contents is parsed from
# pyproject.toml at runtime — there is NO hand-mirrored copy
# to drift out of sync.
# Tier 3: bare `.` — last-resort so at least the core CLI launches.
# Currently-broken extras. Edit this list when an upstream package
# gets quarantined / yanked / breaks resolution. Empty means everything
# in [all] should be installable; populate with the names of extras
# whose deps are temporarily unavailable.
$brokenExtras = @()
# Parse [project.optional-dependencies].all from pyproject.toml.
# tomllib is stdlib on Python 3.11+ which the bootstrap guarantees.
$pythonExeForParse = if (-not $NoVenv) { "$InstallDir\venv\Scripts\python.exe" } else { (& $UvCmd python find $PythonVersion) }
$allExtras = @()
if (Test-Path $pythonExeForParse) {
$parsed = & $pythonExeForParse -c @"
import re, sys, tomllib
try:
with open('pyproject.toml', 'rb') as fh:
data = tomllib.load(fh)
specs = data['project']['optional-dependencies']['all']
out = []
for s in specs:
m = re.search(r'hermes-agent\[([\w-]+)\]', s)
if m: out.append(m.group(1))
print(','.join(out))
except Exception:
sys.exit(1)
"@ 2>$null
if ($LASTEXITCODE -eq 0 -and $parsed) {
$allExtras = $parsed.Trim().Split(',')
}
}
if (-not $allExtras -or $allExtras.Count -eq 0) {
Write-Warn "Could not parse [all] from pyproject.toml; Tier 2 will be a no-op."
$safeAll = "all"
} else {
$safeAll = ($allExtras | Where-Object { $brokenExtras -notcontains $_ }) -join ","
}
$brokenLabel = if ($brokenExtras) { ($brokenExtras -join ", ") } else { "none" }
$installTiers = @(
@{ Name = "all (with RL/matrix extras)"; Spec = ".[all]" },
@{ Name = "PyPI-only extras (no git deps)"; Spec = ".[web,mcp,cron,cli,voice,messaging,slack,dev,acp,pty,homeassistant,sms,tts-premium,honcho,google,mistral,bedrock,dingtalk,feishu,modal,daytona,vercel]" },
@{ Name = "dashboard + core platforms"; Spec = ".[web,mcp,cron,cli,messaging,dev]" },
@{ Name = "all"; Spec = ".[all]" },
@{ Name = "all minus known-broken ($brokenLabel)"; Spec = ".[$safeAll]" },
@{ Name = "core only (no extras)"; Spec = "." }
)
$installed = $false
foreach ($tier in $installTiers) {
$installed = $skipPipFallback
if (-not $skipPipFallback) {
foreach ($tier in $installTiers) {
Write-Info "Trying tier: $($tier.Name) ..."
& $UvCmd pip install -e $tier.Spec
if ($LASTEXITCODE -eq 0) {
@ -826,6 +896,7 @@ function Install-Dependencies {
break
}
Write-Warn "Tier '$($tier.Name)' failed (exit $LASTEXITCODE). Trying next tier..."
}
}
if (-not $installed) {
throw "Failed to install hermes-agent package even with no extras. Inspect the uv pip install output above."

View file

@ -366,7 +366,27 @@ install_uv() {
# Install uv
log_info "Installing uv (fast Python package manager)..."
if curl -LsSf https://astral.sh/uv/install.sh | sh 2>/dev/null; then
# Capture installer output so a failure shows the user WHY (network,
# glibc mismatch on old distros, missing curl, ~/.local/bin not
# writable, disk full, corp proxy / TLS interception, etc.) instead
# of the previous "✗ Failed to install uv" with zero diagnostic.
#
# Two-stage: download the installer, then run it. Piping
# `curl | sh` masks curl failures (sh exits 0 on empty stdin)
# and conflates network errors with installer errors.
local _uv_install_log _uv_installer
_uv_install_log="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-install.$$.log")"
_uv_installer="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-installer.$$.sh")"
if ! curl -LsSf https://astral.sh/uv/install.sh -o "$_uv_installer" 2>"$_uv_install_log"; then
log_error "Failed to download uv installer from https://astral.sh/uv/install.sh"
log_info "curl output:"
sed 's/^/ /' "$_uv_install_log" >&2
log_info "Install manually: https://docs.astral.sh/uv/getting-started/installation/"
rm -f "$_uv_install_log" "$_uv_installer"
exit 1
fi
if sh "$_uv_installer" >>"$_uv_install_log" 2>&1; then
rm -f "$_uv_installer"
# uv installs to ~/.local/bin by default
if [ -x "$HOME/.local/bin/uv" ]; then
UV_CMD="$HOME/.local/bin/uv"
@ -375,15 +395,22 @@ install_uv() {
elif command -v uv &> /dev/null; then
UV_CMD="uv"
else
log_error "uv installed but not found on PATH"
log_error "uv installer reported success but binary not found on PATH"
log_info "Installer output:"
sed 's/^/ /' "$_uv_install_log" >&2
log_info "Try adding ~/.local/bin to your PATH and re-running"
rm -f "$_uv_install_log"
exit 1
fi
rm -f "$_uv_install_log"
UV_VERSION=$($UV_CMD --version 2>/dev/null)
log_success "uv installed ($UV_VERSION)"
else
log_error "Failed to install uv"
log_info "Installer output:"
sed 's/^/ /' "$_uv_install_log" >&2
log_info "Install manually: https://docs.astral.sh/uv/getting-started/installation/"
rm -f "$_uv_install_log" "$_uv_installer"
exit 1
fi
}
@ -1060,20 +1087,151 @@ install_deps() {
fi
# Install the main package in editable mode with all extras.
# Try [all] first, fall back to base install if extras have issues.
ALL_INSTALL_LOG=$(mktemp)
if ! $UV_CMD pip install -e ".[all]" 2>"$ALL_INSTALL_LOG"; then
log_warn "Full install (.[all]) failed, trying base install..."
log_info "Reason: $(tail -5 "$ALL_INSTALL_LOG" | head -3)"
rm -f "$ALL_INSTALL_LOG"
if ! $UV_CMD pip install -e "."; then
log_error "Package installation failed."
log_info "Check that build tools are installed: sudo apt install build-essential python3-dev"
log_info "Then re-run: cd $INSTALL_DIR && uv pip install -e '.[all]'"
exit 1
#
# Hash-verified install (Tier 0) — when uv.lock is present, prefer
# `uv sync --locked`. The lockfile records SHA256 hashes for every
# transitive, so a compromised transitive (different hash than what
# we shipped) is REJECTED by the resolver. This is the *only* path
# that protects against the "direct dep is fine, but the dep's dep
# got worm-poisoned overnight" failure mode. All `uv pip install`
# tiers below re-resolve transitives fresh from PyPI without any
# hash verification — they exist to keep installs working when the
# lockfile is stale, missing, or out-of-sync with the current
# extras spec, NOT because they're equivalent in posture.
if [ -f "uv.lock" ]; then
log_info "Trying tier: hash-verified (uv.lock) ..."
log_info "(this resolves + downloads the curated [all] set — first run on a"
log_info " fresh venv can take 1-5 minutes; uv prints progress below)"
# Stream uv's progress directly to the user instead of swallowing
# it with `2>"$(mktemp)"`. Two reasons:
# 1. `--extra all --locked` against a fresh venv has to pull
# every transitive — silencing stderr makes the install
# look frozen for minutes on slow networks. Users see
# "Trying tier: hash-verified ..." and assume it's hung.
# 2. The previous `2>"$(mktemp)"` substituted the path at
# command-build time but never saved it, so on failure the
# uv error message was unreachable — the user just got the
# generic "lockfile may be stale" warning.
#
# Critical flag choice: `--extra all`, NOT `--all-extras`.
# --all-extras = every [project.optional-dependencies] key.
# This bypasses the curated `[all]` extra
# entirely and pulls e.g. [matrix] (which
# needs python-olm + make on Windows) and
# [rl] (git+https deps that fail offline).
# --extra all = install just the `[all]` extra's contents.
# This respects the curation in pyproject.toml.
# uv's own progress UI handles TTY detection and downgrades
# gracefully when stdout/stderr aren't terminals.
if UV_PROJECT_ENVIRONMENT="$INSTALL_DIR/venv" $UV_CMD sync --extra all --locked; then
log_success "Main package installed (hash-verified via uv.lock)"
log_success "All dependencies installed"
return 0
fi
log_warn "uv.lock sync failed (see uv output above), falling back to PyPI resolve..."
else
rm -f "$ALL_INSTALL_LOG"
log_info "uv.lock not found — falling back to PyPI resolve (no hash verification)"
fi
# Multi-tier fallback. The point of the tiers is that ONE compromised
# PyPI package (a worm-poisoned release that gets quarantined, like
# mistralai 2.4.6 in May 2026) shouldn't be able to silently demote a
# fresh install all the way down to "core only" — the user should keep
# everything else they signed up for.
#
# Tier 1: [all] — the curated extra in pyproject.toml.
# Tier 2: [all] minus the currently-broken extras list (_BROKEN_EXTRAS).
# Edit _BROKEN_EXTRAS below when something on PyPI breaks; this
# lets users keep the rest of [all] when one transitive is
# unavailable. The list of [all]'s contents is parsed from
# pyproject.toml at runtime — there is NO hand-mirrored copy
# to drift out of sync. If you want to change what [all]
# contains, edit pyproject.toml only.
# Tier 3: bare `.` — last-resort so at least the core CLI launches.
# Skipped tiers like "PyPI-only extras (no git deps)" used to
# exist to dodge [rl] / [matrix] git+sdist deps; those are no
# longer in [all] post-2026-05-12 lazy-install migration, so
# a separate PyPI-only tier had no remaining content.
local _BROKEN_EXTRAS=() # populate when an extra becomes unresolvable
# Parse [project.optional-dependencies].all from pyproject.toml.
# tomllib is stdlib on Python 3.11+ which uv's bootstrap guarantees.
# If parsing fails, Tier 2 simply reuses `.[all]` — defensive only.
local _ALL_EXTRAS_CSV
_ALL_EXTRAS_CSV="$(
"$PYTHON_PATH" - <<'PY' 2>/dev/null
import re, sys, tomllib
try:
with open("pyproject.toml", "rb") as fh:
data = tomllib.load(fh)
specs = data["project"]["optional-dependencies"]["all"]
extras = []
for s in specs:
m = re.search(r"hermes-agent\[([\w-]+)\]", s)
if m:
extras.append(m.group(1))
print(",".join(extras))
except Exception as e:
print("", file=sys.stderr)
sys.exit(1)
PY
)"
if [ -z "$_ALL_EXTRAS_CSV" ]; then
log_warn "Could not parse [all] from pyproject.toml; falling back to .[all] only."
_ALL_EXTRAS_CSV=""
fi
# Build "[all] minus broken" spec by filtering the parsed list.
local _SAFE_SPEC=".[all]"
if [ -n "$_ALL_EXTRAS_CSV" ] && [ "${#_BROKEN_EXTRAS[@]}" -gt 0 ]; then
local _SAFE_EXTRAS=()
local _e _b _skip
IFS=',' read -ra _ALL_EXTRAS_ARR <<< "$_ALL_EXTRAS_CSV"
for _e in "${_ALL_EXTRAS_ARR[@]}"; do
_skip=false
for _b in "${_BROKEN_EXTRAS[@]}"; do
if [ "$_e" = "$_b" ]; then _skip=true; break; fi
done
if [ "$_skip" = false ]; then _SAFE_EXTRAS+=("$_e"); fi
done
_SAFE_SPEC=".[$(IFS=,; echo "${_SAFE_EXTRAS[*]}")]"
fi
ALL_INSTALL_LOG=$(mktemp)
local _installed=false
local _tier_name=""
install_tier() {
    # Attempt one install tier.
    #   $1 - human-readable tier name (used in log lines and recorded in _tier_name)
    #   $2 - spec handed to `uv pip install -e`
    # stderr of the install is captured in $ALL_INSTALL_LOG; on failure its
    # first five lines are echoed to stderr. Returns 0 on success, 1 on failure.
    local name="$1" spec="$2"
    log_info "Trying tier: $name ..."
    if ! $UV_CMD pip install -e "$spec" 2>"$ALL_INSTALL_LOG"; then
        log_warn "Tier '$name' failed. Top of pip output:"
        head -5 "$ALL_INSTALL_LOG" | sed 's/^/ /' >&2
        return 1
    fi
    # Success: record which tier won so the caller can report it.
    log_success "Main package installed ($name)"
    _installed=true
    _tier_name="$name"
    return 0
}
install_tier "all" ".[all]" \
|| install_tier "all minus known-broken (${_BROKEN_EXTRAS[*]:-none})" "$_SAFE_SPEC" \
|| install_tier "core only (no extras)" "."
rm -f "$ALL_INSTALL_LOG"
if [ "$_installed" = false ]; then
log_error "Package installation failed even with no extras."
log_info "Check that build tools are installed: sudo apt install build-essential python3-dev"
log_info "Then re-run: cd $INSTALL_DIR && uv pip install -e '.[all]'"
exit 1
fi
# Warn only when a tier other than the full "all" tier succeeded. The literal
# must match the name passed to the first install_tier call exactly; comparing
# against any other string makes this warning fire on every install, including
# fully successful ones.
if [ "$_tier_name" != "all" ]; then
    log_warn "Note: installed via fallback tier ($_tier_name)."
    log_info "Some optional features may be missing. After resolving any"
    log_info "PyPI/network issue, re-run: $UV_CMD pip install -e '.[all]'"
fi
log_success "Main package installed"

View file

@ -53,12 +53,15 @@ AUTHOR_MAP = {
"421774554@qq.com": "wuli666",
"harish.kukreja@gmail.com": "counterposition",
"1046611633@qq.com": "zhengyn0001",
"db@project-aeon.com": "db-aeon",
"ahmed@abadr.net": "ahmedbadr3",
"cleo@edaphic.xyz": "curiouscleo",
"hirokazu.ogawa@kwansei.ac.jp": "hrkzogw",
"datapod.k@gmail.com": "dandacompany",
"treydong.zh@gmail.com": "TreyDong",
"kyanam.preetham@gmail.com": "pkyanam",
"127238744+teknium1@users.noreply.github.com": "teknium1",
"147827411+EloquentBrush@users.noreply.github.com": "AhmetArif0",
"hugosequier@gmail.com": "Hugo-SEQUIER",
"128259593+Gutslabs@users.noreply.github.com": "Gutslabs",
"50326054+nocturnum91@users.noreply.github.com": "nocturnum91",
@ -137,6 +140,22 @@ AUTHOR_MAP = {
"tangyuanjc@JCdeAIfenshendeMac-mini.local": "tangyuanjc",
"leon@agentlinker.ai": "agentlinker",
"santoshhumagain1887@gmail.com": "npmisantosh",
"39641663+luarss@users.noreply.github.com": "luarss",
"16263913+zccyman@users.noreply.github.com": "zccyman",
"ahmetosrak@Ahmet-MacBook-Air.local": "Osraka",
"98612432+Osraka@users.noreply.github.com": "Osraka",
"112634774+ryptotalent@users.noreply.github.com": "ryptotalent",
"270097726+hookinglau@users.noreply.github.com": "hookinglau",
"5029547+AllynSheep@users.noreply.github.com": "AllynSheep",
"allyn0306@gmail.com": "AllynSheep",
"46887634+aqilaziz@users.noreply.github.com": "aqilaziz",
"gonzes7@gmail.com": "aqilaziz",
"6966326+laoli-no1@users.noreply.github.com": "laoli-no1",
"laoli_no1@163.com": "laoli-no1",
"39730900+NorethSea@users.noreply.github.com": "NorethSea",
"963979204@qq.com": "NorethSea",
"2283389+JamesX88@users.noreply.github.com": "JamesX88",
"JamesX88@users.noreply.github.com": "JamesX88",
"novax635@gmail.com": "novax635",
"krionex1@gmail.com": "Krionex",
"rxdxxxx@users.noreply.github.com": "rxdxxxx",

View file

@ -82,7 +82,22 @@ else
echo -e "${GREEN}${NC} uv found ($UV_VERSION)"
else
echo -e "${CYAN}${NC} Installing uv..."
if curl -LsSf https://astral.sh/uv/install.sh | sh 2>/dev/null; then
# Capture installer output so a failure shows the user WHY
# (network, glibc mismatch on old distros, missing curl, disk
# full, etc.) instead of "✗ Failed to install uv" with zero
# diagnostic. Two-stage to avoid `curl | sh` masking curl
# failures (sh exits 0 on empty stdin under no pipefail).
_uv_log="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-install.$$.log")"
_uv_installer="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-installer.$$.sh")"
if ! curl -LsSf https://astral.sh/uv/install.sh -o "$_uv_installer" 2>"$_uv_log"; then
echo -e "${RED}${NC} Failed to download uv installer."
sed 's/^/ /' "$_uv_log" >&2
echo -e "${CYAN}${NC} Install manually: https://docs.astral.sh/uv/"
rm -f "$_uv_log" "$_uv_installer"
exit 1
fi
if sh "$_uv_installer" >>"$_uv_log" 2>&1; then
rm -f "$_uv_installer"
if [ -x "$HOME/.local/bin/uv" ]; then
UV_CMD="$HOME/.local/bin/uv"
elif [ -x "$HOME/.cargo/bin/uv" ]; then
@ -90,14 +105,22 @@ else
fi
if [ -n "$UV_CMD" ]; then
rm -f "$_uv_log"
UV_VERSION=$($UV_CMD --version 2>/dev/null)
echo -e "${GREEN}${NC} uv installed ($UV_VERSION)"
else
echo -e "${RED}${NC} uv installed but not found. Add ~/.local/bin to PATH and retry."
echo -e "${RED}${NC} uv installer reported success but binary not found. Add ~/.local/bin to PATH and retry."
echo -e "${CYAN}${NC} Installer output:"
sed 's/^/ /' "$_uv_log" >&2
rm -f "$_uv_log"
exit 1
fi
else
echo -e "${RED}${NC} Failed to install uv. Visit https://docs.astral.sh/uv/"
echo -e "${RED}${NC} Failed to install uv."
echo -e "${CYAN}${NC} Installer output:"
sed 's/^/ /' "$_uv_log" >&2
echo -e "${CYAN}${NC} Install manually: https://docs.astral.sh/uv/"
rm -f "$_uv_log" "$_uv_installer"
exit 1
fi
fi
@ -183,17 +206,63 @@ if is_termux; then
else
# Prefer uv sync with lockfile (hash-verified installs) when available,
# fall back to pip install for compatibility or when lockfile is stale.
#
# Multi-tier pip fallback. Goal: ONE compromised PyPI package
# (mistralai 2.4.6 in May 2026 → quarantined) shouldn't silently demote
# a fresh setup to "core only". Edit _BROKEN_EXTRAS when a transitive
# breaks; users keep voice / honcho / google / slack / matrix etc. even
# if mistral can't resolve.
_BROKEN_EXTRAS=() # populate when an extra becomes unresolvable
_ALL_EXTRAS=(
modal daytona vercel messaging matrix cron cli dev tts-premium slack
pty honcho mcp homeassistant sms acp voice dingtalk feishu google
bedrock web youtube
)
_SAFE_EXTRAS=()
for _e in "${_ALL_EXTRAS[@]}"; do
_skip=false
for _b in "${_BROKEN_EXTRAS[@]}"; do
[ "$_e" = "$_b" ] && _skip=true && break
done
[ "$_skip" = false ] && _SAFE_EXTRAS+=("$_e")
done
_SAFE_SPEC=".[$(IFS=,; echo "${_SAFE_EXTRAS[*]}")]"
# Install the package with progressively smaller extras until one attempt
# succeeds: the full [all] extra, then $_SAFE_SPEC ([all] minus the entries
# listed in _BROKEN_EXTRAS), then the bare package with no extras.
# The function's exit status is that of the last attempt that ran.
_try_install() {
$UV_CMD pip install -e ".[all]" \
|| $UV_CMD pip install -e "$_SAFE_SPEC" \
|| $UV_CMD pip install -e "."
}
if [ -f "uv.lock" ]; then
# Hash-verified install (preferred). The lockfile records SHA256
# hashes for every transitive — a compromised transitive would have
# a different hash and be REJECTED by uv. This is the only path
# that protects against transitive-package supply-chain attacks
# (the direct deps in pyproject.toml are exact-pinned, but
# `uv pip install` re-resolves transitives fresh from PyPI).
echo -e "${CYAN}${NC} Using uv.lock for hash-verified installation..."
UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --all-extras --locked 2>/dev/null && \
echo -e "${GREEN}${NC} Dependencies installed (lockfile verified)" || {
echo -e "${YELLOW}${NC} Lockfile install failed (may be outdated), falling back to pip install..."
$UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
echo -e "${GREEN}${NC} Dependencies installed"
}
echo -e "${CYAN}${NC} (first run on a fresh venv can take 1-5 minutes; uv prints progress below)"
# Critical flag choice: `--extra all`, NOT `--all-extras`. The
# latter installs every [project.optional-dependencies] key,
# bypassing the curated [all] extra and pulling backends like
# [matrix] (python-olm needs make on Windows) and [rl] (git+https
# deps that fail offline). See pyproject.toml's [all] for the
# curated set, and tools/lazy_deps.py for backends that install
# at first use.
# Also: stream stderr through directly so the user sees uv's
# progress UI instead of staring at a frozen prompt.
if UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --extra all --locked; then
echo -e "${GREEN}${NC} Dependencies installed (hash-verified via uv.lock)"
else
echo -e "${YELLOW}${NC} Lockfile sync failed (see uv output above)."
echo -e "${YELLOW}${NC} Falling back to PyPI resolve — transitives will NOT be hash-verified."
_try_install
echo -e "${GREEN}${NC} Dependencies installed (transitives re-resolved, not hash-verified)"
fi
else
$UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
echo -e "${GREEN}${NC} Dependencies installed"
echo -e "${YELLOW}${NC} uv.lock not found — installing without hash verification of transitives."
_try_install
echo -e "${GREEN}${NC} Dependencies installed (transitives re-resolved, not hash-verified)"
fi
fi

View file

@ -50,6 +50,7 @@ Your job description says "route, don't execute." The rules that enforce that:
- **For any concrete task, create a Kanban task and assign it.** Every single time.
- **Split multi-lane requests before creating cards.** A user prompt can contain several independent workstreams. Extract those lanes first, then create one card per lane instead of bundling unrelated work into a single implementer card.
- **Run independent lanes in parallel.** If two cards do not need each other's output, leave them unlinked so the dispatcher can fan them out. Link only true data dependencies.
- **Never create dependent work as independent ready cards.** If a card must wait for another card, pass `parents=[...]` in the original `kanban_create` call. Do not create it first and link it later, and do not rely on prose like "wait for T1" inside the body.
- **If no specialist fits the available profiles, ask the user which profile to create or which existing profile to use.** Do not invent profile names; the dispatcher will silently drop unknown assignees.
- **Decompose, route, and summarize — that's the whole job.**
@ -67,7 +68,7 @@ Before creating anything, draft the graph out loud (in your response to the user
2. Map each lane to one of the profiles you discovered in Step 0. If a lane doesn't fit any existing profile, ask the user which to use or create.
3. Decide whether each lane is independent or gated by another lane.
4. Create independent lanes as parallel cards with no parent links.
5. Create synthesis/review/integration cards with parent links to the lanes they depend on.
5. Create synthesis/review/integration cards with parent links to the lanes they depend on. A child created with unfinished parents starts in `todo`; the dispatcher promotes it to `ready` only after every parent is done.
Examples of prompts that should fan out (using placeholder profile names — substitute whatever exists on the user's setup):
@ -115,6 +116,8 @@ t4 = kanban_create(
`parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it.
If the task graph has dependencies, create the parent cards first, capture their returned ids, and include those ids in the child card's `parents` list during the child `kanban_create` call. Avoid creating all cards in parallel and linking them afterward; that creates a window where the dispatcher can claim a child before its inputs exist.
### Step 4 — Complete your own task
If you were spawned as a task yourself (e.g. a planner profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:

View file

@ -0,0 +1 @@
"""Pytest helpers for LSP-related tests."""

View file

@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""A minimal in-process LSP server used by tests.
Speaks just enough LSP to drive :class:`agent.lsp.client.LSPClient`
through a full lifecycle: ``initialize``, ``initialized``,
``textDocument/didOpen``, ``textDocument/didChange``, then a
``textDocument/publishDiagnostics`` notification followed by
``shutdown`` + ``exit``.
Behaviour (all behaviours selectable via env var ``MOCK_LSP_SCRIPT``):
- ``"clean"`` — initialize, accept didOpen/didChange, push empty
diagnostics on every open/change, exit cleanly on shutdown.
- ``"errors"`` — same as ``clean`` but the published diagnostics
carry one severity-1 entry pointing at line 0:0.
- ``"crash"`` — exit immediately after responding to ``initialize``
(simulates a crashing server).
- ``"slow"`` — same as ``clean`` but sleeps 1s before responding to
``initialize`` (lets us test timeout behaviour).
The script writes JSON-RPC framed messages to stdout and reads from
stdin. No third-party dependencies — uses only stdlib so it runs
under whatever Python the test process picks up.
"""
from __future__ import annotations
import json
import os
import sys
import time
def read_message():
    """Read one Content-Length framed JSON-RPC message from stdin.

    Returns:
        The decoded JSON payload, or ``None`` when stdin reaches EOF,
        either while reading the header section or part-way through
        the body (the peer closed the pipe mid-message).

    Raises:
        KeyError: if the header section has no ``Content-Length`` entry.
    """
    stream = sys.stdin.buffer
    headers = {}
    while True:
        line = stream.readline()
        if not line:
            # EOF before a complete header section.
            return None
        line = line.rstrip(b"\r\n")
        if not line:
            # Blank line terminates the header section.
            break
        key, _, value = line.decode("ascii").partition(":")
        headers[key.strip().lower()] = value.strip()
    length = int(headers["content-length"])
    body = stream.read(length)
    if len(body) < length:
        # Short read means the stream ended mid-body; treat it like EOF
        # instead of crashing in json.loads on a truncated payload.
        return None
    return json.loads(body.decode("utf-8"))
def write_message(obj):
    """Serialize *obj* as compact JSON and write it, Content-Length framed, to stdout."""
    payload = json.dumps(obj, separators=(",", ":")).encode("utf-8")
    out = sys.stdout.buffer
    header = "Content-Length: %d\r\n\r\n" % len(payload)
    out.write(header.encode("ascii"))
    out.write(payload)
    # Flush right away so the peer sees the message.
    out.flush()
def main():
    """Serve scripted LSP traffic until stdin closes or ``exit`` arrives.

    The ``MOCK_LSP_SCRIPT`` environment variable (default ``"clean"``)
    selects the scripted behaviour. Always returns ``0``.
    """
    mode = os.environ.get("MOCK_LSP_SCRIPT", "clean")
    # Notifications that are accepted and produce no reply.
    ignored = (
        "initialized",
        "workspace/didChangeConfiguration",
        "workspace/didChangeWatchedFiles",
        "textDocument/didSave",
    )
    sync_events = ("textDocument/didOpen", "textDocument/didChange")

    # read_message() yields None at EOF, which ends the loop.
    for msg in iter(read_message, None):
        method = msg.get("method")
        has_id = "id" in msg

        if has_id and method == "initialize":
            if mode == "slow":
                time.sleep(1.0)
            capabilities = {
                "textDocumentSync": 1,  # Full
                "diagnosticProvider": {"interFileDependencies": False, "workspaceDiagnostics": False},
            }
            write_message(
                {
                    "jsonrpc": "2.0",
                    "id": msg["id"],
                    "result": {
                        "capabilities": capabilities,
                        "serverInfo": {"name": "mock-lsp", "version": "0.1"},
                    },
                }
            )
            if mode == "crash":
                # Simulated crash: quit right after answering initialize.
                return 0
        elif method in ignored:
            pass
        elif method in sync_events:
            doc = (msg.get("params") or {}).get("textDocument") or {}
            if mode == "errors":
                found = [
                    {
                        "range": {
                            "start": {"line": 0, "character": 0},
                            "end": {"line": 0, "character": 5},
                        },
                        "severity": 1,
                        "code": "MOCK001",
                        "source": "mock-lsp",
                        "message": "synthetic error from mock-lsp",
                    }
                ]
            else:
                found = []
            write_message(
                {
                    "jsonrpc": "2.0",
                    "method": "textDocument/publishDiagnostics",
                    "params": {
                        "uri": doc.get("uri", ""),
                        "version": doc.get("version", 0),
                        "diagnostics": found,
                    },
                }
            )
        elif method == "textDocument/diagnostic":
            # Pull endpoint — return empty.
            write_message(
                {
                    "jsonrpc": "2.0",
                    "id": msg["id"],
                    "result": {"kind": "full", "items": []},
                }
            )
        elif method == "shutdown":
            write_message({"jsonrpc": "2.0", "id": msg["id"], "result": None})
        elif method == "exit":
            return 0
        elif has_id:
            # Unknown request: respond with method-not-found.
            write_message(
                {
                    "jsonrpc": "2.0",
                    "id": msg["id"],
                    "error": {"code": -32601, "message": f"method not found: {msg.get('method')}"},
                }
            )
    return 0
if __name__ == "__main__":
sys.exit(main())

View file

@ -0,0 +1,108 @@
"""Integration test: LSP layer is skipped on non-local backends.
The host-side LSP server can't see files inside a Docker/Modal/SSH
sandbox. When the agent's terminal env isn't ``LocalEnvironment``,
the file_operations layer must skip both ``snapshot_baseline`` and
``get_diagnostics_sync`` calls, falling back to the in-process
syntax check exactly as if LSP were disabled.
"""
from __future__ import annotations
import os
import sys
from unittest.mock import MagicMock
import pytest
from agent.lsp import eventlog
@pytest.fixture(autouse=True)
def _reset():
    """Call ``eventlog.reset_announce_caches()`` before every test in this module."""
    eventlog.reset_announce_caches()
def test_local_only_helper_returns_true_for_local_env(tmp_path):
    """``_lsp_local_only`` is True when the backend is a ``LocalEnvironment``.

    Uses pytest's ``tmp_path`` rather than a hard-coded ``/tmp`` so the
    test does not depend on that directory existing on the host.
    """
    from tools.environments.local import LocalEnvironment
    from tools.file_operations import ShellFileOperations

    fops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    assert fops._lsp_local_only() is True
def test_local_only_helper_returns_false_for_non_local_env():
    """``_lsp_local_only`` is False for an env that is not a ``LocalEnvironment``."""
    from tools.file_operations import ShellFileOperations

    # A bare MagicMock stands in for Docker/Modal/SSH: it is not an
    # instance of LocalEnvironment.
    sandbox_env = MagicMock(cwd="/sandbox")
    sandbox_env.execute = MagicMock(return_value=MagicMock(exit_code=0, stdout=""))

    file_ops = ShellFileOperations(sandbox_env)
    assert file_ops._lsp_local_only() is False
def test_snapshot_baseline_skipped_for_non_local(monkeypatch):
    """``_snapshot_lsp_baseline`` must not reach the LSP service on a non-local backend."""
    from tools.file_operations import ShellFileOperations

    sandbox_env = MagicMock(cwd="/sandbox")
    sandbox_env.execute = MagicMock(return_value=MagicMock(exit_code=0, stdout=""))
    file_ops = ShellFileOperations(sandbox_env)

    seen_paths = []

    class _RecordingService:
        def snapshot_baseline(self, path):
            seen_paths.append(path)

    monkeypatch.setattr("agent.lsp.get_service", lambda: _RecordingService())
    file_ops._snapshot_lsp_baseline("/sandbox/x.py")

    assert seen_paths == [], "snapshot must be skipped for non-local backends"
def test_maybe_lsp_diagnostics_returns_empty_for_non_local(monkeypatch):
    """``_maybe_lsp_diagnostics`` returns ``""`` and never queries the service on a non-local backend."""
    from tools.file_operations import ShellFileOperations

    sandbox_env = MagicMock(cwd="/sandbox")
    sandbox_env.execute = MagicMock(return_value=MagicMock(exit_code=0, stdout=""))
    file_ops = ShellFileOperations(sandbox_env)

    service_calls = []

    class _RecordingService:
        def enabled_for(self, path):
            service_calls.append(("enabled_for", path))
            return True

        def get_diagnostics_sync(self, path, **kw):
            service_calls.append(("get_diagnostics_sync", path))
            return [{"severity": 1, "message": "should not see this"}]

    monkeypatch.setattr("agent.lsp.get_service", lambda: _RecordingService())
    outcome = file_ops._maybe_lsp_diagnostics("/sandbox/x.py")

    assert outcome == ""
    assert service_calls == [], "service must not be queried for non-local backends"
def test_snapshot_baseline_called_for_local_env(tmp_path, monkeypatch):
    """On a ``LocalEnvironment`` the baseline snapshot is forwarded to the LSP service."""
    from tools.environments.local import LocalEnvironment
    from tools.file_operations import ShellFileOperations

    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    target = str(tmp_path / "x.py")
    seen_paths = []

    class _RecordingService:
        def snapshot_baseline(self, path):
            seen_paths.append(path)

    monkeypatch.setattr("agent.lsp.get_service", lambda: _RecordingService())
    file_ops._snapshot_lsp_baseline(target)

    assert seen_paths == [target]

View file

@ -0,0 +1,213 @@
"""Tests for the broken-set short-circuit added to handle outer-timeout failures.
When ``snapshot_baseline`` or ``get_diagnostics_sync`` time out from the
service layer (because a language server hangs during initialize, or
the binary is wedged), the inner spawn task is cancelled but the
inner exception handler that adds to ``_broken`` never runs. Without
the service-layer fallback added in this module, every subsequent
edit re-pays the full timeout cost until the process exits.
This module verifies:
- ``_mark_broken_for_file`` adds the right key
- ``enabled_for`` short-circuits on broken keys
- a missing binary is broken-set'd after one snapshot attempt
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from agent.lsp.manager import LSPService
from agent.lsp.servers import SERVERS, ServerContext, ServerDef, SpawnSpec
from agent.lsp.workspace import clear_cache
@pytest.fixture(autouse=True)
def _clear_workspace_cache():
    """Call ``clear_cache()`` both before and after every test in this module."""
    clear_cache()
    yield
    clear_cache()
def _make_git_workspace(tmp_path: Path) -> Path:
"""Build a minimal git repo with a pyproject so pyright's root resolver fires."""
repo = tmp_path / "repo"
repo.mkdir()
(repo / ".git").mkdir()
(repo / "pyproject.toml").write_text("[project]\nname='t'\n")
return repo
def test_mark_broken_for_file_adds_correct_key(tmp_path, monkeypatch):
    """``_mark_broken_for_file`` records a ``(server_id, root)`` key in ``_broken``.

    For a ``.py`` file in the fixture repo the expected key is
    ``("pyright", <repo root>)``.
    """
    workspace = _make_git_workspace(tmp_path)
    monkeypatch.chdir(str(workspace))
    source = workspace / "x.py"
    source.write_text("")

    options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**options)
    try:
        service._mark_broken_for_file(str(source), RuntimeError("simulated"))
        # The pyright server resolves to the repo root via pyproject.toml.
        assert ("pyright", str(workspace)) in service._broken
    finally:
        service.shutdown()
def test_enabled_for_returns_false_after_broken(tmp_path, monkeypatch):
    """``enabled_for`` flips from True to False once the file's pair is marked broken."""
    workspace = _make_git_workspace(tmp_path)
    monkeypatch.chdir(str(workspace))
    source = workspace / "x.py"
    source.write_text("")
    source_path = str(source)

    options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**options)
    try:
        # Before marking: the file is served.
        assert service.enabled_for(source_path) is True
        service._mark_broken_for_file(source_path, RuntimeError("simulated"))
        # After marking: the broken-set short-circuits.
        assert service.enabled_for(source_path) is False
    finally:
        service.shutdown()
def test_enabled_for_other_file_in_same_project_also_skipped(tmp_path, monkeypatch):
    """Marking one file broken disables ``enabled_for`` for a sibling file in the same repo."""
    workspace = _make_git_workspace(tmp_path)
    monkeypatch.chdir(str(workspace))
    first = workspace / "a.py"
    second = workspace / "b.py"
    first.write_text("")
    second.write_text("")

    options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**options)
    try:
        service._mark_broken_for_file(str(first), RuntimeError("simulated"))
        # Both the triggering file and its sibling are skipped.
        assert service.enabled_for(str(first)) is False
        assert service.enabled_for(str(second)) is False
    finally:
        service.shutdown()
def test_unrelated_project_not_affected_by_broken(tmp_path, monkeypatch):
    """A broken mark made for a file in repo A leaves ``enabled_for`` True in repo B."""
    project_a = _make_git_workspace(tmp_path)
    project_b = tmp_path / "repo-b"
    project_b.mkdir()
    (project_b / ".git").mkdir()
    (project_b / "pyproject.toml").write_text("[project]\nname='b'\n")

    file_a = project_a / "x.py"
    file_b = project_b / "x.py"
    file_a.write_text("")
    file_b.write_text("")

    monkeypatch.chdir(str(project_a))
    options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**options)
    try:
        service._mark_broken_for_file(str(file_a), RuntimeError("simulated"))
        # Project A is skipped.
        assert service.enabled_for(str(file_a)) is False
        # Project B is still served.
        monkeypatch.chdir(str(project_b))
        assert service.enabled_for(str(file_b)) is True
    finally:
        service.shutdown()
def test_mark_broken_handles_missing_server_silently(tmp_path):
    """``_mark_broken_for_file`` on a ``.xyz`` path neither raises nor adds to ``_broken``."""
    options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**options)
    try:
        unknown = str(tmp_path / "weird.xyz")
        # No registered server for .xyz; must not raise.
        service._mark_broken_for_file(unknown, RuntimeError("x"))
        assert len(service._broken) == 0
    finally:
        service.shutdown()
def test_mark_broken_handles_no_workspace_silently(tmp_path):
    """A ``.py`` file with no surrounding repo markers leaves ``_broken`` empty."""
    orphan = tmp_path / "orphan.py"
    orphan.write_text("")

    options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**options)
    try:
        service._mark_broken_for_file(str(orphan), RuntimeError("x"))
        assert len(service._broken) == 0
    finally:
        service.shutdown()
def test_snapshot_failure_marks_broken_via_outer_timeout(tmp_path, monkeypatch):
    """A failing ``snapshot_baseline`` call leaves the file's pair in ``_broken``.

    The failure is simulated by patching ``_snapshot_async`` with a
    coroutine that raises; afterwards ``enabled_for`` must return False.
    """
    workspace = _make_git_workspace(tmp_path)
    monkeypatch.chdir(str(workspace))
    source = workspace / "x.py"
    source.write_text("")
    source_path = str(source)

    options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**options)
    try:
        # Replacement coroutine that always fails.
        async def _always_fails(_path):
            raise RuntimeError("outer-timeout simulated")

        with patch.object(service, "_snapshot_async", _always_fails):
            assert service.enabled_for(source_path) is True
            service.snapshot_baseline(source_path)

        # The failure must have landed the pair in the broken-set.
        assert ("pyright", str(workspace)) in service._broken
        assert service.enabled_for(source_path) is False
    finally:
        service.shutdown()

View file

@ -0,0 +1,143 @@
"""End-to-end client tests against the in-process mock LSP server.
Spins up :file:`_mock_lsp_server.py` as an actual subprocess, drives
it through real LSP traffic, and asserts diagnostic flow. This is
the closest thing we have to integration coverage without requiring
pyright/gopls/etc. to be installed in CI.
"""
from __future__ import annotations
import asyncio
import os
import sys
from pathlib import Path
import pytest
from agent.lsp.client import LSPClient
MOCK_SERVER = str(Path(__file__).parent / "_mock_lsp_server.py")
def _client(workspace: Path, script: str = "clean") -> LSPClient:
    """Build an ``LSPClient`` that launches the mock server with the given script mode."""
    server_env = {
        "MOCK_LSP_SCRIPT": script,
        "PYTHONPATH": os.environ.get("PYTHONPATH", ""),
    }
    root = str(workspace)
    return LSPClient(
        server_id=f"mock-{script}",
        workspace_root=root,
        command=[sys.executable, MOCK_SERVER],
        env=server_env,
        cwd=root,
    )
@pytest.mark.asyncio
async def test_client_lifecycle_clean(tmp_path: Path):
    """Start, open a file, wait for diagnostics (expect none), then shut down."""
    source = tmp_path / "x.py"
    source.write_text("print('hi')\n")
    source_path = str(source)

    client = _client(tmp_path, "clean")
    await client.start()
    try:
        assert client.is_running
        opened_version = await client.open_file(source_path, language_id="python")
        assert opened_version == 0
        await client.wait_for_diagnostics(source_path, opened_version, mode="document")
        assert client.diagnostics_for(source_path) == []
    finally:
        await client.shutdown()
    assert not client.is_running
@pytest.mark.asyncio
async def test_client_receives_published_errors(tmp_path: Path):
    """In ``errors`` mode the client exposes the single diagnostic the mock publishes."""
    source = tmp_path / "x.py"
    source.write_text("print('hi')\n")
    source_path = str(source)

    client = _client(tmp_path, "errors")
    await client.start()
    try:
        opened_version = await client.open_file(source_path, language_id="python")
        await client.wait_for_diagnostics(source_path, opened_version, mode="document")
        received = client.diagnostics_for(source_path)
        assert len(received) == 1
        only = received[0]
        assert only["severity"] == 1
        assert only["code"] == "MOCK001"
        assert only["source"] == "mock-lsp"
        assert "synthetic error" in only["message"]
    finally:
        await client.shutdown()
@pytest.mark.asyncio
async def test_client_didchange_bumps_version(tmp_path: Path):
    """Opening an already-open file a second time returns the previous version plus one."""
    source = tmp_path / "x.py"
    source.write_text("print('hi')\n")
    source_path = str(source)

    client = _client(tmp_path, "errors")
    await client.start()
    try:
        first_version = await client.open_file(source_path, language_id="python")
        source.write_text("print('hi 2')\n")
        # re-open path = didChange
        second_version = await client.open_file(source_path, language_id="python")
        assert second_version == first_version + 1
        await client.wait_for_diagnostics(source_path, second_version, mode="document")
        # Mock pushed a diagnostic for both events; merged view has one
        # entry (push store keyed by file path).
        assert len(client.diagnostics_for(source_path)) == 1
    finally:
        await client.shutdown()
@pytest.mark.asyncio
async def test_client_handles_crashing_server(tmp_path: Path):
    """When the server exits right after initialize, a later request must not hang.

    Any exception raised by ``open_file`` is acceptable; the only failure
    is the call still being pending after two seconds. ``asyncio.wait`` is
    used instead of ``wait_for`` + ``except Exception`` because the latter
    also swallows the timeout itself, so a hang could never fail the test.
    """
    f = tmp_path / "x.py"
    f.write_text("")
    client = _client(tmp_path, "crash")
    await client.start()  # should succeed (mock answers initialize before crashing)
    try:
        # Give the OS a moment to deliver the EOF.
        await asyncio.sleep(0.2)
        attempt = asyncio.ensure_future(
            client.open_file(str(f), language_id="python")
        )
        # asyncio.wait() reports a timeout via the pending set rather than
        # raising, so a hang is distinguishable from an error in open_file.
        _, pending = await asyncio.wait({attempt}, timeout=2.0)
        if pending:
            attempt.cancel()
            pytest.fail("open_file hung against a crashed server")
        if not attempt.cancelled():
            # Retrieve (and ignore) any exception so asyncio does not log
            # "Task exception was never retrieved".
            attempt.exception()
    finally:
        # Always reap the subprocess, even when the assertion above fails.
        await client.shutdown()
@pytest.mark.asyncio
async def test_client_shutdown_idempotent(tmp_path: Path):
    """A second ``shutdown`` call on an already shut-down client must not raise."""
    source = tmp_path / "x.py"
    source.write_text("")

    client = _client(tmp_path, "clean")
    await client.start()
    for _ in range(2):
        # The second pass is the actual assertion: it must not raise.
        await client.shutdown()
@pytest.mark.asyncio
async def test_client_diagnostics_are_deduped(tmp_path: Path):
    """Three open/wait rounds with identical pushes still leave exactly one diagnostic."""
    source = tmp_path / "x.py"
    source.write_text("")
    source_path = str(source)

    client = _client(tmp_path, "errors")
    await client.start()
    try:
        rounds = 3
        while rounds:
            version = await client.open_file(source_path, language_id="python")
            await client.wait_for_diagnostics(source_path, version, mode="document")
            rounds -= 1
        # Push store overwrites on every notification — should have 1.
        assert len(client.diagnostics_for(source_path)) == 1
    finally:
        await client.shutdown()

View file

@ -0,0 +1,146 @@
"""Tests for the ``lsp_diagnostics`` field on WriteResult / PatchResult.
The field exists so the agent can read syntax errors (``lint``) and
semantic errors (``lsp_diagnostics``) as separate signals rather than
having LSP output prepended to the lint string.
"""
from __future__ import annotations
import os
import sys
import tempfile
from unittest.mock import MagicMock, patch
import pytest
from tools.environments.local import LocalEnvironment
from tools.file_operations import (
PatchResult,
ShellFileOperations,
WriteResult,
)
# ---------------------------------------------------------------------------
# Dataclass shape
# ---------------------------------------------------------------------------
def test_writeresult_lsp_diagnostics_optional():
    """A default-constructed ``WriteResult`` has ``lsp_diagnostics`` set to None."""
    result = WriteResult()
    assert result.lsp_diagnostics is None
def test_writeresult_to_dict_omits_field_when_none():
    """``WriteResult.to_dict()`` has no ``lsp_diagnostics`` key when the field is unset."""
    as_dict = WriteResult(bytes_written=10).to_dict()
    assert "lsp_diagnostics" not in as_dict
def test_writeresult_to_dict_includes_field_when_set():
    """``WriteResult.to_dict()`` carries the ``lsp_diagnostics`` value through unchanged."""
    result = WriteResult(bytes_written=10, lsp_diagnostics="<diagnostics>...</diagnostics>")
    as_dict = result.to_dict()
    assert as_dict["lsp_diagnostics"] == "<diagnostics>...</diagnostics>"
def test_patchresult_to_dict_includes_field_when_set():
    """``PatchResult.to_dict()`` carries the ``lsp_diagnostics`` value through unchanged."""
    result = PatchResult(success=True, lsp_diagnostics="ERROR [1:1] thing")
    as_dict = result.to_dict()
    assert as_dict["lsp_diagnostics"] == "ERROR [1:1] thing"
def test_patchresult_to_dict_omits_field_when_none():
    """``PatchResult.to_dict()`` has no ``lsp_diagnostics`` key when the field is unset."""
    as_dict = PatchResult(success=True).to_dict()
    assert "lsp_diagnostics" not in as_dict
def test_patchresult_to_dict_omits_field_when_empty_string():
    """An empty-string ``lsp_diagnostics`` is dropped from ``PatchResult.to_dict()``."""
    as_dict = PatchResult(success=True, lsp_diagnostics="").to_dict()
    assert "lsp_diagnostics" not in as_dict
# ---------------------------------------------------------------------------
# Channel separation: lint and lsp_diagnostics stay independent
# ---------------------------------------------------------------------------
def test_lint_and_lsp_diagnostics_are_separate_channels():
    """``to_dict()`` exposes ``lint`` and ``lsp_diagnostics`` as two independent keys."""
    syntax_report = {"status": "error", "output": "SyntaxError: ..."}
    result = WriteResult(
        bytes_written=42,
        lint=syntax_report,
        lsp_diagnostics="<diagnostics>ERROR [1:5] type mismatch</diagnostics>",
    )
    as_dict = result.to_dict()
    for key in ("lint", "lsp_diagnostics"):
        assert key in as_dict
    assert as_dict["lint"]["output"] == "SyntaxError: ..."
    assert "type mismatch" in as_dict["lsp_diagnostics"]
# ---------------------------------------------------------------------------
# write_file populates the field via _maybe_lsp_diagnostics
# ---------------------------------------------------------------------------
def test_write_file_populates_lsp_diagnostics_when_layer_returns_block(tmp_path):
    """A non-empty block from the LSP layer goes into ``lsp_diagnostics``.

    It must not be folded into ``lint.output``.
    """
    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    destination = str(tmp_path / "x.py")
    block = "<diagnostics file=\"x.py\">\nERROR [1:1] problem\n</diagnostics>"
    with patch.object(file_ops, "_maybe_lsp_diagnostics", return_value=block):
        outcome = file_ops.write_file(destination, "x = 1\n")
    assert outcome.lsp_diagnostics == block
    # "x = 1" passes the syntax check, so the lint channel stays clean and
    # must not contain the LSP block.
    assert outcome.lint == {"status": "ok", "output": ""}
def test_write_file_lsp_diagnostics_none_when_layer_returns_empty(tmp_path):
    """An empty string from the LSP layer leaves ``lsp_diagnostics`` as None."""
    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    destination = str(tmp_path / "x.py")
    with patch.object(file_ops, "_maybe_lsp_diagnostics", return_value=""):
        outcome = file_ops.write_file(destination, "x = 1\n")
    assert outcome.lsp_diagnostics is None
def test_write_file_skips_lsp_when_syntax_failed(tmp_path):
    """The LSP layer is not consulted when the syntax check finds errors.

    A file that will not parse will not yield meaningful semantic diagnostics.
    """
    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    destination = str(tmp_path / "broken.py")
    broken_source = "def x(:\n"  # syntax error
    with patch.object(file_ops, "_maybe_lsp_diagnostics") as lsp_spy:
        outcome = file_ops.write_file(destination, broken_source)
    assert lsp_spy.call_count == 0
    assert outcome.lsp_diagnostics is None
    assert outcome.lint["status"] == "error"
# ---------------------------------------------------------------------------
# patch_replace propagates the field from the inner write_file
# ---------------------------------------------------------------------------
def test_patch_replace_propagates_lsp_diagnostics(tmp_path):
    """The PatchResult must carry forward the diagnostics from the inner write."""
    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    source_file = tmp_path / "x.py"
    source_file.write_text("x = 1\n")
    block = "<diagnostics>ERROR [1:5] semantic issue</diagnostics>"
    with patch.object(file_ops, "_maybe_lsp_diagnostics", return_value=block):
        outcome = file_ops.patch_replace(str(source_file), "x = 1", "x = 2")
    assert outcome.success is True
    assert outcome.lsp_diagnostics == block

View file

@ -0,0 +1,199 @@
"""Tests for the structured logging dedup model.
The contract: a 1000-write session in one project should emit exactly
ONE INFO line ("active for <root>") at the default INFO threshold.
Steady-state events stay at DEBUG; first-time-seen events surface
once at INFO/WARNING.
"""
from __future__ import annotations
import logging
import pytest
from agent.lsp import eventlog
@pytest.fixture(autouse=True)
def _reset():
    """Reset the eventlog announce caches before and after every test."""
    # Setup: start from empty announce caches.
    eventlog.reset_announce_caches()
    yield
    # Teardown: reset again so later tests see empty caches too.
    eventlog.reset_announce_caches()
@pytest.fixture
def caplog_lsp(caplog):
    """Return ``caplog`` with the ``hermes.lint.lsp`` logger set to DEBUG."""
    lsp_logger_name = "hermes.lint.lsp"
    caplog.set_level(logging.DEBUG, logger=lsp_logger_name)
    return caplog
# ---------------------------------------------------------------------------
# Steady-state silence (DEBUG)
# ---------------------------------------------------------------------------
def test_clean_emits_at_debug(caplog_lsp):
    """Ten ``log_clean`` calls give ten DEBUG records and nothing at INFO or above."""
    for _ in range(10):
        eventlog.log_clean("pyright", "/proj/x.py")
    at_or_above_info = []
    exactly_debug = []
    for record in caplog_lsp.records:
        if record.levelno >= logging.INFO:
            at_or_above_info.append(record)
        if record.levelno == logging.DEBUG:
            exactly_debug.append(record)
    assert at_or_above_info == []
    assert len(exactly_debug) == 10
def test_disabled_emits_at_debug(caplog_lsp):
    """Every record produced by ``log_disabled`` is at DEBUG level."""
    for reason in ("feature off", "ext not mapped"):
        eventlog.log_disabled("pyright", "/x.py", reason)
    levels = [record.levelno for record in caplog_lsp.records]
    assert all(level == logging.DEBUG for level in levels)
# ---------------------------------------------------------------------------
# State transitions: INFO once, DEBUG thereafter
# ---------------------------------------------------------------------------
def test_active_for_fires_once_per_root(caplog_lsp):
    """Fifty ``log_active`` calls for one root give exactly one 'active for' INFO."""
    for _ in range(50):
        eventlog.log_active("pyright", "/proj")
    announcements = []
    for record in caplog_lsp.records:
        if record.levelno == logging.INFO and "active for" in record.getMessage():
            announcements.append(record)
    assert len(announcements) == 1
def test_active_for_fires_per_distinct_root(caplog_lsp):
    """Two different roots for the same server each produce an INFO record."""
    for root in ("/proj-a", "/proj-b"):
        eventlog.log_active("pyright", root)
    info_level = [rec for rec in caplog_lsp.records if rec.levelno == logging.INFO]
    assert len(info_level) == 2
def test_active_for_separate_per_server(caplog_lsp):
    """Two different servers on the same root each produce an INFO record."""
    for server_id in ("pyright", "typescript"):
        eventlog.log_active(server_id, "/proj")
    info_level = [rec for rec in caplog_lsp.records if rec.levelno == logging.INFO]
    assert len(info_level) == 2
def test_no_project_root_fires_once_per_path(caplog_lsp):
    """Repeated ``log_no_project_root`` calls for one path give a single INFO."""
    repeats = 5
    for _ in range(repeats):
        eventlog.log_no_project_root("pyright", "/orphan.py")
    info_level = [rec for rec in caplog_lsp.records if rec.levelno == logging.INFO]
    assert len(info_level) == 1
# ---------------------------------------------------------------------------
# Diagnostics events fire INFO every time
# ---------------------------------------------------------------------------
def test_diagnostics_always_info(caplog_lsp):
    """Each ``log_diagnostics`` call emits its own INFO record mentioning 'diags'."""
    for index in range(5):
        eventlog.log_diagnostics("pyright", f"/x{index}.py", 1)
    info_level = [rec for rec in caplog_lsp.records if rec.levelno == logging.INFO]
    assert len(info_level) == 5
    for record in info_level:
        assert "diags" in record.getMessage()
# ---------------------------------------------------------------------------
# Action-required: WARNING once, DEBUG thereafter (or per call for novel events)
# ---------------------------------------------------------------------------
def test_server_unavailable_warns_once_per_binary(caplog_lsp):
    """Twenty reports of the same missing binary give one WARNING naming it."""
    binary = "pyright-langserver"
    for _ in range(20):
        eventlog.log_server_unavailable("pyright", binary)
    warnings_seen = [rec for rec in caplog_lsp.records if rec.levelno == logging.WARNING]
    assert len(warnings_seen) == 1
    assert "pyright-langserver" in warnings_seen[0].getMessage()
def test_server_unavailable_separate_per_binary(caplog_lsp):
    """Two different missing binaries each get their own WARNING."""
    missing = (
        ("pyright", "pyright-langserver"),
        ("typescript", "typescript-language-server"),
    )
    for server_id, binary in missing:
        eventlog.log_server_unavailable(server_id, binary)
    warnings_seen = [rec for rec in caplog_lsp.records if rec.levelno == logging.WARNING]
    assert len(warnings_seen) == 2
def test_no_server_configured_warns_once(caplog_lsp):
    """Ten ``log_no_server_configured`` calls for one server give one WARNING."""
    for _ in range(10):
        eventlog.log_no_server_configured("pyright")
    warning_count = sum(
        1 for rec in caplog_lsp.records if rec.levelno == logging.WARNING
    )
    assert warning_count == 1
def test_timeout_warns_every_call(caplog_lsp):
    """Timeouts are not deduplicated: three calls give three WARNING records."""
    for _ in range(3):
        eventlog.log_timeout("pyright", "/x.py")
    warning_count = sum(
        1 for rec in caplog_lsp.records if rec.levelno == logging.WARNING
    )
    assert warning_count == 3
def test_server_error_warns_every_call(caplog_lsp):
    """Server errors are not deduplicated: three calls give three WARNING records."""
    for _ in range(3):
        failure = RuntimeError("boom")
        eventlog.log_server_error("pyright", "/x.py", failure)
    warning_count = sum(
        1 for rec in caplog_lsp.records if rec.levelno == logging.WARNING
    )
    assert warning_count == 3
def test_spawn_failed_warns(caplog_lsp):
    """A spawn failure emits one WARNING containing 'spawn/initialize failed'."""
    failure = FileNotFoundError("nope")
    eventlog.log_spawn_failed("pyright", "/proj", failure)
    warnings_seen = [rec for rec in caplog_lsp.records if rec.levelno == logging.WARNING]
    assert len(warnings_seen) == 1
    (only_warning,) = warnings_seen
    assert "spawn/initialize failed" in only_warning.getMessage()
# ---------------------------------------------------------------------------
# Format: log lines all carry the lsp[<server_id>] prefix for grep
# ---------------------------------------------------------------------------
def test_log_lines_use_lsp_prefix(caplog_lsp):
    """Every emitted log line starts with the ``lsp[`` prefix used for grep."""
    eventlog.log_clean("pyright", "/x.py")
    eventlog.log_active("pyright", "/proj")
    eventlog.log_diagnostics("typescript", "/y.ts", 2)
    # Guard against a vacuous pass: with no captured records the per-record
    # loop below would assert nothing.
    assert caplog_lsp.records
    for r in caplog_lsp.records:
        assert r.getMessage().startswith("lsp[")
# ---------------------------------------------------------------------------
# Steady-state contract: 1000 clean writes → 1 INFO at most
# ---------------------------------------------------------------------------
def test_thousand_clean_writes_emit_one_info(caplog_lsp):
    """A long clean session shows one 'active for' INFO and no other INFO lines."""
    eventlog.log_active("pyright", "/proj")
    clean_writes = 1000
    for _ in range(clean_writes):
        eventlog.log_clean("pyright", "/proj/x.py")
    info_level = [rec for rec in caplog_lsp.records if rec.levelno == logging.INFO]
    assert len(info_level) == 1
    assert "active for" in info_level[0].getMessage()
# ---------------------------------------------------------------------------
# Path shortening
# ---------------------------------------------------------------------------
def test_short_path_uses_relative_when_inside_cwd(tmp_path, monkeypatch):
    """A file directly inside the cwd is shortened to its bare file name."""
    monkeypatch.chdir(tmp_path)
    inside = tmp_path / "x.py"
    inside.write_text("")
    assert eventlog._short_path(str(inside)) == "x.py"
def test_short_path_keeps_absolute_when_outside(tmp_path, monkeypatch):
    """A path outside the cwd must not be rewritten into a ``..``-relative form."""
    # The former conditional chdir into ``tmp_path / "a"`` was dead code: the
    # directory never exists in a fresh tmp_path, and this chdir overrode it.
    monkeypatch.chdir(tmp_path)
    other = "/var/log/foo.txt"
    out = eventlog._short_path(other)
    # Outside cwd: keeps absolute (no leading "../")
    assert out == "/var/log/foo.txt" or not out.startswith("..")
def test_short_path_handles_empty_string():
    """An empty input path comes back as an empty string."""
    shortened = eventlog._short_path("")
    assert shortened == ""

View file

@ -0,0 +1,279 @@
"""Tests for follow-up fixes to the LSP integration (PR after #24168).
Covers:
1. ``typescript-language-server`` install recipe pulls in ``typescript``
alongside the server, so the npm install command targets both.
2. ``hermes lsp status`` surfaces a ``Backend warnings`` section when
bash-language-server is installed but ``shellcheck`` is missing.
3. ``_check_lint`` returns ``skipped`` (not ``error``) when the linter
command exists on PATH but couldn't actually run — e.g. ``npx tsc``
without the typescript SDK installed. This is what unblocks the
LSP semantic tier on TypeScript files when the user doesn't also
have a project-level ``tsc``.
"""
from __future__ import annotations
import io
from contextlib import redirect_stdout
from unittest.mock import MagicMock, patch
import pytest
from agent.lsp.install import INSTALL_RECIPES
# ---------------------------------------------------------------------------
# Fix 1: typescript install recipe carries the typescript SDK
# ---------------------------------------------------------------------------
def test_typescript_recipe_includes_typescript_sdk():
    """The typescript-language-server recipe lists ``typescript`` as an extra package."""
    extra_packages = INSTALL_RECIPES["typescript-language-server"].get("extra_pkgs") or []
    failure_hint = (
        "typescript-language-server requires the `typescript` SDK as a "
        "sibling install — without it `initialize` fails with "
        "'Could not find a valid TypeScript installation'."
    )
    assert "typescript" in extra_packages, failure_hint
def test_install_npm_passes_extras_to_npm_command(tmp_path, monkeypatch):
    """The npm subprocess receives both the main package and the extras."""
    from agent.lsp import install as install_mod

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    seen = {}

    def record_run(cmd, **kwargs):
        # Report success without creating any binary; the install code's
        # return value does not matter to this test.
        seen["cmd"] = cmd
        return MagicMock(returncode=0, stderr="")

    def only_npm(name):
        return "/usr/bin/npm" if name == "npm" else None

    monkeypatch.setattr(install_mod.subprocess, "run", record_run)
    monkeypatch.setattr(install_mod.shutil, "which", only_npm)
    install_mod._install_npm(
        "typescript-language-server",
        "typescript-language-server",
        extra_pkgs=["typescript"],
    )
    argv = seen["cmd"]
    assert "typescript-language-server" in argv
    assert "typescript" in argv
    # Both packages must follow the "install" verb, i.e. sit in
    # install-target position.
    verb_position = argv.index("install")
    assert argv.index("typescript-language-server") > verb_position
    assert argv.index("typescript") > verb_position
def test_install_npm_works_without_extras(tmp_path, monkeypatch):
    """Recipes with no extras (pyright-style) install only their own package."""
    from agent.lsp import install as install_mod

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    seen = {}

    def record_run(cmd, **kwargs):
        seen["cmd"] = cmd
        return MagicMock(returncode=0, stderr="")

    def only_npm(name):
        return "/usr/bin/npm" if name == "npm" else None

    monkeypatch.setattr(install_mod.subprocess, "run", record_run)
    monkeypatch.setattr(install_mod.shutil, "which", only_npm)
    install_mod._install_npm("pyright", "pyright-langserver")
    argv = seen["cmd"]
    assert "pyright" in argv

    def is_install_target(token):
        # Drop flags, the verb, the prefix flag and its value, and the npm
        # path; whatever is left is a package to install.
        if token.startswith("-"):
            return False
        return token not in (
            "install", "--prefix", str(install_mod.hermes_lsp_bin_dir().parent),
            "/usr/bin/npm",
        )

    # Omitting extra_pkgs must leave exactly one install target.
    targets = [token for token in argv if is_install_target(token)]
    assert targets == ["pyright"]
# ---------------------------------------------------------------------------
# Fix 2: ``hermes lsp status`` surfaces shellcheck-missing for bash
# ---------------------------------------------------------------------------
def test_backend_warnings_quiet_when_bash_not_installed(tmp_path, monkeypatch):
    """With nothing found on PATH, no backend warning is produced."""
    from agent.lsp import cli as lsp_cli

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    with patch("shutil.which", return_value=None):
        warnings_found = lsp_cli._backend_warnings()
    assert warnings_found == []
def test_backend_warnings_quiet_when_bash_and_shellcheck_both_present(tmp_path, monkeypatch):
    """When every binary resolves on PATH, no backend warning is produced."""
    from agent.lsp import cli as lsp_cli

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # Every lookup succeeds, so both tools count as installed.
    everything_found = lambda name: f"/usr/bin/{name}"  # noqa: E731
    with patch("shutil.which", side_effect=everything_found):
        warnings_found = lsp_cli._backend_warnings()
    assert warnings_found == []
def test_backend_warnings_fires_when_bash_installed_but_shellcheck_missing(tmp_path, monkeypatch):
    """bash-language-server present but shellcheck absent gives one warning.

    This is the scenario from the original bug report.
    """
    from agent.lsp import cli as lsp_cli

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    found_binaries = {"bash-language-server": "/fake/bin/bash-language-server"}

    def fake_which(name):
        # Anything not in the table (notably shellcheck) resolves to None.
        return found_binaries.get(name)

    with patch("shutil.which", side_effect=fake_which):
        warnings_found = lsp_cli._backend_warnings()
    assert len(warnings_found) == 1
    message = warnings_found[0].lower()
    assert "shellcheck" in message
    assert "bash-language-server" in message
def test_status_output_includes_backend_warnings_section(tmp_path, monkeypatch):
    """End-to-end: the status command's stdout includes the warnings section."""
    from agent.lsp import cli as lsp_cli

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # Only bash-language-server resolves; shellcheck and the rest are missing.
    found_binaries = {"bash-language-server": "/fake/bin/bash-language-server"}

    def fake_which(name):
        return found_binaries.get(name)

    captured_stdout = io.StringIO()
    with patch("shutil.which", side_effect=fake_which), redirect_stdout(captured_stdout):
        lsp_cli._cmd_status(emit_json=False)
    printed = captured_stdout.getvalue()
    assert "Backend warnings" in printed
    assert "shellcheck" in printed
# ---------------------------------------------------------------------------
# Fix 3: tier-1 lint treats unusable linters as ``skipped``, not ``error``
# ---------------------------------------------------------------------------
def test_npx_tsc_missing_treated_as_skipped():
    """The npx "not the tsc command" banner is classified as linter-unusable.

    The original bug: ``npx tsc`` errors when tsc isn't installed. Without
    the fix the lint result is ``error``, so the LSP semantic tier (gated on
    ``success or skipped``) is skipped — the user gets a useless
    tooling-error message instead of real diagnostics.
    """
    from tools.file_operations import _looks_like_linter_unusable

    banner_lines = [
        " \n",
        " This is not the tsc command you are looking for \n",
        " \n",
        "\n",
        "To get access to the TypeScript compiler, tsc, from the command line either:\n",
        "- Use npm install typescript to first add TypeScript to your project before using npx\n",
    ]
    npx_failure_output = "".join(banner_lines)
    verdict = _looks_like_linter_unusable("npx", npx_failure_output)
    assert verdict is True
def test_real_lint_error_not_classified_as_unusable():
    """A genuine TypeScript type error is not reported as linter-unusable."""
    from tools.file_operations import _looks_like_linter_unusable

    error_lines = [
        "bad.ts:5:1 - error TS2322: Type 'number' is not assignable to type 'string'.\n",
        "5 const x: string = greet(42);\n",
        " ~~~~~~~~~~~~~~~\n",
    ]
    verdict = _looks_like_linter_unusable("npx", "".join(error_lines))
    assert verdict is False
def test_unknown_base_cmd_returns_false():
    """Unfamiliar or empty base commands are never classified as unusable."""
    from tools.file_operations import _looks_like_linter_unusable

    cases = (("eslint", "any output"), ("", "anything"))
    for base_cmd, output in cases:
        assert _looks_like_linter_unusable(base_cmd, output) is False
def test_check_lint_returns_skipped_when_npx_tsc_unusable(tmp_path):
    """Integration: a non-zero npx exit with the npx banner yields ``skipped``.

    A skipped lint result is what lets the LSP tier still run.
    """
    from tools.environments.local import LocalEnvironment
    from tools.file_operations import ShellFileOperations

    ts_file = tmp_path / "bad.ts"
    ts_file.write_text("const x: string = 42;\n")
    fops = ShellFileOperations(LocalEnvironment())
    npx_banner = "".join(
        [
            " \n",
            " This is not the tsc command you are looking for \n",
        ]
    )

    def failing_exec(cmd, **kwargs):
        # Simulate ``npx tsc`` failing because tsc is not installed.
        return MagicMock(exit_code=1, stdout=npx_banner)

    exec_patch = patch.object(fops, "_exec", side_effect=failing_exec)
    has_command_patch = patch.object(fops, "_has_command", return_value=True)
    with exec_patch, has_command_patch:
        lint = fops._check_lint(str(ts_file))
    assert lint.skipped is True, (
        f"expected skipped (so LSP runs); got success={lint.success}, "
        f"output={lint.output!r}"
    )
    assert "not usable" in (lint.message or "")
def test_check_lint_returns_error_for_real_ts_type_errors(tmp_path):
    """Sanity check: genuine TypeScript errors still take the error path."""
    from tools.environments.local import LocalEnvironment
    from tools.file_operations import ShellFileOperations

    ts_file = tmp_path / "bad.ts"
    ts_file.write_text("const x: string = 42;\n")
    fops = ShellFileOperations(LocalEnvironment())
    real_tsc_error = "".join(
        [
            "bad.ts:1:7 - error TS2322: Type 'number' is not assignable to type 'string'.\n",
            "1 const x: string = 42;\n",
            " ~\n",
            "Found 1 error.\n",
        ]
    )

    def failing_exec(cmd, **kwargs):
        # tsc ran and reported a real type error.
        return MagicMock(exit_code=1, stdout=real_tsc_error)

    exec_patch = patch.object(fops, "_exec", side_effect=failing_exec)
    has_command_patch = patch.object(fops, "_has_command", return_value=True)
    with exec_patch, has_command_patch:
        lint = fops._check_lint(str(ts_file))
    assert lint.skipped is False
    assert lint.success is False
    assert "TS2322" in lint.output
# Allow running this test module directly: hands this file to pytest in
# verbose mode.
if __name__ == "__main__": # pragma: no cover
pytest.main([__file__, "-v"])

View file

@ -0,0 +1,144 @@
"""Tests for service-singleton lifecycle: atexit handler, idempotent shutdown.
These cover the exit-cleanup behavior added to plug the language-server
process leak — without the atexit hook, ``hermes chat`` exits while
pyright/gopls/etc. are still alive on the host.
"""
from __future__ import annotations
import atexit
from unittest.mock import MagicMock, patch
import pytest
from agent import lsp as lsp_module
@pytest.fixture(autouse=True)
def _reset_singleton():
    """Give every test a clean module state.

    The tests in this file share process-global state: the lazy service
    singleton and the atexit registration flag. Both are cleared before and
    after each test so that test order does not matter.
    """
    # Setup: drop any cached service and forget prior atexit registration.
    lsp_module._service = None
    lsp_module._atexit_registered = False
    yield
    # Teardown: same reset, so nothing leaks into the next test.
    lsp_module._service = None
    lsp_module._atexit_registered = False
def test_get_service_registers_atexit_handler_once(monkeypatch):
    """``get_service`` registers its atexit handler on the first call only.

    Python's ``atexit`` runs every registered callable, so a duplicate
    registration would shut down twice — harmless but wasteful.
    """
    stub_service = MagicMock()
    stub_service.is_active.return_value = True
    monkeypatch.setattr(
        lsp_module.LSPService,
        "create_from_config",
        classmethod(lambda cls: stub_service),
    )
    registered = []
    # list.append returns None, matching a register hook that returns nothing.
    monkeypatch.setattr(atexit, "register", registered.append)
    results = [lsp_module.get_service() for _ in range(3)]
    for service in results:
        assert service is stub_service
    assert len(registered) == 1
    # The registered callable must be the module's internal shutdown wrapper.
    assert registered[0] is lsp_module._atexit_shutdown
def test_atexit_shutdown_calls_shutdown_service(monkeypatch):
    """The atexit wrapper invokes ``shutdown_service`` exactly once."""
    invocations = []

    def record_shutdown():
        invocations.append("shutdown")

    monkeypatch.setattr(lsp_module, "shutdown_service", record_shutdown)
    lsp_module._atexit_shutdown()
    assert invocations == ["shutdown"]
def test_atexit_shutdown_swallows_exceptions(monkeypatch):
    """An exception raised by ``shutdown_service`` does not escape the wrapper."""

    def exploding_shutdown():
        raise RuntimeError("server already dead")

    monkeypatch.setattr(lsp_module, "shutdown_service", exploding_shutdown)
    # The test passes simply by this call returning without raising.
    lsp_module._atexit_shutdown()
def test_shutdown_service_idempotent(monkeypatch):
    """A second ``shutdown_service`` call is a safe no-op.

    The underlying service's ``shutdown`` must run exactly once.
    """
    stub_service = MagicMock()
    stub_service.is_active.return_value = True
    stub_service.shutdown = MagicMock()
    monkeypatch.setattr(
        lsp_module.LSPService,
        "create_from_config",
        classmethod(lambda cls: stub_service),
    )
    monkeypatch.setattr(atexit, "register", lambda fn: None)
    lsp_module.get_service()
    for _ in range(2):
        # The second pass must not raise.
        lsp_module.shutdown_service()
    assert stub_service.shutdown.call_count == 1
def test_shutdown_service_no_op_when_never_started():
    """``shutdown_service`` is safe when no service was ever created."""
    # The test passes simply by this call returning without raising.
    lsp_module.shutdown_service()
def test_shutdown_service_swallows_exception(monkeypatch):
    """A failure inside ``svc.shutdown()`` must not propagate.

    The caller is often atexit, which has nothing useful to do with it.
    """
    stub_service = MagicMock()
    stub_service.is_active.return_value = True
    stub_service.shutdown = MagicMock(side_effect=RuntimeError("kill -9 already"))
    monkeypatch.setattr(
        lsp_module.LSPService,
        "create_from_config",
        classmethod(lambda cls: stub_service),
    )
    monkeypatch.setattr(atexit, "register", lambda fn: None)
    lsp_module.get_service()
    # The test passes simply by this call returning without raising.
    lsp_module.shutdown_service()
def test_get_service_returns_none_for_inactive_service(monkeypatch):
    """A service whose ``is_active()`` is False is treated as not running.

    Callers see ``None`` and fall back.
    """
    stub_service = MagicMock()
    stub_service.is_active.return_value = False
    monkeypatch.setattr(
        lsp_module.LSPService,
        "create_from_config",
        classmethod(lambda cls: stub_service),
    )
    monkeypatch.setattr(atexit, "register", lambda fn: None)
    first_lookup = lsp_module.get_service()
    assert first_lookup is None
    # A repeated lookup must also return None.
    second_lookup = lsp_module.get_service()
    assert second_lookup is None
def test_get_service_returns_none_when_create_fails(monkeypatch):
    """A factory that returns ``None`` makes ``get_service`` return ``None``."""
    factory_returning_none = classmethod(lambda cls: None)
    monkeypatch.setattr(
        lsp_module.LSPService, "create_from_config", factory_returning_none
    )
    monkeypatch.setattr(atexit, "register", lambda fn: None)
    service = lsp_module.get_service()
    assert service is None

View file

@ -0,0 +1,197 @@
"""Tests for the LSP protocol framing layer.
The framer is small but load-bearing — Content-Length parsing is the
single most common reason for hand-rolled LSP clients to silently
deadlock. These tests exercise:
- exact wire format of outgoing messages (encode_message)
- partial-read tolerance + EOF handling (read_message)
- envelope helpers (request, response, notification, error)
- message classification
"""
from __future__ import annotations
import asyncio
import json
import pytest
from agent.lsp.protocol import (
ERROR_CONTENT_MODIFIED,
ERROR_METHOD_NOT_FOUND,
LSPProtocolError,
LSPRequestError,
classify_message,
encode_message,
make_error_response,
make_notification,
make_request,
make_response,
read_message,
)
# ---------------------------------------------------------------------------
# encode_message
# ---------------------------------------------------------------------------
def test_encode_message_uses_compact_separators_and_utf8():
    """The encoded frame has an ASCII header, an exact length, and compact UTF-8 JSON."""
    message = {"jsonrpc": "2.0", "id": 1, "method": "x", "params": {"k": "ä"}}
    frame = encode_message(message)
    # The header runs up to and including the blank-line terminator.
    body_start = frame.index(b"\r\n\r\n") + 4
    header_text = frame[:body_start].decode("ascii")
    payload = frame[body_start:]
    assert "Content-Length:" in header_text
    after_name = header_text.split("Content-Length:")[1]
    declared_length = int(after_name.split("\r\n")[0].strip())
    # The declared length counts body bytes, not characters.
    assert declared_length == len(payload)
    # The body decodes as UTF-8 JSON and round-trips to the original message.
    assert json.loads(payload.decode("utf-8")) == message
    # Compact separators: no space between key and value.
    assert b'"id":1' in payload
def test_encode_message_handles_unicode_in_strings():
    """Multi-byte characters are counted in bytes and survive a round trip."""
    message = {"jsonrpc": "2.0", "method": "log", "params": {"text": "🚀 ünıcödé"}}
    frame = encode_message(message)
    body_start = frame.index(b"\r\n\r\n") + 4
    first_header_line = frame[: frame.index(b"\r\n")]
    declared_length = int(first_header_line.split(b": ")[1])
    payload = frame[body_start:]
    assert declared_length == len(payload)
    assert json.loads(payload.decode("utf-8")) == message
# ---------------------------------------------------------------------------
# read_message
# ---------------------------------------------------------------------------
async def _stream_from_bytes(data: bytes) -> asyncio.StreamReader:
"""Build an asyncio.StreamReader pre-populated with ``data``."""
reader = asyncio.StreamReader()
reader.feed_data(data)
reader.feed_eof()
return reader
@pytest.mark.asyncio
async def test_read_message_round_trip():
    """A message encoded by ``encode_message`` is read back unchanged."""
    original = {"jsonrpc": "2.0", "method": "ping"}
    stream = await _stream_from_bytes(encode_message(original))
    decoded = await read_message(stream)
    assert decoded == original
@pytest.mark.asyncio
async def test_read_message_clean_eof_returns_none():
    """An empty stream that hits EOF immediately yields ``None``."""
    stream = await _stream_from_bytes(b"")
    outcome = await read_message(stream)
    assert outcome is None
@pytest.mark.asyncio
async def test_read_message_truncated_body_raises():
    """A body shorter than its declared Content-Length is a protocol error."""
    full_frame = encode_message({"jsonrpc": "2.0", "method": "x"})
    # Drop the last three bytes so the body falls short of the header's claim.
    short_frame = full_frame[: -3]
    stream = await _stream_from_bytes(short_frame)
    with pytest.raises(LSPProtocolError):
        await read_message(stream)
@pytest.mark.asyncio
async def test_read_message_missing_content_length_raises():
    """A header block with no Content-Length field is a protocol error."""
    headerless_frame = b"X-Other: 5\r\n\r\n12345"
    stream = await _stream_from_bytes(headerless_frame)
    with pytest.raises(LSPProtocolError):
        await read_message(stream)
@pytest.mark.asyncio
async def test_read_message_two_messages_back_to_back():
    """Two concatenated frames are read one at a time, in order."""
    frames = [
        encode_message({"jsonrpc": "2.0", "method": name}) for name in ("a", "b")
    ]
    stream = await _stream_from_bytes(frames[0] + frames[1])
    first = await read_message(stream)
    assert first["method"] == "a"
    second = await read_message(stream)
    assert second["method"] == "b"
@pytest.mark.asyncio
async def test_read_message_rejects_runaway_header():
    """Endless headers with no CRLF-CRLF terminator raise a protocol error.

    A pathological server that streams headers forever must not make the
    reader loop forever — the error message is expected to mention "8 KiB".
    """
    junk_line = b"X-Junk: " + b"A" * 200 + b"\r\n"
    flood = junk_line * 60  # ~12 KiB worth
    stream = await _stream_from_bytes(flood)
    with pytest.raises(LSPProtocolError) as raised:
        await read_message(stream)
    assert "8 KiB" in str(raised.value)
# ---------------------------------------------------------------------------
# envelope helpers
# ---------------------------------------------------------------------------
def test_make_request_includes_id_and_method():
    """``make_request`` builds the full JSON-RPC envelope with id, method and params."""
    expected = {"jsonrpc": "2.0", "id": 7, "method": "ping", "params": {"v": 1}}
    assert make_request(7, "ping", {"v": 1}) == expected
def test_make_request_omits_params_when_none():
    """Passing ``None`` for params leaves the ``params`` key out entirely."""
    envelope = make_request(7, "ping", None)
    assert "params" not in envelope
def test_make_notification_omits_id():
    """A notification carries its method but no ``id`` key."""
    envelope = make_notification("log", {"line": "hi"})
    assert "id" not in envelope
    assert envelope["method"] == "log"
def test_make_response_carries_result():
    """A response echoes the request id and carries the given result."""
    envelope = make_response(7, {"ok": True})
    assert envelope["id"] == 7
    assert envelope["result"] == {"ok": True}
def test_make_error_response_shape():
    """The error object carries the given code, message and data."""
    envelope = make_error_response(7, ERROR_CONTENT_MODIFIED, "stale", {"hint": "retry"})
    error = envelope["error"]
    assert error["code"] == ERROR_CONTENT_MODIFIED
    assert error["message"] == "stale"
    assert error["data"] == {"hint": "retry"}
# ---------------------------------------------------------------------------
# classify_message
# ---------------------------------------------------------------------------
def test_classify_message_request():
    """A message with both ``id`` and ``method`` classifies as a request."""
    classification = classify_message({"jsonrpc": "2.0", "id": 1, "method": "x"})
    assert classification == ("request", 1)
def test_classify_message_response():
    """A message with ``id`` and ``result`` but no method classifies as a response."""
    classification = classify_message({"jsonrpc": "2.0", "id": 1, "result": None})
    assert classification == ("response", 1)
def test_classify_message_notification():
    """A message with a method and no ``id`` classifies as a notification."""
    classification = classify_message({"jsonrpc": "2.0", "method": "log"})
    assert classification == ("notification", "log")
def test_classify_message_invalid():
    """A missing or wrong ``jsonrpc`` version classifies as invalid."""
    malformed = (
        {"id": 1},
        {"jsonrpc": "1.0", "method": "x"},
    )
    for message in malformed:
        kind = classify_message(message)[0]
        assert kind == "invalid"
# ---------------------------------------------------------------------------
# LSPRequestError
# ---------------------------------------------------------------------------
def test_lsp_request_error_carries_code_and_data():
    """``LSPRequestError`` exposes its constructor arguments as attributes."""
    error = LSPRequestError(ERROR_METHOD_NOT_FOUND, "no", {"x": 1})
    observed = (error.code, error.message, error.data)
    assert observed[0] == ERROR_METHOD_NOT_FOUND
    assert observed[1] == "no"
    assert observed[2] == {"x": 1}

View file

@ -0,0 +1,94 @@
"""Tests for the diagnostic reporter (formatting layer)."""
from __future__ import annotations
from agent.lsp.reporter import (
DEFAULT_SEVERITIES,
MAX_PER_FILE,
format_diagnostic,
report_for_file,
truncate,
)
def _diag(line=0, col=0, sev=1, code="E001", source="ls", msg="oops"):
return {
"range": {
"start": {"line": line, "character": col},
"end": {"line": line, "character": col + 1},
},
"severity": sev,
"code": code,
"source": source,
"message": msg,
}
def test_format_diagnostic_uses_one_indexed_position():
    """Zero-based line 4, column 2 is rendered as ``[5:3]``."""
    rendered = format_diagnostic(_diag(line=4, col=2))
    # Both line and column are shifted by one.
    assert "[5:3]" in rendered
def test_format_diagnostic_includes_severity_label():
    """Severities 1 to 4 render with the ERROR, WARN, INFO and HINT prefixes."""
    expected_prefix = {1: "ERROR", 2: "WARN", 3: "INFO", 4: "HINT"}
    for severity, label in expected_prefix.items():
        assert format_diagnostic(_diag(sev=severity)).startswith(label)
def test_format_diagnostic_includes_code_and_source():
    """The code appears in square brackets and the source in parentheses."""
    rendered = format_diagnostic(_diag(code="X42", source="src"))
    for fragment in ("[X42]", "(src)"):
        assert fragment in rendered
def test_format_diagnostic_omits_missing_optional_fields():
    """With no ``code`` or ``source``, no bracketed code or parentheses appear."""
    origin = {"line": 0, "character": 0}
    bare_diagnostic = {
        "range": {"start": dict(origin), "end": dict(origin)},
        "severity": 1,
        "message": "bare",
    }
    rendered = format_diagnostic(bare_diagnostic)
    after_position = rendered.split("]", 1)[1]
    # No further brackets may follow the position marker.
    assert "[" not in after_position
    assert "(" not in rendered
def test_report_for_file_returns_empty_when_only_warnings():
    """The default severity filter is ERROR-only, so one warning gives no report."""
    warning_only = [_diag(sev=2)]
    assert report_for_file("/x.py", warning_only) == ""
def test_report_for_file_emits_block_with_errors():
    """An error produces a ``<diagnostics>`` block containing its message."""
    report = report_for_file("/x.py", [_diag(msg="real error")])
    expected_fragments = (
        "<diagnostics file=\"/x.py\">",
        "real error",
        "</diagnostics>",
    )
    for fragment in expected_fragments:
        assert fragment in report
def test_report_for_file_caps_at_max_per_file():
    """Five diagnostics beyond ``MAX_PER_FILE`` are summarised as 'and 5 more'."""
    overflow = 5
    diagnostics = []
    for line_number in range(MAX_PER_FILE + overflow):
        diagnostics.append(_diag(line=line_number))
    report = report_for_file("/x.py", diagnostics)
    assert "and 5 more" in report
def test_report_for_file_respects_custom_severities():
    """Adding severity 2 to the filter lets a warning into the report."""
    errors_and_warnings = frozenset({1, 2})
    report = report_for_file(
        "/x.py",
        [_diag(sev=2, msg="warn")],
        severities=errors_and_warnings,
    )
    assert "warn" in report
def test_truncate_below_limit_unchanged():
    """A 300-character string is returned unchanged under a 4000 limit."""
    text = "abc" * 100
    shortened = truncate(text, limit=4000)
    assert shortened == text
def test_truncate_above_limit_appends_marker():
    """Over-long text is cut to the limit and ends with ``[truncated]``."""
    limit = 200
    shortened = truncate("x" * 10000, limit=limit)
    assert shortened.endswith("[truncated]")
    # The marker counts toward the limit.
    assert len(shortened) <= 200

View file

@ -0,0 +1,149 @@
"""Tests for the synchronous LSPService wrapper.
Drives the service through ``snapshot_baseline`` →
``get_diagnostics_sync`` against the mock LSP server, exercising the
delta filter that ``tools/file_operations._check_lint_delta`` relies
on.
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
import pytest
from agent.lsp.manager import LSPService
from agent.lsp.servers import (
SERVERS,
ServerContext,
ServerDef,
SpawnSpec,
find_server_for_file,
)
MOCK_SERVER = str(Path(__file__).parent / "_mock_lsp_server.py")
def _install_mock_server(monkeypatch, script: str = "errors", server_id: str = "pyright"):
    """Swap one registered server for a wrapper that spawns the mock.

    This is a generator: the first ``next()`` installs the replacement into
    the module-level ``SERVERS`` list; exhausting or closing it restores the
    original entry. The replacement keeps the original's extensions, so with
    the default ``pyright`` id the same files route to the mock. This keeps
    the test free of any LSP toolchain dependency.

    Args:
        monkeypatch: Unused; kept so existing callers keep working.
        script: Value passed to the mock through ``MOCK_LSP_SCRIPT``.
        server_id: ``server_id`` of the ``SERVERS`` entry to replace.

    Raises:
        StopIteration: On the first ``next()`` if no entry in ``SERVERS``
            has the given ``server_id``.
    """
    target_index = next(i for i, s in enumerate(SERVERS) if s.server_id == server_id)
    original = SERVERS[target_index]

    def _spawn(root: str, ctx: ServerContext) -> SpawnSpec:
        env = {"MOCK_LSP_SCRIPT": script}
        return SpawnSpec(
            command=[sys.executable, MOCK_SERVER],
            workspace_root=root,
            cwd=root,
            env=env,
            initialization_options={},
        )

    replacement = ServerDef(
        server_id=server_id,
        extensions=original.extensions,
        resolve_root=lambda fp, ws: ws,  # always use workspace root
        build_spawn=_spawn,
        seed_first_push=False,
        description="mock " + server_id,
    )
    # Patch the SERVERS list element directly. The try/finally restores the
    # original even if the generator is closed or discarded before its
    # second next(), so the global registry never keeps the mock entry.
    SERVERS[target_index] = replacement
    try:
        yield
    finally:
        SERVERS[target_index] = original
@pytest.fixture
def mock_pyright(monkeypatch, tmp_path):
    """Install the mock as ``pyright`` and create a fake git workspace."""
    workspace = tmp_path / "repo"
    (workspace / ".git").mkdir(parents=True)
    # so pyright's root resolver finds it
    (workspace / "pyproject.toml").write_text("")
    monkeypatch.chdir(str(workspace))

    installer = _install_mock_server(monkeypatch, "errors", "pyright")
    next(installer)  # run up to the yield: mock is now registered
    yield workspace
    # Resume the installer so it restores the original server entry; the
    # default swallows the StopIteration raised when it finishes.
    next(installer, None)
def test_service_returns_empty_when_disabled(tmp_path):
    """A service built with ``enabled=False`` is inactive and returns no diagnostics."""
    svc = LSPService(
        enabled=False,
        wait_mode="document",
        wait_timeout=2.0,
        install_strategy="auto",
    )
    # try/finally so the service is shut down even when an assertion
    # fails, matching the other service tests in this module.
    try:
        assert not svc.is_active()
        f = tmp_path / "x.py"
        f.write_text("")
        assert svc.get_diagnostics_sync(str(f)) == []
    finally:
        svc.shutdown()
def test_service_skips_files_outside_workspace(tmp_path):
    """Files outside any git worktree must not trigger LSP."""
    svc = LSPService(
        enabled=True,
        wait_mode="document",
        wait_timeout=2.0,
        install_strategy="manual",
    )
    # try/finally so the service is shut down even when the assertion
    # fails, matching the other service tests in this module.
    try:
        f = tmp_path / "x.py"
        f.write_text("")
        # No .git anywhere — service should report not enabled for this file.
        assert not svc.enabled_for(str(f))
    finally:
        svc.shutdown()
def test_service_e2e_delta_filter(mock_pyright):
    """End-to-end: snapshot baseline → wait → delta returned."""
    source_file = mock_pyright / "x.py"
    source_file.write_text("print('hi')\n")
    path = str(source_file)

    service = LSPService(
        enabled=True,
        wait_mode="document",
        wait_timeout=3.0,
        install_strategy="manual",
    )
    try:
        assert service.enabled_for(path)
        # Baseline first — server pushes 1 error.
        service.snapshot_baseline(path)
        # Re-poll: same error is in baseline, so delta is empty.
        delta = service.get_diagnostics_sync(path)
        assert delta == []
    finally:
        service.shutdown()
def test_service_status_includes_clients(mock_pyright):
    """After a diagnostics request the status report lists a ``pyright`` client."""
    source_file = mock_pyright / "x.py"
    source_file.write_text("")

    service = LSPService(
        enabled=True,
        wait_mode="document",
        wait_timeout=3.0,
        install_strategy="manual",
    )
    try:
        service.get_diagnostics_sync(str(source_file))
        status_report = service.get_status()
        assert status_report["enabled"] is True
        assert any(
            client["server_id"] == "pyright"
            for client in status_report["clients"]
        )
    finally:
        service.shutdown()

View file

@ -0,0 +1,139 @@
"""Tests for workspace + project-root resolution."""
from __future__ import annotations
import os
from pathlib import Path
import pytest
from agent.lsp.workspace import (
clear_cache,
find_git_worktree,
is_inside_workspace,
nearest_root,
normalize_path,
resolve_workspace_for_file,
)
@pytest.fixture(autouse=True)
def _clear():
    """Call ``clear_cache()`` before and after every test in this module."""
    clear_cache()
    yield
    clear_cache()
def test_find_git_worktree_returns_none_outside_repo(tmp_path: Path):
    """A directory created without any ``.git`` entry resolves to ``None``."""
    plain_dir = tmp_path / "sub"
    plain_dir.mkdir()
    worktree = find_git_worktree(str(plain_dir))
    assert worktree is None
def test_find_git_worktree_finds_dotgit(tmp_path: Path):
    """The worktree is found from a nested directory via an ancestor ``.git`` dir."""
    repo_root = tmp_path / "repo"
    (repo_root / ".git").mkdir(parents=True)
    nested = repo_root / "src" / "deep"
    nested.mkdir(parents=True)
    assert find_git_worktree(str(nested)) == str(repo_root)
def test_find_git_worktree_handles_dotgit_file(tmp_path: Path):
    """``.git`` can also be a file (gitfile pointing into a worktree)."""
    repo_root = tmp_path / "repo"
    repo_root.mkdir()
    gitfile = repo_root / ".git"
    gitfile.write_text("gitdir: /elsewhere\n")
    worktree = find_git_worktree(str(repo_root))
    assert worktree == str(repo_root)
def test_is_inside_workspace_true_for_subpath(tmp_path: Path):
    """A file nested below the workspace root counts as inside it."""
    workspace = tmp_path / "p"
    target = workspace / "x" / "y.py"
    # parents=True creates the workspace directory as well.
    target.parent.mkdir(parents=True)
    target.write_text("")
    assert is_inside_workspace(str(target), str(workspace))
def test_is_inside_workspace_false_for_unrelated(tmp_path: Path):
    """A file in a sibling directory is not inside the workspace."""
    workspace, elsewhere = tmp_path / "a", tmp_path / "b"
    for directory in (workspace, elsewhere):
        directory.mkdir()
    outsider = elsewhere / "x.py"
    outsider.write_text("")
    assert not is_inside_workspace(str(outsider), str(workspace))
def test_nearest_root_finds_first_marker(tmp_path: Path):
    """The directory holding the marker file is returned for a nested path."""
    project = tmp_path / "p"
    package_dir = project / "src" / "pkg"
    package_dir.mkdir(parents=True)
    (project / "pyproject.toml").write_text("")
    module_path = str(package_dir / "mod.py")
    assert nearest_root(module_path, ["pyproject.toml"]) == str(project)
def test_nearest_root_excludes_take_priority(tmp_path: Path):
    """If an exclude marker matches first, return None."""
    project = tmp_path / "p"
    deno_dir = project / "deno-app"
    deno_dir.mkdir(parents=True)
    (deno_dir / "deno.json").write_text("{}")
    # would match if not for exclude
    (project / "package.json").write_text("{}")

    entry_point = str(deno_dir / "main.ts")
    result = nearest_root(entry_point, ["package.json"], excludes=["deno.json"])
    assert result is None
def test_nearest_root_returns_none_when_no_marker(tmp_path: Path):
    """With no marker file created, no root is reported."""
    lonely_file = tmp_path / "x.py"
    lonely_file.write_text("")
    result = nearest_root(str(lonely_file), ["pyproject.toml"])
    assert result is None
def test_resolve_workspace_for_file_uses_cwd_first(tmp_path: Path, monkeypatch):
    """With cwd inside the repo, the repo is returned as a gated workspace."""
    repo_root = tmp_path / "repo"
    (repo_root / ".git").mkdir(parents=True)
    target = repo_root / "x.py"
    target.write_text("")
    # cwd is inside the repo
    monkeypatch.chdir(str(repo_root))

    workspace, is_gated = resolve_workspace_for_file(str(target))
    assert workspace == str(repo_root)
    assert is_gated is True
def test_resolve_workspace_for_file_no_repo_returns_none(tmp_path: Path, monkeypatch):
    """Without a repo around cwd or the file, the result is ``(None, False)``."""
    monkeypatch.chdir(str(tmp_path))
    target = tmp_path / "x.py"
    target.write_text("")

    workspace, is_gated = resolve_workspace_for_file(str(target))
    assert workspace is None
    assert is_gated is False
def test_resolve_workspace_falls_back_to_file_location(tmp_path: Path, monkeypatch):
    """When cwd isn't a git repo but the file is inside one, we still
    discover the workspace from the file's path."""
    loose_dir = tmp_path / "loose"
    loose_dir.mkdir()
    monkeypatch.chdir(str(loose_dir))

    repo_root = tmp_path / "actual-repo"
    (repo_root / ".git").mkdir(parents=True)
    target = repo_root / "x.py"
    target.write_text("")

    workspace, is_gated = resolve_workspace_for_file(str(target))
    assert workspace == str(repo_root)
    assert is_gated is True
def test_normalize_path_expands_tilde(monkeypatch):
    """``~/x.py`` normalizes to an absolute path under the patched home directory."""
    home = "/home/user"
    monkeypatch.setenv("HOME", home)
    # On Windows ``os.path.expanduser`` ignores HOME (Python 3.8+) and
    # reads USERPROFILE instead; patch both so the expansion is the same
    # on every platform.  NOTE(review): assumes normalize_path relies on
    # os.path.expanduser — confirm against agent.lsp.workspace.
    monkeypatch.setenv("USERPROFILE", home)
    p = normalize_path("~/x.py")
    assert p == os.path.abspath("/home/user/x.py")

View file

@ -660,6 +660,7 @@ class TestAuxiliaryPoolAwareness:
with (
patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
patch("agent.auxiliary_client.OpenAI") as mock_openai,
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
):
from agent.auxiliary_client import _try_nous

View file

@ -473,6 +473,240 @@ class TestCodexOAuthContextLength:
assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected"
# =========================================================================
# Nous Portal context-window resolution (provider="nous")
# =========================================================================
class TestNousPortalContextResolution:
    """Nous Portal /v1/models is authoritative for what Nous infra enforces
    and may diverge from the OpenRouter catalog.

    Invariants this class pins down:

    1. Portal value wins over the OR fallback.
    2. Portal-derived values are persisted to disk.
    3. OR-fallback values are NEVER persisted — otherwise a single portal
       blip would freeze the wrong value in via step-1 cache short-circuit.
    4. Pre-fix persistent-cache entries (seeded from the OR catalog) are
       bypassed at step 1 and overwritten once the portal responds.
    5. Pre-fix persistent-cache entries SURVIVE on disk when the portal
       is unreachable — no opportunistic invalidation that loses the only
       value we have.
    """

    def setup_method(self):
        """Clear the in-memory endpoint metadata cache before each test."""
        import agent.model_metadata as mm
        mm._endpoint_model_metadata_cache.clear()
        mm._endpoint_model_metadata_cache_time.clear()

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_wins_over_openrouter_catalog(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """The motivating case: OR catalog says 1M for qwen3.6-plus, but
        the Nous portal correctly enforces 262144.  Portal must win."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url="https://inference-api.nousresearch.com/v1",
            api_key="fake-token",
            provider="nous",
        )
        assert ctx == 262_144, (
            f"Portal must override OR catalog; got {ctx} (OR leak?)"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_is_persisted_to_disk(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Portal-derived value should land in the persistent cache so
        cross-process callers (e.g. child agents) see the same value."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}
        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 262_144
        persisted = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
        assert persisted.get(f"qwen3.6-plus@{base_url}") == 262_144, (
            "Portal-derived value should be persisted to disk"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_openrouter_fallback_is_not_persisted(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal can't resolve a model (network blip, auth glitch,
        model not yet listed) we fall back to the OR catalog so the agent
        keeps working — but we must NOT write the OR value to disk.  Once
        cached on disk, step-1 short-circuits forever and the user is stuck
        with the wrong number until they manually clear the cache."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
        mock_portal.return_value = {}  # portal unreachable / model unknown
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }
        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 1_000_000, "OR fallback should still serve the request"
        assert not cache_file.exists() or not yaml.safe_load(
            cache_file.read_text()
        ).get("context_lengths", {}), (
            "OR-fallback values must NOT be persisted — a single portal blip "
            "would otherwise freeze the wrong value in via step-1 cache hit"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_is_bypassed_and_overwritten_by_portal(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Users upgrading from pre-fix builds have ``qwen3.6-plus@…nous… =
        1000000`` (OR-derived) sitting in their cache file.  Step 1 must
        NOT short-circuit on that entry — step 5b reconciles against the
        portal and overwrites the persistent value with 262144."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
        base_url = "https://inference-api.nousresearch.com/v1"
        stale_key = f"qwen3.6-plus@{base_url}"
        other_key = "other-model@https://api.openai.com/v1"
        cache_file.write_text(yaml.dump({"context_lengths": {
            stale_key: 1_000_000,  # pre-fix OR-derived value
            other_key: 128_000,  # unrelated, must survive
        }}))
        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 262_144, (
            f"Stale OR-derived cache entry should not have leaked through; got {ctx}"
        )
        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
        assert remaining.get(stale_key) == 262_144, (
            "Portal value should have overwritten the stale entry on disk"
        )
        assert remaining.get(other_key) == 128_000, (
            "Unrelated cache entries must not be touched"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_survives_when_portal_unreachable(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal is unreachable AND we have a (potentially stale)
        on-disk cache entry, the entry must survive untouched — we don't
        want a transient outage to delete the only value we have.  The
        request itself still gets served via OR fallback for this call."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
        base_url = "https://inference-api.nousresearch.com/v1"
        existing_key = f"qwen3.6-plus@{base_url}"
        cache_file.write_text(yaml.dump({"context_lengths": {
            existing_key: 1_000_000,
        }}))
        mock_portal.return_value = {}  # portal unreachable
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }
        mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
        assert remaining.get(existing_key) == 1_000_000, (
            "Persistent cache entry must survive a transient portal outage"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_bypass_keyed_on_url_not_provider_string(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Some call sites pass ``provider=""`` or ``provider="openrouter"``
        when the user is really on Nous Portal (e.g. cred-pool fallback).
        The Nous-URL bypass must trigger off the URL host, not the provider
        string, so the portal-first resolver still runs in that case."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)
        base_url = "https://inference-api.nousresearch.com/v1"
        stale_cache = yaml.dump({"context_lengths": {
            f"qwen3.6-plus@{base_url}": 1_000_000,  # stale
        }})
        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}
        for provider_arg in ("", "openrouter", "custom"):
            # Re-seed the stale entry on every iteration: a successful
            # portal lookup overwrites it on disk (invariant 4), so seeding
            # only once would let the later provider values pass from the
            # already-corrected cache without exercising the bypass.
            cache_file.write_text(stale_cache)
            mm._endpoint_model_metadata_cache.clear()
            mm._endpoint_model_metadata_cache_time.clear()
            ctx = mm.get_model_context_length(
                model="qwen3.6-plus",
                base_url=base_url,
                api_key="fake",
                provider=provider_arg,
            )
            assert ctx == 262_144, (
                f"URL-based Nous detection must fire for provider={provider_arg!r}; "
                f"got {ctx}"
            )
# =========================================================================
# get_model_context_length — resolution order
# =========================================================================

View file

@ -190,3 +190,37 @@ def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
assert float(entry.input_cost_per_million) == 0.5
assert float(entry.output_cost_per_million) == 2.0
def test_deepseek_v4_pro_pricing_entry_exists():
    """Regression test: deepseek-v4-pro must have a pricing entry.

    Before this fix, deepseek-v4-pro sessions showed as unknown cost
    in hermes insights because the _OFFICIAL_DOCS_PRICING table had no
    entry for that model.  See #24218.
    """
    pricing = get_pricing_entry("deepseek-v4-pro", provider="deepseek")
    assert pricing is not None
    assert pricing.input_cost_per_million is not None
    assert pricing.output_cost_per_million is not None

    expected_rates = (
        ("input_cost_per_million", 1.74),
        ("output_cost_per_million", 3.48),
        ("cache_read_cost_per_million", 0.0145),
    )
    for attribute, rate in expected_rates:
        assert float(getattr(pricing, attribute)) == rate
def test_deepseek_v4_pro_estimate_usage_cost():
    """Ensure deepseek-v4-pro sessions get a dollar estimate, not unknown."""
    usage = CanonicalUsage(input_tokens=1000000, output_tokens=500000)
    estimate = estimate_usage_cost(
        "deepseek-v4-pro",
        usage,
        provider="deepseek",
    )
    assert estimate.status == "estimated"
    assert estimate.amount_usd is not None
    # 1M input × $1.74/M + 500K output × $3.48/M = $1.74 + $1.74 = $3.48
    assert float(estimate.amount_usd) == 3.48

View file

@ -0,0 +1,43 @@
from unittest.mock import MagicMock, patch
from cli import HermesCLI
class _InsightsEngineStub:
calls = []
def __init__(self, db):
self.db = db
def generate(self, *, days=30, source=None):
self.calls.append({"days": days, "source": source})
return {"days": days, "source": source}
def format_terminal(self, report):
return f"days={report['days']} source={report['source']}"
def _run_show_insights(command: str):
    """Run ``HermesCLI._show_insights`` with the session DB and engine stubbed.

    Returns a ``(calls, db)`` pair: the ``generate`` calls recorded by
    ``_InsightsEngineStub`` and the mock handed out as the session DB.
    """
    _InsightsEngineStub.calls = []
    fake_db = MagicMock()
    # Bypass __init__ so no real CLI setup runs.
    cli_under_test = HermesCLI.__new__(HermesCLI)

    session_db_patch = patch("hermes_state.SessionDB", return_value=fake_db)
    engine_patch = patch("agent.insights.InsightsEngine", _InsightsEngineStub)
    with session_db_patch, engine_patch:
        cli_under_test._show_insights(command)
    return _InsightsEngineStub.calls, fake_db
def test_cli_insights_accepts_positional_days(capsys):
    """``/insights 7`` is read as a seven-day window with no source filter."""
    recorded, fake_db = _run_show_insights("/insights 7")
    printed = capsys.readouterr().out

    assert recorded == [{"days": 7, "source": None}]
    fake_db.close.assert_called_once()
    assert "days=7 source=None" in printed
def test_cli_insights_keeps_days_flag_and_source(capsys):
    """The ``--days`` and ``--source`` flags are both forwarded to the engine."""
    recorded, fake_db = _run_show_insights("/insights --days 14 --source discord")
    printed = capsys.readouterr().out

    assert recorded == [{"days": 14, "source": "discord"}]
    fake_db.close.assert_called_once()
    assert "days=14 source=discord" in printed

View file

@ -222,6 +222,9 @@ def make_runner(platform: Platform, session_entry: SessionEntry = None) -> "Gate
runner._capture_gateway_honcho_if_configured = lambda *a, **kw: None
runner._emit_gateway_run_progress = AsyncMock()
# Disable destructive slash confirm gate so /new executes immediately
runner._read_user_config = lambda: {"approvals": {"destructive_slash_confirm": False}}
runner.pairing_store = MagicMock()
runner.pairing_store._is_rate_limited = MagicMock(return_value=False)
runner.pairing_store.generate_code = MagicMock(return_value="ABC123")

View file

@ -681,6 +681,56 @@ class TestChatCompletionsEndpoint:
assert "[DONE]" in body
assert "Hello!" in body
@pytest.mark.asyncio
async def test_stream_task_done_callback_enqueues_eos_for_chat_completions(self, adapter):
    """Regression guard for #24451: completion callback must signal SSE EOS."""

    class _CallbackRecorder:
        """Task stand-in that only remembers its done-callbacks."""

        def __init__(self):
            self.callbacks = []

        def add_done_callback(self, cb):
            self.callbacks.append(cb)

    recorder = _CallbackRecorder()

    def _fake_ensure_future(coro):
        # We short-circuit task scheduling in this unit test.
        coro.close()
        return recorder

    agent_result = (
        {"final_response": "ok", "messages": [], "api_calls": 1},
        {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
    )
    request_body = {
        "model": "test",
        "messages": [{"role": "user", "content": "hi"}],
        "stream": True,
    }

    app = _create_app(adapter)
    async with TestClient(TestServer(app)) as cli:
        with (
            patch.object(adapter, "_run_agent", new=AsyncMock(return_value=agent_result)),
            patch(
                "gateway.platforms.api_server.asyncio.ensure_future",
                side_effect=_fake_ensure_future,
            ),
            patch.object(
                adapter, "_write_sse_chat_completion", new_callable=AsyncMock
            ) as mock_write_sse,
        ):
            mock_write_sse.return_value = web.Response(status=200, text="ok")
            resp = await cli.post("/v1/chat/completions", json=request_body)
            assert resp.status == 200
            assert len(recorder.callbacks) == 1

            # The stream queue is the fifth positional argument of the writer.
            stream_q = mock_write_sse.call_args.args[4]
            assert stream_q.empty()
            done_callback = recorder.callbacks[0]
            done_callback(recorder)
            assert stream_q.get_nowait() is None
@pytest.mark.asyncio
async def test_stream_sends_keepalive_during_quiet_tool_gap(self, adapter):
"""Idle SSE streams should send keepalive comments while tools run silently."""
@ -1676,6 +1726,52 @@ class TestResponsesStreaming:
assert "Hello" in body
assert " world" in body
@pytest.mark.asyncio
async def test_stream_task_done_callback_enqueues_eos_for_responses(self, adapter):
    """Regression guard for #24451 on /v1/responses streaming path."""

    class _CallbackRecorder:
        """Task stand-in that only remembers its done-callbacks."""

        def __init__(self):
            self.callbacks = []

        def add_done_callback(self, cb):
            self.callbacks.append(cb)

    recorder = _CallbackRecorder()

    def _fake_ensure_future(coro):
        # We short-circuit task scheduling in this unit test.
        coro.close()
        return recorder

    agent_result = (
        {"final_response": "ok", "messages": [], "api_calls": 1},
        {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
    )
    request_body = {"model": "hermes-agent", "input": "hi", "stream": True}

    app = _create_app(adapter)
    async with TestClient(TestServer(app)) as cli:
        with (
            patch.object(adapter, "_run_agent", new=AsyncMock(return_value=agent_result)),
            patch(
                "gateway.platforms.api_server.asyncio.ensure_future",
                side_effect=_fake_ensure_future,
            ),
            patch.object(
                adapter, "_write_sse_responses", new_callable=AsyncMock
            ) as mock_write_sse,
        ):
            mock_write_sse.return_value = web.Response(status=200, text="ok")
            resp = await cli.post("/v1/responses", json=request_body)
            assert resp.status == 200
            assert len(recorder.callbacks) == 1

            # Here the writer receives the stream queue as a keyword argument.
            stream_q = mock_write_sse.call_args.kwargs["stream_q"]
            assert stream_q.empty()
            done_callback = recorder.callbacks[0]
            done_callback(recorder)
            assert stream_q.get_nowait() is None
@pytest.mark.asyncio
async def test_stream_emits_function_call_and_output_items(self, adapter):
app = _create_app(adapter)
@ -3061,4 +3157,3 @@ class TestSessionKeyHeader:
assert resp.status == 200
data = await resp.json()
assert data["features"]["session_key_header"] == "X-Hermes-Session-Key"

View file

@ -176,8 +176,8 @@ class TestStreamingConfig:
"fresh_final_after_seconds": "oops",
}
)
assert restored.edit_interval == 1.0
assert restored.buffer_threshold == 40
assert restored.edit_interval == 0.8
assert restored.buffer_threshold == 24
assert restored.fresh_final_after_seconds == 60.0

View file

@ -444,6 +444,93 @@ class TestScopedLocks:
assert acquired is False
assert existing["pid"] == 99999
def test_acquire_scoped_lock_replaces_pid_reused_by_unrelated_process(self, tmp_path, monkeypatch):
    """macOS regression: PID reused by an unrelated process with start_time=None.

    On macOS /proc is unavailable, so both the lock record and the live
    process report start_time=None.  The live PID is alive (os.kill
    succeeds) but belongs to a completely different program.  The lock
    must be treated as stale.
    """
    lock_dir = tmp_path / "locks"
    monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(lock_dir))
    lock_dir.mkdir(parents=True, exist_ok=True)
    lock_path = lock_dir / "telegram-bot-token-2bb80d537b1da3e3.lock"
    stale_record = {
        "pid": 873,
        "start_time": None,
        "kind": "hermes-gateway",
        "argv": ["/Users/user/.hermes/hermes-agent/hermes_cli/main.py", "gateway", "run", "--replace"],
    }
    lock_path.write_text(json.dumps(stale_record))

    stubs = (
        # Post-#21561 the liveness probe routes through
        # ``gateway.status._pid_exists`` (psutil-first, safe on Windows),
        # not ``os.kill``.
        ("_pid_exists", lambda pid: True),
        ("_get_process_start_time", lambda pid: None),
        ("_looks_like_gateway_process", lambda pid: False),
        # On macOS ``ps`` is available, so _read_process_cmdline returns the
        # unrelated process's name.  This confirms the PID was reused.
        ("_read_process_cmdline", lambda pid: "/usr/libexec/bluetoothuserd"),
    )
    for attribute, stub in stubs:
        monkeypatch.setattr(status, attribute, stub)

    acquired, _previous = status.acquire_scoped_lock(
        "telegram-bot-token", "secret", metadata={"platform": "telegram"}
    )

    assert acquired is True
    written = json.loads(lock_path.read_text())
    assert written["pid"] == os.getpid()
    assert written["metadata"]["platform"] == "telegram"
def test_acquire_scoped_lock_keeps_lock_when_cmdline_unreadable_but_record_is_gateway(self, tmp_path, monkeypatch):
    """Windows regression: ps unavailable so cmdline cannot be read.

    When start_time is None on both sides and _looks_like_gateway_process
    returns False because ps is missing (not because the PID belongs to an
    unrelated process), the stale check must not delete a valid gateway
    lock.  Before declaring the lock stale, fall back to the lock record's
    own argv, which the gateway wrote at startup.
    """
    lock_dir = tmp_path / "locks"
    monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(lock_dir))
    lock_dir.mkdir(parents=True, exist_ok=True)
    lock_path = lock_dir / "telegram-bot-token-2bb80d537b1da3e3.lock"
    gateway_record = {
        "pid": 99999,
        "start_time": None,
        "kind": "hermes-gateway",
        "argv": ["hermes_cli/main.py", "gateway", "run"],
    }
    lock_path.write_text(json.dumps(gateway_record))

    stubs = (
        ("_pid_exists", lambda pid: True),
        ("_get_process_start_time", lambda pid: None),
        # Windows: ps not available, so _read_process_cmdline returns None
        # and _looks_like_gateway_process returns False for every process.
        ("_looks_like_gateway_process", lambda pid: False),
        ("_read_process_cmdline", lambda pid: None),
    )
    for attribute, stub in stubs:
        monkeypatch.setattr(status, attribute, stub)

    acquired, holder = status.acquire_scoped_lock(
        "telegram-bot-token", "secret", metadata={"platform": "telegram"}
    )

    assert acquired is False
    assert holder["pid"] == 99999
def test_acquire_scoped_lock_keeps_lock_when_pid_reused_by_gateway(self, tmp_path, monkeypatch):
    """When start_time is None but the live PID still looks like a gateway, keep the lock."""
    lock_dir = tmp_path / "locks"
    monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(lock_dir))
    lock_dir.mkdir(parents=True, exist_ok=True)
    lock_path = lock_dir / "telegram-bot-token-2bb80d537b1da3e3.lock"
    gateway_record = {
        "pid": 99999,
        "start_time": None,
        "kind": "hermes-gateway",
        "argv": ["/Users/user/.hermes/hermes-agent/hermes_cli/main.py", "gateway", "run", "--replace"],
    }
    lock_path.write_text(json.dumps(gateway_record))

    stubs = (
        ("_pid_exists", lambda pid: True),
        ("_get_process_start_time", lambda pid: None),
        ("_looks_like_gateway_process", lambda pid: True),
    )
    for attribute, stub in stubs:
        monkeypatch.setattr(status, attribute, stub)

    acquired, holder = status.acquire_scoped_lock(
        "telegram-bot-token", "secret", metadata={"platform": "telegram"}
    )

    assert acquired is False
    assert holder["pid"] == 99999
def test_acquire_scoped_lock_replaces_stale_record(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock"
@ -811,3 +898,46 @@ class TestPlannedStopMarker:
ok = status.write_planned_stop_marker(target_pid=12345)
assert ok is False
class TestReadProcessCmdlinePsFallback:
    """Tests for _read_process_cmdline falling back to ps on non-Linux."""

    def test_ps_fallback_when_proc_unavailable(self, monkeypatch):
        """A missing /proc entry makes the ``ps`` output the result."""

        def missing_proc(self):
            raise FileNotFoundError

        def ps_ok(args, **kwargs):
            return SimpleNamespace(returncode=0, stdout="/usr/libexec/bluetoothuserd\n")

        monkeypatch.setattr(status.Path, "read_bytes", missing_proc)
        monkeypatch.setattr(status.subprocess, "run", ps_ok)

        cmdline = status._read_process_cmdline(873)
        assert cmdline == "/usr/libexec/bluetoothuserd"

    def test_ps_fallback_returns_none_on_failure(self, monkeypatch):
        """A non-zero ``ps`` exit with no /proc entry yields ``None``."""

        def missing_proc(self):
            raise FileNotFoundError

        def ps_failed(args, **kwargs):
            return SimpleNamespace(returncode=1, stdout="")

        monkeypatch.setattr(status.Path, "read_bytes", missing_proc)
        monkeypatch.setattr(status.subprocess, "run", ps_failed)

        cmdline = status._read_process_cmdline(99999)
        assert cmdline is None

    def test_proc_cmdline_takes_priority_over_ps(self, monkeypatch):
        """When the /proc read succeeds, its NUL-separated content is used."""
        reads = []

        def fake_read_bytes(self):
            reads.append("proc")
            return b"python\x00hermes_cli/main.py\x00gateway\x00"

        monkeypatch.setattr(status.Path, "read_bytes", fake_read_bytes)

        cmdline = status._read_process_cmdline(12345)
        assert "hermes_cli/main.py" in cmdline
        assert reads == ["proc"]

    def test_ps_fallback_used_when_proc_returns_empty(self, monkeypatch):
        """An empty /proc read also triggers the ``ps`` fallback."""

        def empty_proc(self):
            return b""

        def ps_ok(args, **kwargs):
            return SimpleNamespace(returncode=0, stdout="python hermes_cli/main.py gateway run\n")

        monkeypatch.setattr(status.Path, "read_bytes", empty_proc)
        monkeypatch.setattr(status.subprocess, "run", ps_ok)

        cmdline = status._read_process_cmdline(12345)
        assert "hermes_cli/main.py" in cmdline

View file

@ -0,0 +1,451 @@
"""Tests for Telegram inline keyboard clarify buttons.
Mirrors test_telegram_approval_buttons.py for the new ``send_clarify`` and
``cl:`` callback dispatch added in feat/clarify-gateway-buttons.
"""
import asyncio
import os
import sys
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Ensure the repo root is importable
# ---------------------------------------------------------------------------
_repo = str(Path(__file__).resolve().parents[2])
if _repo not in sys.path:
sys.path.insert(0, _repo)
# ---------------------------------------------------------------------------
# Minimal Telegram mock so TelegramAdapter can be imported (mirrors
# test_telegram_approval_buttons.py)
# ---------------------------------------------------------------------------
def _ensure_telegram_mock():
if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
return
mod = MagicMock()
mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
mod.constants.ParseMode.MARKDOWN = "Markdown"
mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
mod.constants.ParseMode.HTML = "HTML"
mod.constants.ChatType.PRIVATE = "private"
mod.constants.ChatType.GROUP = "group"
mod.constants.ChatType.SUPERGROUP = "supergroup"
mod.constants.ChatType.CHANNEL = "channel"
mod.error.NetworkError = type("NetworkError", (OSError,), {})
mod.error.TimedOut = type("TimedOut", (OSError,), {})
mod.error.BadRequest = type("BadRequest", (Exception,), {})
for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
sys.modules.setdefault(name, mod)
sys.modules.setdefault("telegram.error", mod.error)
_ensure_telegram_mock()
from gateway.platforms.telegram import TelegramAdapter
from gateway.config import Platform, PlatformConfig
def _make_adapter(extra=None):
    """Build a ``TelegramAdapter`` whose ``_bot`` and ``_app`` are mocks."""
    platform_config = PlatformConfig(
        enabled=True,
        token="test-token",
        extra=extra or {},
    )
    telegram_adapter = TelegramAdapter(platform_config)
    telegram_adapter._bot, telegram_adapter._app = AsyncMock(), MagicMock()
    return telegram_adapter
def _clear_clarify_state():
    """Empty the clarify module's registries while holding its lock."""
    from tools import clarify_gateway as cm

    with cm._lock:
        for registry in (cm._entries, cm._session_index, cm._notify_cbs):
            registry.clear()
# ===========================================================================
# send_clarify — render
# ===========================================================================
class TestTelegramSendClarify:
    """Verify the rendered prompt has buttons or none, and stores state."""

    def setup_method(self):
        _clear_clarify_state()

    @staticmethod
    def _adapter_sending(message_id):
        """Return an adapter whose ``send_message`` yields a message with ``message_id``."""
        adapter = _make_adapter()
        sent = MagicMock()
        sent.message_id = message_id
        adapter._bot.send_message = AsyncMock(return_value=sent)
        return adapter

    @pytest.mark.asyncio
    async def test_multi_choice_renders_buttons_and_other(self):
        adapter = self._adapter_sending(100)

        outcome = await adapter.send_clarify(
            chat_id="12345",
            question="Which option?",
            choices=["alpha", "beta", "gamma"],
            clarify_id="cid1",
            session_key="sk1",
        )

        assert outcome.success is True
        assert outcome.message_id == "100"
        sent_kwargs = adapter._bot.send_message.call_args.kwargs
        assert sent_kwargs["chat_id"] == 12345
        assert "Which option?" in sent_kwargs["text"]
        # InlineKeyboardMarkup with N+1 buttons (3 choices + Other)
        assert sent_kwargs["reply_markup"] is not None
        # Mocked InlineKeyboardMarkup — just verify it was constructed
        # with rows.  We check state instead of poking the mock structure.
        assert "cid1" in adapter._clarify_state
        assert adapter._clarify_state["cid1"] == "sk1"

    @pytest.mark.asyncio
    async def test_open_ended_no_keyboard(self):
        adapter = self._adapter_sending(101)

        outcome = await adapter.send_clarify(
            chat_id="12345",
            question="What is your name?",
            choices=None,
            clarify_id="cid2",
            session_key="sk2",
        )

        assert outcome.success is True
        sent_kwargs = adapter._bot.send_message.call_args.kwargs
        # No reply_markup means no buttons — open-ended path
        assert "reply_markup" not in sent_kwargs
        assert "What is your name?" in sent_kwargs["text"]
        assert adapter._clarify_state["cid2"] == "sk2"

    @pytest.mark.asyncio
    async def test_not_connected(self):
        adapter = _make_adapter()
        adapter._bot = None

        outcome = await adapter.send_clarify(
            chat_id="12345",
            question="?",
            choices=["a"],
            clarify_id="cid3",
            session_key="sk3",
        )

        assert outcome.success is False

    @pytest.mark.asyncio
    async def test_truncates_long_choice_label(self):
        adapter = self._adapter_sending(102)
        oversized_choice = "x" * 200  # > 60 char cap

        outcome = await adapter.send_clarify(
            chat_id="12345",
            question="?",
            choices=[oversized_choice],
            clarify_id="cid4",
            session_key="sk4",
        )

        assert outcome.success is True
        # The truncation logic replaces with "..." past 57 chars; we don't
        # inspect the mock's button labels directly (auto-MagicMock), but
        # we can verify the call didn't raise on absurdly long input.

    @pytest.mark.asyncio
    async def test_html_escapes_question(self):
        adapter = self._adapter_sending(103)

        await adapter.send_clarify(
            chat_id="12345",
            question="<script>alert(1)</script>",
            choices=["x"],
            clarify_id="cid5",
            session_key="sk5",
        )

        rendered_text = adapter._bot.send_message.call_args.kwargs["text"]
        # Must NOT contain raw <script> — html.escape should have neutralized
        assert "<script>" not in rendered_text
        assert "&lt;script&gt;" in rendered_text
# ===========================================================================
# Callback dispatch — _handle_callback_query routing for cl:* prefixes
# ===========================================================================
class TestTelegramClarifyCallback:
    """Exercise ``_handle_callback_query`` routing for ``cl:*`` callback data."""

    def setup_method(self):
        _clear_clarify_state()

    @staticmethod
    def _button_press(
        data,
        *,
        user_id="777",
        first_name="Tester",
        text=None,
        chat_type=None,
        stub_edit=True,
    ):
        """Build a mocked ``(update, query)`` pair for a callback with *data*.

        ``text`` and ``chat_type`` are only assigned when given, and
        ``edit_message_text`` is only stubbed explicitly when ``stub_edit`` is
        true, so each test keeps exactly the attributes it set before.
        """
        query = AsyncMock()
        query.data = data
        query.message = MagicMock()
        query.message.chat_id = 12345
        if chat_type is not None:
            query.message.chat.type = chat_type
        if text is not None:
            query.message.text = text
        query.from_user = MagicMock()
        query.from_user.id = user_id
        query.from_user.first_name = first_name
        query.answer = AsyncMock()
        if stub_edit:
            query.edit_message_text = AsyncMock()
        update = MagicMock()
        update.callback_query = query
        return update, query

    @staticmethod
    async def _dispatch_allowing_everyone(adapter, update):
        """Run the callback handler with ``TELEGRAM_ALLOWED_USERS`` set to ``*``."""
        context = MagicMock()
        with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}, clear=False):
            await adapter._handle_callback_query(update, context)

    @staticmethod
    def _entry(clarify_id):
        """Fetch the gateway entry for *clarify_id* (or ``None``) under the gateway lock."""
        from tools import clarify_gateway as cm

        with cm._lock:
            return cm._entries.get(clarify_id)

    @pytest.mark.asyncio
    async def test_numeric_choice_resolves_with_choice_text(self):
        from tools import clarify_gateway as cm

        adapter = _make_adapter()
        # Register the entry first so the handler can map index -> choice text.
        cm.register("cidA", "sk-cb", "Pick", ["red", "green", "blue"])
        adapter._clarify_state["cidA"] = "sk-cb"
        update, query = self._button_press("cl:cidA:1", text="Pick")  # green

        await self._dispatch_allowing_everyone(adapter, update)

        # Adapter-side state is popped once the choice is taken.
        assert "cidA" not in adapter._clarify_state
        # wait_for_response was never called in this test, so the entry is
        # expected to still be registered, now carrying the response.
        entry = self._entry("cidA")
        assert entry is not None
        assert entry.response == "green"
        assert entry.event.is_set()
        query.answer.assert_called_once()
        query.edit_message_text.assert_called_once()

    @pytest.mark.asyncio
    async def test_other_button_flips_to_text_mode(self):
        from tools import clarify_gateway as cm

        adapter = _make_adapter()
        cm.register("cidB", "sk-cb-other", "Pick", ["x", "y"])
        adapter._clarify_state["cidB"] = "sk-cb-other"
        update, _query = self._button_press("cl:cidB:other", text="Pick")

        await self._dispatch_allowing_everyone(adapter, update)

        # The entry should now be waiting for a typed answer.
        pending = cm.get_pending_for_session("sk-cb-other")
        assert pending is not None
        assert pending.clarify_id == "cidB"
        assert pending.awaiting_text is True
        # Adapter state stays: the user has not answered yet.
        assert "cidB" in adapter._clarify_state
        # ...and the entry itself is still unresolved.
        entry = self._entry("cidB")
        assert entry is not None
        assert not entry.event.is_set()

    @pytest.mark.asyncio
    async def test_already_resolved(self):
        adapter = _make_adapter()
        # No adapter state exists for cidGone.
        update, query = self._button_press("cl:cidGone:0", stub_edit=False)

        await self._dispatch_allowing_everyone(adapter, update)

        query.answer.assert_called_once()
        # Nothing to resolve; the user is told it was already handled.
        assert "already" in query.answer.call_args[1]["text"].lower()

    @pytest.mark.asyncio
    async def test_unauthorized_user_rejected(self):
        from tools import clarify_gateway as cm

        adapter = _make_adapter()
        cm.register("cidC", "sk-auth", "Pick", ["a", "b"])
        adapter._clarify_state["cidC"] = "sk-auth"

        # A runner whose authorization check always refuses.
        class _DenyRunner:
            async def _handle_message(self, event):
                return None

            def _is_user_authorized(self, source):
                return False

        adapter._message_handler = _DenyRunner()._handle_message
        update, query = self._button_press(
            "cl:cidC:0",
            user_id="999",
            first_name="Mallory",
            text="Pick",
            chat_type="private",
        )

        # No allow-all environment override here: the runner's verdict applies.
        await adapter._handle_callback_query(update, MagicMock())

        # The entry must stay unresolved and the user be told they are not
        # authorized.
        entry = self._entry("cidC")
        assert entry is not None
        assert not entry.event.is_set()
        query.answer.assert_called_once()
        assert "not authorized" in query.answer.call_args[1]["text"].lower()
        # Adapter state is preserved.
        assert adapter._clarify_state["cidC"] == "sk-auth"

    @pytest.mark.asyncio
    async def test_invalid_choice_token(self):
        from tools import clarify_gateway as cm

        adapter = _make_adapter()
        cm.register("cidD", "sk-inv", "Q?", ["a"])
        adapter._clarify_state["cidD"] = "sk-inv"
        update, query = self._button_press(
            "cl:cidD:not-a-number", text="Q?", stub_edit=False
        )

        await self._dispatch_allowing_everyone(adapter, update)

        entry = self._entry("cidD")
        assert entry is not None
        assert not entry.event.is_set()
        query.answer.assert_called_once()
        assert "invalid" in query.answer.call_args[1]["text"].lower()
# ===========================================================================
# Base adapter fallback render — text numbered list
# ===========================================================================
class TestBaseAdapterClarifyFallback:
    """An adapter that does not override ``send_clarify`` falls back to plain text."""

    @staticmethod
    def _recording_stub():
        """Return a minimal adapter subclass instance that records every ``send`` call."""
        from gateway.platforms.base import BasePlatformAdapter, SendResult

        class _Stub(BasePlatformAdapter):
            name = "stub"

            def __init__(self):
                # The base initialiser is deliberately skipped; it is not
                # what these tests exercise.
                self.sent: list = []

            async def connect(self):
                pass

            async def disconnect(self):
                pass

            async def send(self, chat_id, content, **kw):
                self.sent.append({"chat_id": chat_id, "content": content})
                return SendResult(success=True, message_id="1")

            async def edit(self, *a, **k):
                return SendResult(success=False)

            async def get_history(self, *a, **k):
                return []

            async def get_chat_info(self, *a, **k):
                return {}

        return _Stub()

    @pytest.mark.asyncio
    async def test_numbered_text_fallback(self):
        adapter = self._recording_stub()

        outcome = await adapter.send_clarify(
            chat_id="c",
            question="Pick a fruit",
            choices=["apple", "banana"],
            clarify_id="x",
            session_key="s",
        )

        assert outcome.success is True
        assert len(adapter.sent) == 1
        body = adapter.sent[0]["content"]
        assert "Pick a fruit" in body
        assert "1." in body and "apple" in body
        assert "2." in body and "banana" in body

    @pytest.mark.asyncio
    async def test_open_ended_fallback_renders_question_only(self):
        adapter = self._recording_stub()

        await adapter.send_clarify(
            chat_id="c",
            question="Free form?",
            choices=None,
            clarify_id="x",
            session_key="s",
        )

        body = adapter.sent[0]["content"]
        assert "Free form?" in body
        # No choices were supplied, so no numbered list is expected.
        assert "1." not in body

View file

@ -218,17 +218,62 @@ async def test_on_processing_complete_skipped_when_disabled(monkeypatch):
@pytest.mark.asyncio
async def test_on_processing_complete_cancelled_keeps_existing_reaction(monkeypatch):
"""Expected cancellation should not replace the in-progress reaction."""
async def test_on_processing_complete_cancelled_clears_reaction(monkeypatch):
    """A cancelled run must clear the in-progress reaction.

    If it is not cleared, the in-progress reaction stays on the user's
    message until a later agent run replaces it; after a ``/stop`` that ends
    the session, nothing would ever remove it.
    """
    monkeypatch.setenv("TELEGRAM_REACTIONS", "true")
    telegram = _make_adapter()
    incoming = _make_event()

    await telegram.on_processing_complete(incoming, ProcessingOutcome.CANCELLED)

    # Per the original note: calling set_message_reaction with reaction=None
    # clears all reactions on the message (documented Bot API semantics,
    # already available on PTB 22.6).
    expected_call = {"chat_id": 123, "message_id": 456, "reaction": None}
    telegram._bot.set_message_reaction.assert_awaited_once_with(**expected_call)
@pytest.mark.asyncio
async def test_on_processing_complete_cancelled_skipped_when_disabled(monkeypatch):
    """With ``TELEGRAM_REACTIONS`` unset, a cancelled run makes no reaction API call."""
    monkeypatch.delenv("TELEGRAM_REACTIONS", raising=False)
    telegram = _make_adapter()
    incoming = _make_event()

    await telegram.on_processing_complete(incoming, ProcessingOutcome.CANCELLED)

    telegram._bot.set_message_reaction.assert_not_awaited()
@pytest.mark.asyncio
async def test_clear_reactions_handles_api_error_gracefully(monkeypatch):
    """An exception from the bot during a clear is swallowed and reported as ``False``."""
    monkeypatch.setenv("TELEGRAM_REACTIONS", "true")
    telegram = _make_adapter()
    failing_call = AsyncMock(side_effect=RuntimeError("no perms"))
    telegram._bot.set_message_reaction = failing_call

    cleared = await telegram._clear_reactions("123", "456")

    assert cleared is False
@pytest.mark.asyncio
async def test_clear_reactions_returns_false_without_bot(monkeypatch):
    """With no bot attached, ``_clear_reactions`` reports ``False``."""
    telegram = _make_adapter()
    telegram._bot = None

    cleared = await telegram._clear_reactions("123", "456")

    assert cleared is False
# ── config.py bridging ───────────────────────────────────────────────

View file

@ -8,7 +8,7 @@ only renders as a voice bubble when explicitly flagged) and via
"""
from types import SimpleNamespace
from unittest.mock import AsyncMock
from unittest.mock import AsyncMock, MagicMock
import pytest
@ -106,6 +106,16 @@ async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_
adapter.send_document.assert_not_awaited()
def _fake_runner(thread_meta):
"""Build a fake GatewayRunner-like object with the helper methods needed by
_deliver_media_from_response."""
runner = SimpleNamespace(
_thread_metadata_for_source=lambda source, anchor=None: thread_meta,
_reply_anchor_for_event=lambda event: None,
)
return runner
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender():
event = _event(thread_id="topic-1")
@ -121,7 +131,7 @@ async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sen
)
await GatewayRunner._deliver_media_from_response(
object(),
_fake_runner({"thread_id": "topic-1"}),
"MEDIA:/tmp/speech.flac",
event,
adapter,
@ -150,7 +160,7 @@ async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_doc
)
await GatewayRunner._deliver_media_from_response(
object(),
_fake_runner({"thread_id": "topic-1"}),
"MEDIA:/tmp/speech.ogg",
event,
adapter,
@ -181,7 +191,7 @@ async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender(
)
await GatewayRunner._deliver_media_from_response(
object(),
_fake_runner({"thread_id": "topic-1"}),
"MEDIA:/tmp/speech.mp3",
event,
adapter,

View file

@ -45,6 +45,9 @@ def _make_runner(hermes_home=None):
runner._pending_messages = {}
runner._pending_approvals = {}
runner._failed_platforms = {}
# config is accessed by _check_slash_access and quick_commands lookup;
# None makes policy_for_source return a disabled (allow-all) policy.
runner.config = None
# Bypass the destructive-slash confirm gate — this test exercises
# update-prompt interception, not the confirm prompt.
runner._read_user_config = lambda: {

View file

@ -129,7 +129,7 @@ class TestVerboseCommand:
@pytest.mark.asyncio
async def test_defaults_to_all_when_no_tool_progress_set(self, tmp_path, monkeypatch):
"""When tool_progress is not in config, defaults to 'all' then cycles to verbose."""
"""When tool_progress is not in config, defaults to platform default then cycles."""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
@ -143,17 +143,17 @@ class TestVerboseCommand:
runner = _make_runner()
result = await runner._handle_verbose_command(_make_event())
# Telegram default is "all" (high tier) → cycles to verbose
assert "VERBOSE" in result
# Telegram platform default is "new" → cycles to "all"
assert "ALL" in result
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["display"]["platforms"]["telegram"]["tool_progress"] == "verbose"
assert saved["display"]["platforms"]["telegram"]["tool_progress"] == "all"
@pytest.mark.asyncio
async def test_per_platform_isolation(self, tmp_path, monkeypatch):
"""Cycling /verbose on Telegram doesn't change Slack's setting.
Without a global tool_progress, each platform uses its built-in
default: Telegram = 'all' (high tier), Slack = 'off' (quiet Slack default).
default: Telegram = 'new' (overridden high tier), Slack = 'off' (quiet Slack default).
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
@ -178,8 +178,8 @@ class TestVerboseCommand:
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
platforms = saved["display"]["platforms"]
# Telegram: all -> verbose (high tier default = all)
assert platforms["telegram"]["tool_progress"] == "verbose"
# Telegram: new -> all (platform default = new)
assert platforms["telegram"]["tool_progress"] == "all"
# Slack: off -> new (first /verbose cycle from quiet default)
assert platforms["slack"]["tool_progress"] == "new"

View file

@ -170,6 +170,50 @@ def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
assert singleton["inference_base_url"] == "https://inference.example.com/v1"
def test_auth_add_minimax_oauth_starts_login_and_persists_pool_entry(tmp_path, monkeypatch):
    """``auth add minimax-oauth`` stores the login result as a credential-pool entry."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
    token = _jwt_with_email("minimax@example.com")

    def _stub_login(**kwargs):
        # Stand-in for the interactive OAuth flow: returns a fixed login result.
        return {
            "provider": "minimax-oauth",
            "region": "global",
            "portal_base_url": "https://api.minimax.io",
            "inference_base_url": "https://api.minimax.io/anthropic",
            "client_id": "client-id",
            "scope": "group_id profile model.completion",
            "token_type": "Bearer",
            "access_token": token,
            "refresh_token": "refresh-token",
            "resource_url": None,
            "obtained_at": "2026-05-11T10:00:00+00:00",
            "expires_at": "2026-05-14T10:00:00+00:00",
            "expires_in": 259200,
        }

    monkeypatch.setattr("hermes_cli.auth._minimax_oauth_login", _stub_login)

    from hermes_cli.auth_commands import auth_add_command

    class _Args:
        provider = "minimax-oauth"
        auth_type = "oauth"
        api_key = None
        label = None
        no_browser = True
        timeout = None

    auth_add_command(_Args())

    auth_file = tmp_path / "hermes" / "auth.json"
    stored = json.loads(auth_file.read_text())
    pool = stored["credential_pool"]["minimax-oauth"]
    entry = next(item for item in pool if item["source"] == "manual:minimax_oauth")
    assert entry["label"] == "minimax@example.com"
    assert entry["access_token"] == token
    assert entry["refresh_token"] == "refresh-token"
    assert entry["base_url"] == "https://api.minimax.io/anthropic"
def test_auth_add_nous_oauth_honors_custom_label(tmp_path, monkeypatch):
"""`hermes auth add nous --type oauth --label <name>` must preserve the
custom label end-to-end it was silently dropped in the first cut of the

View file

@ -242,12 +242,14 @@ class TestTelegramBotCommands:
tg_name = cmd.name.replace("-", "_")
assert tg_name not in names
def test_excludes_commands_with_required_args(self):
def test_includes_builtin_commands_with_required_args(self):
"""Built-in arg-taking commands (e.g. /queue, /steer, /background)
are now included because their handlers return usage text when
invoked without arguments issue #24312."""
names = {name for name, _ in telegram_bot_commands()}
assert "background" not in names
assert "queue" not in names
assert "steer" not in names
assert "background" in GATEWAY_KNOWN_COMMANDS
assert "background" in names
assert "queue" in names
assert "steer" in names
class TestSlackSubcommandMap:

View file

@ -2,10 +2,11 @@
from pathlib import Path
def test_profiles_nav_label_uses_short_multi_agents_copy():
def test_profiles_nav_label_uses_short_copy():
en_i18n = Path(__file__).resolve().parents[2] / "web" / "src" / "i18n" / "en.ts"
content = en_i18n.read_text(encoding="utf-8")
assert 'profiles: "profiles : multi agents"' in content
assert "Profiles: Running Multiple Agents" not in content
# Nav label should be the clean short form, not the old verbose string
assert 'profiles: "Profiles"' in content
assert "profiles : multi agents" not in content

View file

@ -0,0 +1,61 @@
"""Host-specific gating in ``hermes_cli.gateway._all_platforms()``.
Some messaging platforms can't function on every host. The gate lives
in one place, ``_all_platforms()``, so that the setup wizard, the curses
gateway-config menu, and any future picker all see the same filtered
list.
Currently:
- Matrix is hidden on Windows. The ``[matrix]`` extra pulls
``mautrix[encryption]`` -> ``python-olm``, which has no Windows wheel
and needs ``make`` + libolm to build from sdist. There's no native
Windows path that works.
"""
import sys
class TestMatrixHiddenOnWindows:
    """``_all_platforms()`` must drop matrix on Windows and nothing else."""

    @staticmethod
    def _platform_keys(monkeypatch, host):
        """Return the set of platform keys offered when ``sys.platform`` is *host*."""
        import hermes_cli.gateway as gateway_mod

        monkeypatch.setattr(gateway_mod.sys, "platform", host)
        return {entry["key"] for entry in gateway_mod._all_platforms()}

    def test_matrix_present_on_linux(self, monkeypatch):
        """Sanity check: matrix stays in the picker on Linux."""
        keys = self._platform_keys(monkeypatch, "linux")
        assert "matrix" in keys, "matrix must be available on Linux"

    def test_matrix_present_on_macos(self, monkeypatch):
        keys = self._platform_keys(monkeypatch, "darwin")
        assert "matrix" in keys, "matrix must be available on macOS"

    def test_matrix_hidden_on_windows(self, monkeypatch):
        """The gate itself: matrix must not be offered on Windows."""
        keys = self._platform_keys(monkeypatch, "win32")
        assert "matrix" not in keys, (
            "matrix must be hidden on Windows — python-olm has no "
            "Windows wheel and no native build path"
        )

    def test_other_platforms_unaffected_on_windows(self, monkeypatch):
        """The gate must remove matrix only, with no collateral damage."""
        keys = self._platform_keys(monkeypatch, "win32")
        # Representative platforms with no Windows blockers; the picker
        # should still surface each of them.
        for must_have in ("telegram", "discord", "slack", "mattermost"):
            assert must_have in keys, (
                f"{must_have} disappeared from Windows picker — gate is "
                "over-filtering"
            )

View file

@ -7,6 +7,7 @@ from hermes_cli.models import (
is_nous_free_tier, partition_nous_models_by_tier,
check_nous_free_tier, _FREE_TIER_CACHE_TTL,
union_with_portal_free_recommendations,
union_with_portal_paid_recommendations,
)
import hermes_cli.models as _models_mod
@ -506,6 +507,147 @@ class TestUnionWithPortalFreeRecommendations:
assert p["qwen/qwen3.6-plus"] == self._FREE
class TestUnionWithPortalPaidRecommendations:
    """Tests for union_with_portal_paid_recommendations.

    Mirror of TestUnionWithPortalFreeRecommendations: the Portal's
    paidRecommendedModels endpoint is the source of truth for what's a
    blessed paid model *right now*. The in-repo curated list and the
    docs-hosted manifest can lag; this helper guarantees newly-launched
    paid models surface in the picker for paid-tier users without a CLI
    release.
    """

    _PAID = {"prompt": "0.000003", "completion": "0.000015"}
    _FREE = {"prompt": "0", "completion": "0"}

    def _payload(self, paid_models: list[str]) -> dict:
        """Build a Portal response advertising *paid_models* as paid recommendations."""
        entries = [{"modelName": mid, "displayName": mid} for mid in paid_models]
        return {"paidRecommendedModels": entries}

    @staticmethod
    def _union(curated, pricing, **fetch_behaviour):
        """Call the helper with the Portal fetch patched.

        ``fetch_behaviour`` is forwarded to ``patch`` (``return_value=...`` or
        ``side_effect=...``).
        """
        with patch(
            "hermes_cli.models.fetch_nous_recommended_models",
            **fetch_behaviour,
        ):
            return union_with_portal_paid_recommendations(curated, pricing, "")

    def test_adds_portal_paid_model_missing_from_curated(self):
        """A Portal-advertised paid model not in curated is prepended."""
        curated = ["anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}

        ids, prices = self._union(
            curated, pricing, return_value=self._payload(["openai/gpt-5.4"])
        )

        assert ids[0] == "openai/gpt-5.4"  # prepended
        assert "anthropic/claude-opus-4.6" in ids
        # Pricing that already existed is left alone.
        assert prices["anthropic/claude-opus-4.6"] == self._PAID

    def test_does_not_synthesize_pricing_for_paid_models(self):
        """Paid recommendations missing from live pricing get no synthetic entry.

        Synthesizing zero pricing (like the free helper does) would mislead
        :func:`partition_nous_models_by_tier` into treating them as free;
        synthesizing a non-zero placeholder would lie to the user. The
        right thing is to leave pricing absent so the picker shows a blank
        column until the live pricing endpoint catches up.
        """
        curated = ["anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}

        _, prices = self._union(
            curated, pricing, return_value=self._payload(["openai/gpt-5.4"])
        )

        assert "openai/gpt-5.4" not in prices
        assert prices["anthropic/claude-opus-4.6"] == self._PAID

    def test_does_not_duplicate_curated_entries(self):
        """A Portal paid model already in curated is not duplicated."""
        curated = ["openai/gpt-5.4", "anthropic/claude-opus-4.6"]
        pricing = {
            "openai/gpt-5.4": self._PAID,
            "anthropic/claude-opus-4.6": self._PAID,
        }

        ids, prices = self._union(
            curated, pricing, return_value=self._payload(["openai/gpt-5.4"])
        )

        assert ids == curated
        assert prices == pricing

    def test_empty_payload_returns_inputs_unchanged(self):
        """Empty Portal response leaves curated + pricing untouched."""
        curated = ["a", "b"]
        pricing = {"a": self._PAID}

        ids, prices = self._union(curated, pricing, return_value={})

        assert ids == curated
        assert prices == pricing

    def test_missing_paidRecommendedModels_key(self):
        """Portal payload without paidRecommendedModels degrades gracefully."""
        curated = ["a"]
        pricing = {"a": self._PAID}

        ids, prices = self._union(
            curated,
            pricing,
            return_value={"freeRecommendedModels": [{"modelName": "x"}]},
        )

        assert ids == curated
        assert prices == pricing

    def test_fetch_failure_returns_inputs(self):
        """Network failures don't blow up the picker."""
        curated = ["a"]
        pricing = {"a": self._PAID}

        ids, prices = self._union(
            curated, pricing, side_effect=RuntimeError("network down")
        )

        assert ids == curated
        assert prices == pricing

    def test_invalid_entries_skipped(self):
        """Non-dict / missing-modelName entries are filtered out."""
        curated = ["a"]
        pricing = {"a": self._PAID}
        malformed_payload = {
            "paidRecommendedModels": [
                "not-a-dict",
                {"displayName": "no-modelName"},
                {"modelName": ""},
                {"modelName": "openai/gpt-5.4"},
            ]
        }

        ids, prices = self._union(curated, pricing, return_value=malformed_payload)

        assert ids == ["openai/gpt-5.4", "a"]
        # No synthetic entry: pricing is untouched.
        assert "openai/gpt-5.4" not in prices

    def test_preserves_relative_order_of_new_paid_models(self):
        """Multiple new paid models are prepended in payload order."""
        curated = ["anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}

        ids, _ = self._union(
            curated,
            pricing,
            return_value=self._payload(["openai/gpt-5.4", "openai/gpt-5.5"]),
        )

        assert ids == [
            "openai/gpt-5.4",
            "openai/gpt-5.5",
            "anthropic/claude-opus-4.6",
        ]
class TestCheckNousFreeTierCache:
"""Tests for the TTL cache on check_nous_free_tier()."""

View file

@ -2285,3 +2285,39 @@ def test_minimax_oauth_runtime_uses_inference_base_url(monkeypatch):
resolved = rp.resolve_runtime_provider(requested="minimax-oauth")
assert MINIMAX_OAUTH_CN_INFERENCE.rstrip("/") in resolved["base_url"]
def test_minimax_oauth_pool_forces_anthropic_messages_despite_stale_config(monkeypatch):
    """A pooled MiniMax OAuth credential must win over a stale ``chat_completions`` api_mode."""

    class _Entry:
        access_token = "oauth-token"
        source = "manual:minimax_oauth"
        base_url = "https://api.minimax.io/anthropic"

    class _Pool:
        def has_credentials(self):
            return True

        def select(self):
            return _Entry()

    def _stale_model_config():
        # Model config still carrying the outdated api_mode.
        return {
            "provider": "minimax-oauth",
            "default": "MiniMax-M2.7",
            "api_mode": "chat_completions",
        }

    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax-oauth")
    monkeypatch.setattr(rp, "_get_model_config", _stale_model_config)
    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
    monkeypatch.setattr(rp, "_resolve_named_custom_runtime", lambda **k: None)
    monkeypatch.setattr(rp, "_resolve_explicit_runtime", lambda **k: None)

    runtime = rp.resolve_runtime_provider(requested="minimax-oauth")

    assert runtime["provider"] == "minimax-oauth"
    assert runtime["api_mode"] == "anthropic_messages"
    assert runtime["base_url"] == "https://api.minimax.io/anthropic"

View file

@ -0,0 +1,330 @@
"""Tests for hermes_cli.security_advisories.
The advisory module is the user-facing detection / remediation surface
for supply-chain attacks (e.g. the Mini Shai-Hulud worm of May 2026 that
poisoned mistralai 2.4.6 on PyPI). These tests exercise the public API in
isolation: no real package metadata, no real config, no real cache.
"""
from __future__ import annotations
import time
from pathlib import Path
from typing import Iterator
import pytest
import hermes_cli.security_advisories as adv
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def fake_advisory() -> adv.Advisory:
    """Provide one self-contained Advisory flagging ``fake-malicious-pkg`` 6.6.6."""
    flagged_releases = (("fake-malicious-pkg", frozenset({"6.6.6"})),)
    steps = (
        "pip uninstall -y fake-malicious-pkg",
        "Rotate any credentials that may have been exposed.",
    )
    return adv.Advisory(
        id="test-advisory-2026-99",
        title="Test advisory",
        summary="Pretend this package has been compromised.",
        url="https://example.com/advisory",
        compromised=flagged_releases,
        remediation=steps,
        published="2026-01-01",
        severity="critical",
    )
@pytest.fixture
def isolated_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point ``Path.home`` and ``HERMES_HOME`` into *tmp_path* and return the Hermes home.

    Creates ``<tmp_path>/.hermes/cache`` so cache and config writes made by
    the code under test stay inside the sandbox.
    """
    cache_dir = tmp_path / ".hermes" / "cache"
    # parents=True creates ``.hermes`` and ``cache`` in one call.
    cache_dir.mkdir(parents=True)
    hermes_home = cache_dir.parent
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    return hermes_home
@pytest.fixture
def patched_version(monkeypatch: pytest.MonkeyPatch) -> Iterator[dict[str, str]]:
    """Replace ``adv._installed_version`` with a lookup into a dict the test can fill.

    Packages missing from the dict resolve to ``None``.
    """
    versions: dict[str, str] = {}

    def _lookup(pkg):
        return versions.get(pkg)

    monkeypatch.setattr(adv, "_installed_version", _lookup)
    yield versions
# ---------------------------------------------------------------------------
# detect_compromised
# ---------------------------------------------------------------------------
class TestDetectCompromised:
    """``detect_compromised`` against a controllable installed-version table."""

    def test_no_match_returns_empty_list(self, fake_advisory, patched_version):
        # The version table is empty: the flagged package is not installed.
        assert adv.detect_compromised(advisories=[fake_advisory]) == []

    def test_exact_version_match(self, fake_advisory, patched_version):
        patched_version["fake-malicious-pkg"] = "6.6.6"

        hits = adv.detect_compromised(advisories=[fake_advisory])

        assert len(hits) == 1
        (hit,) = hits
        assert hit.advisory.id == fake_advisory.id
        assert hit.package == "fake-malicious-pkg"
        assert hit.installed_version == "6.6.6"

    def test_safe_version_does_not_match(self, fake_advisory, patched_version):
        # Installed, but at a version outside the compromised set.
        patched_version["fake-malicious-pkg"] = "6.6.5"

        assert adv.detect_compromised(advisories=[fake_advisory]) == []

    def test_empty_compromised_set_matches_any_version(
        self, patched_version
    ):
        # An empty version set acts as a wildcard ("any version is suspect"),
        # used when a whole maintainer namespace is owned.
        namespace_wide = adv.Advisory(
            id="wildcard",
            title="Whole namespace owned",
            summary="x",
            url="x",
            compromised=(("evil-namespace", frozenset()),),
            remediation=("uninstall it",),
        )
        patched_version["evil-namespace"] = "0.0.1"

        hits = adv.detect_compromised(advisories=[namespace_wide])

        assert len(hits) == 1
        assert hits[0].installed_version == "0.0.1"
# ---------------------------------------------------------------------------
# Acknowledgement persistence
# ---------------------------------------------------------------------------
class TestAck:
    """Reading, filtering by, and persisting acknowledged advisory ids."""

    @staticmethod
    def _hit_for(advisory):
        """Build the hit these tests use: ``fake-malicious-pkg`` 6.6.6 under *advisory*."""
        return adv.AdvisoryHit(
            advisory=advisory,
            package="fake-malicious-pkg",
            installed_version="6.6.6",
        )

    def test_get_acked_ids_empty_when_no_config(self, monkeypatch):
        # A failing load_config must yield an empty set rather than a crash.
        def _broken_load():
            raise RuntimeError("boom")

        monkeypatch.setattr("hermes_cli.config.load_config", _broken_load)

        assert adv.get_acked_ids() == set()

    def test_filter_unacked_strips_dismissed(self, fake_advisory, monkeypatch):
        hit = self._hit_for(fake_advisory)
        monkeypatch.setattr(adv, "get_acked_ids", lambda: {fake_advisory.id})

        assert adv.filter_unacked([hit]) == []

    def test_filter_unacked_passes_through_unknown(
        self, fake_advisory, monkeypatch
    ):
        hit = self._hit_for(fake_advisory)
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())

        assert adv.filter_unacked([hit]) == [hit]

    def test_ack_advisory_persists_id(self, isolated_home, monkeypatch):
        # A small in-memory dict stands in for the config layer, so the test
        # does not depend on the full hermes_cli.config bootstrap.
        store: dict = {"security": {}}

        def _save(cfg):
            store.update(cfg)

        monkeypatch.setattr("hermes_cli.config.load_config", lambda: store)
        monkeypatch.setattr("hermes_cli.config.save_config", _save)

        assert adv.ack_advisory("test-advisory-2026-99") is True
        assert "test-advisory-2026-99" in store["security"]["acked_advisories"]

        # Acknowledging again must not add a duplicate.
        adv.ack_advisory("test-advisory-2026-99")
        occurrences = store["security"]["acked_advisories"].count(
            "test-advisory-2026-99"
        )
        assert occurrences == 1

    def test_ack_advisory_rejects_blank(self, isolated_home):
        assert adv.ack_advisory("") is False
        assert adv.ack_advisory(" ") is False
# ---------------------------------------------------------------------------
# Banner cache rate limiting
# ---------------------------------------------------------------------------
class TestBannerCache:
    """Rate limiting of the advisory banner via the on-disk banner cache."""

    @staticmethod
    def _make_hit(advisory):
        """Build the single advisory hit shared by every test in this class."""
        return adv.AdvisoryHit(
            advisory=advisory,
            package="fake-malicious-pkg",
            installed_version="6.6.6",
        )

    def test_first_call_returns_due_hits(
        self, fake_advisory, isolated_home, monkeypatch
    ):
        """The first call for an unacked hit reports it as due."""
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        hit = self._make_hit(fake_advisory)
        assert adv.hits_due_for_banner([hit]) == [hit]

    def test_second_call_within_window_suppresses(
        self, fake_advisory, isolated_home, monkeypatch
    ):
        """An immediate repeat call for the same hit returns nothing."""
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        hit = self._make_hit(fake_advisory)
        adv.hits_due_for_banner([hit])
        # Same banner again, still inside the repeat window.
        repeat = adv.hits_due_for_banner([hit])
        assert repeat == []

    def test_call_after_window_re_banners(
        self, fake_advisory, isolated_home, monkeypatch
    ):
        """Once the cached timestamp is old enough the hit is due again."""
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        hit = self._make_hit(fake_advisory)
        adv.hits_due_for_banner([hit])

        cache_path = adv._banner_cache_path()
        assert cache_path is not None

        # Rewrite every well-formed "<key> <timestamp>" line with a
        # timestamp 48 hours in the past, so the entry looks stale.
        rewritten = []
        for entry in cache_path.read_text(encoding="utf-8").splitlines():
            fields = entry.split(None, 1)
            if len(fields) != 2:
                continue
            rewritten.append(f"{fields[0]} {time.time() - 48 * 3600}")
        cache_path.write_text("\n".join(rewritten) + "\n", encoding="utf-8")

        repeat = adv.hits_due_for_banner([hit])
        assert repeat == [hit]

    def test_acked_hits_never_banner(
        self, fake_advisory, isolated_home, monkeypatch
    ):
        """An acked advisory is never reported as due."""
        monkeypatch.setattr(adv, "get_acked_ids", lambda: {fake_advisory.id})
        hit = self._make_hit(fake_advisory)
        assert adv.hits_due_for_banner([hit]) == []
# ---------------------------------------------------------------------------
# Rendering
# ---------------------------------------------------------------------------
class TestRendering:
    """Text rendering of advisory hits for banner, doctor and gateway log."""

    @staticmethod
    def _make_hit(advisory):
        """Build the advisory hit used as rendering input."""
        return adv.AdvisoryHit(
            advisory=advisory,
            package="fake-malicious-pkg",
            installed_version="6.6.6",
        )

    def test_short_banner_lines_includes_id_and_version(self, fake_advisory):
        """The short banner names the advisory, the pinned package and the doctor hint."""
        banner = "\n".join(adv.short_banner_lines([self._make_hit(fake_advisory)]))
        for expected in (
            fake_advisory.id,
            fake_advisory.title,
            "fake-malicious-pkg==6.6.6",
            "hermes doctor",
        ):
            assert expected in banner

    def test_full_remediation_text_contains_all_steps(self, fake_advisory):
        """The full text carries every remediation step plus url and summary."""
        text = "\n".join(adv.full_remediation_text(self._make_hit(fake_advisory)))
        # Each remediation step has to appear verbatim.
        for step in fake_advisory.remediation:
            assert step in text
        assert fake_advisory.url in text
        assert fake_advisory.summary in text

    def test_render_doctor_section_clean_state(self):
        """With no hits the section reports success and no problems."""
        has_problems, lines = adv.render_doctor_section([])
        assert has_problems is False
        matching = [ln for ln in lines if "No active security advisories" in ln]
        assert matching

    def test_render_doctor_section_with_unacked_hit(
        self, fake_advisory, monkeypatch
    ):
        """An unacked hit flags a problem and shows the advisory title."""
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        has_problems, lines = adv.render_doctor_section(
            [self._make_hit(fake_advisory)]
        )
        assert has_problems is True
        assert fake_advisory.title in "\n".join(lines)

    def test_gateway_log_message_singular(self, fake_advisory, monkeypatch):
        """A single hit yields a message naming the advisory and the package pin."""
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        message = adv.gateway_log_message([self._make_hit(fake_advisory)])
        assert message is not None
        assert fake_advisory.id in message
        assert "fake-malicious-pkg==6.6.6" in message

    def test_gateway_log_message_returns_none_for_no_hits(self):
        """No hits means no log message at all."""
        assert adv.gateway_log_message([]) is None
# ---------------------------------------------------------------------------
# Real catalog smoke test
# ---------------------------------------------------------------------------
class TestRealCatalog:
    """Smoke test over the advisory catalog that actually ships."""

    def test_advisories_well_formed(self):
        """Check every entry of ``adv.ADVISORIES`` for internal consistency.

        Guards against data-entry slips: empty or duplicate ids, blank
        title/summary/remediation, a url that does not start with "http",
        and malformed ``compromised`` tuples.
        """
        ids_so_far: set[str] = set()
        for entry in adv.ADVISORIES:
            assert entry.id, "advisory has empty id"
            assert entry.id not in ids_so_far, f"duplicate id {entry.id}"
            ids_so_far.add(entry.id)

            assert entry.title, f"{entry.id}: empty title"
            assert entry.summary, f"{entry.id}: empty summary"
            assert entry.remediation, f"{entry.id}: empty remediation"
            assert entry.url.startswith("http"), (
                f"{entry.id}: bad url {entry.url!r}"
            )
            assert entry.compromised, (
                f"{entry.id}: empty compromised tuple"
            )

            # Each compromised item is a (package name, frozenset) pair.
            for package_name, bad_versions in entry.compromised:
                assert package_name, f"{entry.id}: empty package name"
                assert isinstance(bad_versions, frozenset), (
                    f"{entry.id}: versions must be frozenset"
                )

View file

@ -6,6 +6,7 @@ rather than leaving zombie processes or telling users to manually restart
when launchd will auto-respawn.
"""
import os
import subprocess
from types import SimpleNamespace
from unittest.mock import patch, MagicMock
@ -1068,13 +1069,18 @@ class TestFindGatewayPidsExclude:
def test_excludes_specified_pids(self, monkeypatch):
monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
# Bypass /proc scan so the subprocess (ps) fallback is used
_real_isdir = os.path.isdir
monkeypatch.setattr("os.path.isdir", lambda p: False if p == "/proc" else _real_isdir(p))
monkeypatch.setattr(gateway_cli, "_get_service_pids", lambda: set())
monkeypatch.setattr(gateway_cli, "_get_ancestor_pids", lambda: {999})
def fake_run(cmd, **kwargs):
return subprocess.CompletedProcess(
cmd, 0,
stdout=(
"user 100 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
"user 200 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
"100 python gateway/run.py\n"
"200 python gateway/run.py\n"
),
stderr="",
)
@ -1082,19 +1088,24 @@ class TestFindGatewayPidsExclude:
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
monkeypatch.setattr("os.getpid", lambda: 999)
pids = gateway_cli.find_gateway_pids(exclude_pids={100})
pids = gateway_cli.find_gateway_pids(exclude_pids={100}, all_profiles=True)
assert 100 not in pids
assert 200 in pids
def test_no_exclude_returns_all(self, monkeypatch):
monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
# Bypass /proc scan so the subprocess (ps) fallback is used
_real_isdir = os.path.isdir
monkeypatch.setattr("os.path.isdir", lambda p: False if p == "/proc" else _real_isdir(p))
monkeypatch.setattr(gateway_cli, "_get_service_pids", lambda: set())
monkeypatch.setattr(gateway_cli, "_get_ancestor_pids", lambda: {999})
def fake_run(cmd, **kwargs):
return subprocess.CompletedProcess(
cmd, 0,
stdout=(
"user 100 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
"user 200 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
"100 python gateway/run.py\n"
"200 python gateway/run.py\n"
),
stderr="",
)
@ -1102,7 +1113,7 @@ class TestFindGatewayPidsExclude:
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
monkeypatch.setattr("os.getpid", lambda: 999)
pids = gateway_cli.find_gateway_pids()
pids = gateway_cli.find_gateway_pids(all_profiles=True)
assert 100 in pids
assert 200 in pids
@ -1111,6 +1122,10 @@ class TestFindGatewayPidsExclude:
profile_dir.mkdir(parents=True)
monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: profile_dir)
# Bypass /proc scan so the subprocess (ps) fallback is used
_real_isdir = os.path.isdir
monkeypatch.setattr("os.path.isdir", lambda p: False if p == "/proc" else _real_isdir(p))
monkeypatch.setattr(gateway_cli, "_get_ancestor_pids", lambda: {999})
def fake_run(cmd, **kwargs):
return subprocess.CompletedProcess(

View file

@ -19,6 +19,8 @@ The fix:
These tests pin the corrected behavior.
"""
import time
from datetime import datetime, timezone
from unittest.mock import patch
import pytest
@ -67,6 +69,53 @@ def test_minimax_login_does_not_launch_anthropic_flow():
assert body["expires_in"] == 600
def test_minimax_dashboard_poller_accepts_absolute_ms_expired_in():
    """Dashboard MiniMax completion must accept unix-ms token expiry values.

    Seeds a pending device-code session whose expiry is an absolute unix
    timestamp in milliseconds (30 minutes ahead), runs the poller with the
    token poll and the state save patched out, and checks the saved state.
    """
    from hermes_cli import web_server as ws

    started = datetime.now(timezone.utc)
    abs_ms = int((started.timestamp() + 1800) * 1000)
    session_id = "minimax-absolute-ms-test"

    pending_session = {
        "session_id": session_id,
        "provider": "minimax-oauth",
        "flow": "device_code",
        "created_at": time.time(),
        "status": "pending",
        "error_message": None,
        "portal_base_url": "https://api.minimax.io",
        "client_id": "client-id",
        "user_code": "ABCD-1234",
        "code_verifier": "verifier",
        "interval_ms": 2000,
        "expired_in_raw": abs_ms,
        "region": "global",
    }
    poll_result = {
        "status": "success",
        "access_token": "access",
        "refresh_token": "refresh",
        "expired_in": abs_ms,
        "token_type": "Bearer",
    }
    captured_state = {}

    def _capture(state):
        captured_state.update(state)

    ws._oauth_sessions[session_id] = pending_session
    try:
        with patch("hermes_cli.auth._minimax_poll_token", return_value=poll_result):
            with patch(
                "hermes_cli.auth._minimax_save_auth_state", side_effect=_capture
            ):
                ws._minimax_poller(session_id)
    finally:
        # Always remove the seeded session so other tests do not see it.
        ws._oauth_sessions.pop(session_id, None)

    assert captured_state["access_token"] == "access"
    # Roughly 1800 seconds of lifetime, with a small tolerance.
    assert 1790 <= captured_state["expires_in"] <= 1810
    expires_at = datetime.fromisoformat(captured_state["expires_at"])
    assert expires_at.year < 9999
def test_anthropic_pkce_branch_still_works():
"""Sanity: the dispatcher tightening doesn't break the legitimate Anthropic PKCE path."""
fake_anthropic_response = {

View file

@ -182,7 +182,7 @@ class TestClientCacheBoundedGrowth:
_get_cached_client,
)
key = ("test_replace", True, "", "", "", (), False)
key = ("test_replace", True, "", "", "", (), False, "")
# Simulate a stale entry from a closed loop
old_loop = asyncio.new_event_loop()

View file

@ -0,0 +1,308 @@
"""Tests for the per-turn file-mutation verifier footer.
Covers the three moving pieces:
1. ``_extract_file_mutation_targets`` pulls file paths from write_file /
patch (replace + V4A) tool-call argument dicts.
2. ``AIAgent._record_file_mutation_result`` builds the per-turn state
dict, removing entries when a later success supersedes an earlier
failure for the same path.
3. ``AIAgent._format_file_mutation_failure_footer`` renders the dict
as a user-visible advisory.
Regression target: the "Ben Eng llm-wiki" session where grok-4.1-fast
batched parallel patches, half failed, and the model summarised the
turn claiming every file was edited. This verifier makes over-claiming
structurally impossible past the model: the user always sees the real
list of files that did NOT change.
"""
from __future__ import annotations
import json
import pytest
from run_agent import (
AIAgent,
_FILE_MUTATING_TOOLS,
_extract_error_preview,
_extract_file_mutation_targets,
)
# ---------------------------------------------------------------------------
# _extract_file_mutation_targets
# ---------------------------------------------------------------------------
class TestExtractFileMutationTargets:
    """Path extraction from write_file / patch tool-call argument dicts."""

    def test_non_mutating_tool_returns_empty(self):
        """Tools other than write_file / patch yield no targets."""
        for tool_name, tool_args in (
            ("read_file", {"path": "/x"}),
            ("terminal", {"command": "ls"}),
        ):
            assert _extract_file_mutation_targets(tool_name, tool_args) == []

    def test_write_file_returns_single_path(self):
        """write_file reports its ``path`` argument."""
        targets = _extract_file_mutation_targets(
            "write_file", {"path": "/tmp/a.md", "content": "x"}
        )
        assert targets == ["/tmp/a.md"]

    def test_write_file_missing_path_returns_empty(self):
        """write_file without ``path`` reports nothing."""
        targets = _extract_file_mutation_targets("write_file", {"content": "x"})
        assert targets == []

    def test_patch_replace_mode_returns_path(self):
        """Explicit replace mode reports the ``path`` argument."""
        tool_args = {
            "mode": "replace",
            "path": "/tmp/a.md",
            "old_string": "x",
            "new_string": "y",
        }
        assert _extract_file_mutation_targets("patch", tool_args) == ["/tmp/a.md"]

    def test_patch_default_mode_is_replace(self):
        """With ``mode`` omitted, patch is treated like replace mode."""
        tool_args = {"path": "/tmp/a.md", "old_string": "x", "new_string": "y"}
        assert _extract_file_mutation_targets("patch", tool_args) == ["/tmp/a.md"]

    def test_patch_v4a_single_file(self):
        """A V4A body with one Update File header yields that one path."""
        v4a_body = (
            "*** Begin Patch\n"
            "*** Update File: /tmp/a.md\n"
            "@@ ctx @@\n"
            " line1\n"
            "-bad\n"
            "+good\n"
            "*** End Patch\n"
        )
        targets = _extract_file_mutation_targets(
            "patch", {"mode": "patch", "patch": v4a_body}
        )
        assert targets == ["/tmp/a.md"]

    def test_patch_v4a_multi_file(self):
        """Update, Add and Delete headers are all reported, in body order."""
        v4a_body = (
            "*** Begin Patch\n"
            "*** Update File: /tmp/a.md\n"
            "@@ @@\n-a\n+b\n"
            "*** Add File: /tmp/new.md\n"
            "+fresh\n"
            "*** Delete File: /tmp/old.md\n"
            "*** End Patch\n"
        )
        targets = _extract_file_mutation_targets(
            "patch", {"mode": "patch", "patch": v4a_body}
        )
        assert targets == ["/tmp/a.md", "/tmp/new.md", "/tmp/old.md"]

    def test_patch_v4a_missing_body_returns_empty(self):
        """Patch mode with an absent or empty body reports nothing."""
        for tool_args in ({"mode": "patch"}, {"mode": "patch", "patch": ""}):
            assert _extract_file_mutation_targets("patch", tool_args) == []
# ---------------------------------------------------------------------------
# _extract_error_preview
# ---------------------------------------------------------------------------
class TestExtractErrorPreview:
    """Turning a raw tool result into a short error preview string."""

    def test_json_error_field_preferred(self):
        """A JSON payload's ``error`` field becomes the preview."""
        message = "Could not find old_string in /tmp/x"
        payload = json.dumps({"success": False, "error": message})
        assert _extract_error_preview(payload) == message

    def test_plain_string_falls_through(self):
        """A non-JSON string is returned as-is."""
        text = "Error executing tool: boom"
        assert _extract_error_preview(text) == text

    def test_long_preview_truncated(self):
        """Output is capped at ``max_len`` characters."""
        oversized = "x" * 500
        preview = _extract_error_preview(oversized, max_len=50)
        assert len(preview) <= 50
        # NOTE(review): endswith("") is true for every string, so this
        # assertion cannot fail. The intended truncation marker appears to
        # be missing here — confirm against _extract_error_preview.
        assert preview.endswith("")

    def test_none_returns_empty(self):
        """``None`` input yields the empty string."""
        assert _extract_error_preview(None) == ""
# ---------------------------------------------------------------------------
# _record_file_mutation_result — state transitions
# ---------------------------------------------------------------------------
def _bare_agent() -> AIAgent:
    """Return an ``AIAgent`` created without running ``__init__``.

    The instance is allocated with ``object.__new__`` and given only an
    empty ``_turn_failed_file_mutations`` dict. That is the state needed by
    ``_record_file_mutation_result`` and
    ``_format_file_mutation_failure_footer``, the methods under test here.
    Bypassing the constructor avoids its large parameter list and its
    network, auth and filesystem work.
    """
    stub = object.__new__(AIAgent)
    stub._turn_failed_file_mutations = dict()
    return stub
class TestRecordFileMutationResult:
    """State transitions of the per-turn failed-mutation dict."""

    @staticmethod
    def _replace_args(old_string, new_string="y"):
        """Build replace-mode patch arguments targeting ``/tmp/a.md``."""
        return {
            "mode": "replace",
            "path": "/tmp/a.md",
            "old_string": old_string,
            "new_string": new_string,
        }

    def test_non_mutating_tool_ignored(self):
        """An error from a non-mutating tool leaves the state empty."""
        agent = _bare_agent()
        agent._record_file_mutation_result(
            "read_file", {"path": "/tmp/x"}, "{}", is_error=True,
        )
        assert agent._turn_failed_file_mutations == {}

    def test_failure_recorded(self):
        """A failed patch is stored under its path with tool and preview."""
        agent = _bare_agent()
        failure = json.dumps({"success": False, "error": "Could not find old_string"})
        agent._record_file_mutation_result(
            "patch", self._replace_args("x"), failure, is_error=True,
        )
        recorded = agent._turn_failed_file_mutations
        assert "/tmp/a.md" in recorded
        entry = recorded["/tmp/a.md"]
        assert entry["tool"] == "patch"
        assert "Could not find old_string" in entry["error_preview"]

    def test_success_removes_prior_failure(self):
        """A later success on the same path clears the earlier failure."""
        agent = _bare_agent()
        # Initial attempt fails and is recorded.
        agent._record_file_mutation_result(
            "patch", self._replace_args("x"),
            json.dumps({"error": "not found"}), is_error=True,
        )
        assert "/tmp/a.md" in agent._turn_failed_file_mutations
        # Retry with a corrected old_string succeeds.
        agent._record_file_mutation_result(
            "patch", self._replace_args("real", "fixed"),
            json.dumps({"success": True, "diff": "..."}), is_error=False,
        )
        assert agent._turn_failed_file_mutations == {}

    def test_repeated_failure_keeps_first_error(self):
        """Two failures on one path keep the first error preview."""
        agent = _bare_agent()
        for old_string, error_text in (("v1", "first error"), ("v2", "second error")):
            agent._record_file_mutation_result(
                "patch", self._replace_args(old_string),
                json.dumps({"error": error_text}), is_error=True,
            )
        # The original error is retained; replacing it with the latest
        # would hide the initial root cause.
        entry = agent._turn_failed_file_mutations["/tmp/a.md"]
        assert "first error" in entry["error_preview"]

    def test_v4a_multi_file_all_tracked(self):
        """A failed multi-file V4A patch records every file it names."""
        agent = _bare_agent()
        v4a_body = (
            "*** Begin Patch\n"
            "*** Update File: /tmp/a.md\n@@ @@\n-a\n+b\n"
            "*** Update File: /tmp/b.md\n@@ @@\n-a\n+b\n"
            "*** End Patch\n"
        )
        agent._record_file_mutation_result(
            "patch", {"mode": "patch", "patch": v4a_body},
            json.dumps({"error": "parse failure"}), is_error=True,
        )
        tracked = set(agent._turn_failed_file_mutations)
        assert tracked == {"/tmp/a.md", "/tmp/b.md"}

    def test_no_state_dict_silent_noop(self):
        """Recording without an initialised state dict must not raise.

        Outside ``run_conversation`` the state dict is absent. A tool
        dispatched from, say, a direct ``chat()`` call should not blow up
        the call site just because the verifier state hasn't been set up.
        """
        stateless = object.__new__(AIAgent)  # deliberately no state attached
        # Passing means this call returns without raising.
        stateless._record_file_mutation_result(
            "patch", {"mode": "replace", "path": "/tmp/a.md"},
            json.dumps({"error": "x"}), is_error=True,
        )

    def test_missing_path_arg_recorded_nowhere(self):
        """A failure with no path argument leaves the state empty."""
        agent = _bare_agent()
        pathless_args = {"mode": "replace"}  # no "path" key
        agent._record_file_mutation_result(
            "patch", pathless_args,
            json.dumps({"error": "path required"}), is_error=True,
        )
        # State is keyed by file path, not by tool-call id, so without a
        # path there is nothing to record.
        assert agent._turn_failed_file_mutations == {}
# ---------------------------------------------------------------------------
# _format_file_mutation_failure_footer
# ---------------------------------------------------------------------------
class TestFormatFooter:
    """Rendering of the failed-mutation dict as a user-visible footer."""

    def test_empty_returns_empty_string(self):
        """No failures produce no footer text."""
        footer = AIAgent._format_file_mutation_failure_footer({})
        assert footer == ""

    def test_single_failure(self):
        """One failure shows the count, path, error preview and a hint."""
        failed = {
            "/tmp/a.md": {
                "tool": "patch",
                "error_preview": "Could not find old_string",
            },
        }
        footer = AIAgent._format_file_mutation_failure_footer(failed)
        for expected in (
            "1 file(s) were NOT modified",
            "/tmp/a.md",
            "Could not find old_string",
            "git status",  # user-actionable hint
        ):
            assert expected in footer

    def test_truncation_at_10_entries(self):
        """Fifteen failures report the full count plus an "and 5 more" line."""
        failed = {}
        for index in range(15):
            failed[f"/tmp/f{index}.md"] = {"tool": "patch", "error_preview": "err"}

        footer = AIAgent._format_file_mutation_failure_footer(failed)
        assert "15 file(s) were NOT modified" in footer
        assert "… and 5 more" in footer

        # Intended layout: header, ten file bullets, one "and X more" line.
        # NOTE(review): startswith("") matches every line, so this filter
        # counts all footer lines rather than only bullets. The bullet
        # glyph appears to be missing from the literal — confirm.
        bullet_lines = []
        for text_line in footer.split("\n"):
            if text_line.lstrip().startswith(""):
                bullet_lines.append(text_line)
        assert len(bullet_lines) == 11  # 10 shown + 1 summary
# ---------------------------------------------------------------------------
# _file_mutation_verifier_enabled — env + config precedence
# ---------------------------------------------------------------------------
class TestVerifierEnabled:
    """Environment-variable and config precedence for the verifier toggle."""

    def test_default_is_enabled(self, monkeypatch):
        """With no env var and an empty config the verifier is on."""
        import hermes_cli.config as _cfg_mod

        monkeypatch.delenv("HERMES_FILE_MUTATION_VERIFIER", raising=False)
        # A real user config.yaml may exist in some environments; stub
        # load_config so the test only sees an empty config.
        monkeypatch.setattr(_cfg_mod, "load_config", lambda: {})
        assert _bare_agent()._file_mutation_verifier_enabled() is True

    @pytest.mark.parametrize("value", ["0", "false", "FALSE", "no", "off"])
    def test_env_disables(self, monkeypatch, value):
        """Each listed env value switches the verifier off."""
        monkeypatch.setenv("HERMES_FILE_MUTATION_VERIFIER", value)
        assert _bare_agent()._file_mutation_verifier_enabled() is False

    def test_env_enables_over_config(self, monkeypatch):
        """Env value "1" wins over a config that disables the verifier."""
        import hermes_cli.config as _cfg_mod

        disabling_config = {"display": {"file_mutation_verifier": False}}
        monkeypatch.setenv("HERMES_FILE_MUTATION_VERIFIER", "1")
        monkeypatch.setattr(_cfg_mod, "load_config", lambda: disabling_config)
        assert _bare_agent()._file_mutation_verifier_enabled() is True

    def test_config_disables_when_no_env(self, monkeypatch):
        """Without the env var, a disabling config switches the verifier off."""
        import hermes_cli.config as _cfg_mod

        disabling_config = {"display": {"file_mutation_verifier": False}}
        monkeypatch.delenv("HERMES_FILE_MUTATION_VERIFIER", raising=False)
        monkeypatch.setattr(_cfg_mod, "load_config", lambda: disabling_config)
        assert _bare_agent()._file_mutation_verifier_enabled() is False
# ---------------------------------------------------------------------------
# Module-level invariants
# ---------------------------------------------------------------------------
def test_file_mutating_tools_set_shape():
    """Pin the verifier's tracked tool set to exactly write_file and patch.

    This is a guard rail. Adding another file-mutating tool (for example
    a new ``append_file``) makes this test fail, which prompts a review
    of whether the verifier should track the new tool as well.
    """
    expected_tools = frozenset({"write_file", "patch"})
    assert _FILE_MUTATING_TOOLS == expected_tools

Some files were not shown because too many files have changed in this diff Show more